Trigonometric optimizations for POWER cpus

These optimizations remove most of the FP->INT conversions, so that the computation is done with FP operations instead. This eliminates Load-Hit-Store (LHS) stalls on POWER, increasing the performance of hypot/hypotf (about 50% on POWER7, 25% on POWER6, and 30% on POWER5) and sinf/cosf (30% on POWER7, 15% on POWER6, and 10% on POWER5). (cherry picked from commit 5939fc0867e1616a537c26d84ca1612b53b0303e)
author: Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2011-06-17 13:30:52 -0500
committer: Ryan S. Arnold <rsa@us.ibm.com> 2011-06-17 14:12:07 -0500
commit: 6559b01648c2a08ea862ae923ce7d86c6e8cca45 (patch)
tree: 456119f629e086a9a279edc57a440e912c20846f
parent: 4749a0058b27274a95c5a798e339c7299cdf890e (diff)
download: glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.tar
glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.tar.gz
glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.tar.bz2
glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.zip
15 files changed, 1474 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index dd8a270412..b8d1a5f37b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2011-06-15  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+	* sysdeps/powerpc/fpu/Makefile: Added new objects.
+	* sysdeps/powerpc/fpu/e_hypot.c: New file: hypot optimized for POWER.
+	* sysdeps/powerpc/fpu/e_hypotf.c: New file: hypotf optimized for
+	POWER.
+	* sysdeps/powerpc/fpu/e_rem_pio2f.c: New file: optimized for POWER.
+	* sysdeps/powerpc/fpu/k_cosf.c: New file: optimized for POWER.
+	* sysdeps/powerpc/fpu/k_sinf.c: New file: optimized for POWER.
+	* sysdeps/powerpc/fpu/s_cosf.c: New file: cosf optimized for POWER.
+	* sysdeps/powerpc/fpu/s_sinf.c: New file: sinf optimized for POWER.
+	* sysdeps/powerpc/fpu/s_float_bitwise.h: New file:  bitwise operation
+	over floats.
+	* sysdeps/powerpc/powerpc32/fpu/s_float_bitwise.S: New file: bitwise
+	operation over float, PPC32 implementation.
+	* sysdeps/powerpc/powerpc32/power7/fpu/s_float_bitwise.S: New file:
+	bitwise operation over floats, PPC32 implementation using VSX.
+	* sysdeps/powerpc/powerpc64/fpu/s_float_bitwise.S: New file: bitwise
+	operation over float, PPC64 implementation.
+	* sysdeps/powerpc/powerpc64/power7/fpu/s_float_bitwise.S: New file:
+	bitwise operation over floats, PPC64 implementation using VSX.
+
 2011-05-18  Ryan S. Arnold  <rsa@us.ibm.com>
 
 	* sysdeps/powerpc/powerpc64/Makefile (no-special-regs): Add -mno-vsx
diff --git a/sysdeps/powerpc/fpu/Makefile b/sysdeps/powerpc/fpu/Makefile
index ffacf1a754..1c98ca6f82 100644
--- a/sysdeps/powerpc/fpu/Makefile
+++ b/sysdeps/powerpc/fpu/Makefile
@@ -4,6 +4,11 @@ libm-tests += test-powerpc-snan
 
 # libm needs ld.so to access dl_hwcap
 $(objpfx)libm.so: $(elfobjdir)/ld.so
+
+# Every routine named here is added to both lists below: under its own
+# s_* name to sysdep_routines, and with the s_ prefix rewritten to m_ to
+# libm-sysdep_routines.
+# NOTE(review): presumably so the object is built once for libc and once
+# for libm -- confirm against the generic build rules.
+duplicated-routines = s_float_bitwise
+
+libm-sysdep_routines += $(duplicated-routines:s_%=m_%)
+sysdep_routines += $(duplicated-routines)
 endif
 
 ifeq ($(subdir),stdlib)
diff --git a/sysdeps/powerpc/fpu/e_hypot.c b/sysdeps/powerpc/fpu/e_hypot.c
new file mode 100644
index 0000000000..c09c0c8b46
--- /dev/null
+++ b/sysdeps/powerpc/fpu/e_hypot.c
@@ -0,0 +1,86 @@
+/* Pythagorean addition using doubles
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "math.h"
+
+/* Powers of two used as comparison thresholds and as scale factors.  A
+   scale factor is multiplied into both operands and divided back out of
+   the square root, so the squared, scaled operands must stay in range.  */
+static const double two60   = 1.152921504606847e+18;	/* 2^60 */
+static const double two500  = 3.2733906078961419e+150;	/* 2^500 */
+static const double two600  = 4.149515568880993e+180;	/* 2^600 */
+static const double two1022 = 4.49423283715579e+307;	/* 2^1022 */
+static const double twoM500 = 3.054936363499605e-151;	/* 2^-500 */
+/* 2^-600.  The previous value, 4.616489308892868e-128, was far too large:
+   for x above roughly 1e282 the scaled x * x overflowed and the function
+   returned infinity for finite results.  */
+static const double twoM600 = 2.4099198651028841e-181;
+/* About 2^-1022; only used as a threshold for very small operands.  */
+static const double pdnum   = 2.225073858507201e-308;
+
+/* __ieee754_hypot(x,y)
+ *
+ * Return sqrt(x*x + y*y).
+ *
+ * This is an FP-only version without any FP->INT conversion.  It is
+ * similar to the default C version, making the appropriate overflow
+ * and underflow checks and scaling the operands when needed.
+ *
+ * Special cases:
+ *   - if either operand is infinite the result is +Inf (even when the
+ *     other operand is NaN);
+ *   - otherwise, if either operand is NaN the result is NaN;
+ *   - if the smaller operand is zero, or more than 2^60 times smaller
+ *     than the larger one, the result is the sum of the magnitudes.
+ */
+
+double
+__ieee754_hypot (double x, double y)
+{
+  double j;
+
+  /* Infinity must be tested before NaN: hypot (Inf, NaN) is Inf.  */
+  if (isinf (x) || isinf (y))
+    {
+      return INFINITY;
+    }
+  if (isnan (x) || isnan (y))
+    {
+      return NAN;
+    }
+
+  /* Work on magnitudes, ordered so that x >= y.  */
+  x = __builtin_fabs (x);
+  y = __builtin_fabs (y);
+  if (y > x)
+    {
+      j = x;
+      x = y;
+      y = j;
+    }
+
+  /* y does not contribute to the rounded result.  */
+  if (y == 0.0 || (x / y) > two60)
+    {
+      return x + y;
+    }
+
+  /* Large operands: scale down so that x * x cannot overflow.  */
+  if (x > two500)
+    {
+      x *= twoM600;
+      y *= twoM600;
+      return __builtin_sqrt (x * x + y * y) / twoM600;
+    }
+
+  /* Small operands: scale up so that y * y does not underflow.  */
+  if (y < twoM500)
+    {
+      if (y <= pdnum)
+	{
+	  /* Here x <= y * 2^60, so scaling by 2^1022 cannot overflow.  */
+	  x *= two1022;
+	  y *= two1022;
+	  return __builtin_sqrt (x * x + y * y) / two1022;
+	}
+      else
+	{
+	  x *= two600;
+	  y *= two600;
+	  return __builtin_sqrt (x * x + y * y) / two600;
+	}
+    }
+  return __builtin_sqrt (x * x + y * y);
+}
diff --git a/sysdeps/powerpc/fpu/e_hypotf.c b/sysdeps/powerpc/fpu/e_hypotf.c
new file mode 100644
index 0000000000..55eb2cb0c8
--- /dev/null
+++ b/sysdeps/powerpc/fpu/e_hypotf.c
@@ -0,0 +1,86 @@
+/* Pythagorean addition using floats
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "math.h"
+
+/* __ieee754_hypotf(x,y)
+ *
+ * Return sqrt(x*x + y*y).
+ *
+ * This is an FP-only version without any FP->INT conversion.  It is
+ * similar to the default C version, making the appropriate overflow
+ * and underflow checks and scaling the operands when needed.
+ *
+ * Special cases:
+ *   - if either operand is infinite the result is +Inf (even when the
+ *     other operand is NaN);
+ *   - otherwise, if either operand is NaN the result is NaN;
+ *   - if the smaller operand is zero, or more than 2^30 times smaller
+ *     than the larger one, the result is the sum of the magnitudes.
+ */
+
+/* Comparison thresholds.  */
+static const float two30  = 1.0737418e09;	/* 2^30 */
+static const float two50  = 1.1259000e15;	/* about 2^50 */
+static const float twoM50 = 8.8817842e-16;	/* 2^-50 */
+static const float pdnum  = 1.1754939e-38;	/* about 2^-126 */
+
+/* Scale factors.  These must be exact powers of two so that multiplying
+   the operands and dividing the result introduce no rounding error; the
+   literals carry enough digits to round to the exact power.  */
+static const float two60  = 1.152921504606847e+18;	/* 2^60 */
+static const float two126 = 8.5070592e+37;		/* 2^126 */
+/* 2^-67 (not 2^-60: with 2^-60 a scaled x near FLT_MAX would reach 2^68
+   and its square would overflow).  */
+static const float twoM67 = 6.776263578034403e-21;
+
+float
+__ieee754_hypotf (float x, float y)
+{
+  float j;
+
+  /* Infinity must be tested before NaN: hypotf (Inf, NaN) is Inf.  */
+  if (isinf (x) || isinf (y))
+    {
+      return INFINITY;
+    }
+  if (isnan (x) || isnan (y))
+    {
+      return NAN;
+    }
+
+  /* Work on magnitudes, ordered so that x >= y.  */
+  x = __builtin_fabsf (x);
+  y = __builtin_fabsf (y);
+  if (y > x)
+    {
+      j = x;
+      x = y;
+      y = j;
+    }
+
+  /* y does not contribute to the rounded result.  */
+  if (y == 0.0 || (x / y) > two30)
+    {
+      return x + y;
+    }
+
+  /* Large operands: scale down so that x * x cannot overflow.  */
+  if (x > two50)
+    {
+      x *= twoM67;
+      y *= twoM67;
+      return __builtin_sqrtf (x * x + y * y) / twoM67;
+    }
+
+  /* Small operands: scale up before squaring.  */
+  if (y < twoM50)
+    {
+      if (y <= pdnum)
+	{
+	  /* Here x <= y * 2^30, so scaling by 2^126 cannot overflow.  */
+	  x *= two126;
+	  y *= two126;
+	  return __builtin_sqrtf (x * x + y * y) / two126;
+	}
+      else
+	{
+	  x *= two60;
+	  y *= two60;
+	  return __builtin_sqrtf (x * x + y * y) / two60;
+	}
+    }
+  return __builtin_sqrtf (x * x + y * y);
+}
diff --git a/sysdeps/powerpc/fpu/e_rem_pio2f.c b/sysdeps/powerpc/fpu/e_rem_pio2f.c
new file mode 100644
index 0000000000..5b6e7b60ef
--- /dev/null
+++ b/sysdeps/powerpc/fpu/e_rem_pio2f.c
@@ -0,0 +1,431 @@
+/* e_rem_pio2f.c -- float version of e_rem_pio2.c
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "s_float_bitwise.h"
+
+/* Forward declaration of the reduction kernel that __ieee754_rem_pio2f
+   calls for arguments above pio2_2e7; it is defined at the end of this
+   file.  */
+static int32_t __fp_kernel_rem_pio2f (float *, float *, float, int32_t);
+
+/* __ieee754_rem_pio2f(x,y)
+ *
+ * return the remainder of x rem pi/2 in y[0]+y[1]
+ */
+
+/* Digits of 2/pi in base 256, one 8-bit digit per entry, stored as
+   integer-valued floats so that no INT->FP conversion is needed:
+   2/pi = 0x0.A2F9836E...  gives 162 (0xA2), 249 (0xF9), 131 (0x83),
+   110 (0x6E), ...  Indexed by __fp_kernel_rem_pio2f.  */
+static const float two_over_pi[] = {
+  1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02,
+  7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01,
+  2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02,
+  2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02,
+  2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02,
+  6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01,
+  2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02,
+  2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01,
+  1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01,
+  6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02,
+  6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02,
+  9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01,
+  2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01,
+  1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01,
+  2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02,
+  4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02,
+  2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01,
+  1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01,
+  5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01,
+  1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02,
+  1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02,
+  1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01,
+  3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02,
+  2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01,
+  2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02,
+  9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01,
+  1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02,
+  3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02,
+  7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02,
+  1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01,
+  1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02,
+  2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02,
+  6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02,
+  1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02,
+  1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02,
+  3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00,
+  8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01,
+  2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02,
+  2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01,
+  1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01,
+  1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01,
+  9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00,
+  1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01,
+  2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01,
+  1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02,
+  7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01,
+  6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01,
+  2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02,
+  9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02,
+  1.40000000e+02, 1.07000000e+02
+};
+
+
+/* Entry n-1 approximates n*pi/2 for n = 1..32.  __ieee754_rem_pio2f
+   compares the input against these through __float_and_test24 to decide
+   whether the first reduction step is sufficient.
+   NOTE(review): the values look like n*pi/2 with the low mantissa bits
+   cleared, matching the 0x7FFFFF00 mask documented for
+   __float_and_test24 -- confirm against the assembly helper.  */
+static const float npio2_hw[] = {
+  1.57077026e+00, 3.14154053e+00, 4.71228027e+00, 6.28308105e+00,
+  7.85388184e+00, 9.42456055e+00, 1.09953613e+01, 1.25661621e+01,
+  1.41369629e+01, 1.57077637e+01, 1.72783203e+01, 1.88491211e+01,
+  2.04199219e+01, 2.19907227e+01, 2.35615234e+01, 2.51323242e+01,
+  2.67031250e+01, 2.82739258e+01, 2.98447266e+01, 3.14155273e+01,
+  3.29863281e+01, 3.45566406e+01, 3.61279297e+01, 3.76982422e+01,
+  3.92695312e+01, 4.08398438e+01, 4.24111328e+01, 4.39814453e+01,
+  4.55527344e+01, 4.71230469e+01, 4.86943359e+01, 5.02646484e+01
+};
+
+/* pi/2 split into eleven pieces of decreasing magnitude whose sum is
+   pi/2 (1.5703125 + 4.5776e-04 + 2.5988e-05 + ... = 1.5707963...).  The
+   hex values in the comments are the float bit patterns.  Used by
+   __fp_kernel_rem_pio2f for the final multiplication by pi/2.  */
+static const float PIo2[] = {
+  1.5703125000e+00,		/* 0x3fc90000 */
+  4.5776367188e-04,		/* 0x39f00000 */
+  2.5987625122e-05,		/* 0x37da0000 */
+  7.5437128544e-08,		/* 0x33a20000 */
+  6.0026650317e-11,		/* 0x2e840000 */
+  7.3896444519e-13,		/* 0x2b500000 */
+  5.3845816694e-15,		/* 0x27c20000 */
+  5.6378512969e-18,		/* 0x22d00000 */
+  8.3009228831e-20,		/* 0x1fc40000 */
+  3.2756352257e-22,		/* 0x1bc60000 */
+  6.3331015649e-25,		/* 0x17440000 */
+};
+
+/* Basic constants; two8 = 2^8 and twon8 = 2^-8 move between successive
+   base-256 digits.  */
+static const float zero  = 0.0000000000e+00;
+static const float one   = 1.0000000000;
+static const float two8  = 2.5600000000e+02;
+static const float twon8 = 3.9062500000e-03;
+
+/* invpio2 is 2/pi.  pi/2 is represented in three levels: pio2_N is the
+   leading part at level N and pio2_Nt the remaining tail at that level
+   (pio2_1 + pio2_1t ~ pi/2, pio2_2 + pio2_2t ~ pio2_1t,
+   pio2_3 + pio2_3t ~ pio2_2t).  */
+static const float half    = 5.0000000000e-01;
+static const float invpio2 = 6.3661980629e-01;
+static const float pio2_1  = 1.5707855225e+00;
+static const float pio2_1t = 1.0804334124e-05;
+static const float pio2_2  = 1.0804273188e-05;
+static const float pio2_2t = 6.0770999344e-11;
+static const float pio2_3  = 6.0770943833e-11;
+static const float pio2_3t = 6.1232342629e-17;
+
+/* Range thresholds for |x|: about pi/4, 3*pi/4 and 2^7 * pi/2.
+   pio2_24b is the value compared (through __float_and_test28) to detect
+   arguments close to pi/2.  */
+static const float pio4     = 7.8539801e-01;
+static const float pio3_4   = 2.3561945e+00;
+static const float pio2_24b = 1.5707951e+00;
+static const float pio2_2e7 = 2.0106054e+02;
+
+
+/* Reduce X modulo pi/2 using FP operations only.
+
+   The remainder is stored as the pair y[0] + y[1], where y[1] holds the
+   part lost when rounding y[0].  The return value is the multiple of
+   pi/2 that was removed, negated for negative X; for arguments above
+   pio2_2e7 only its low three bits are returned (see
+   __fp_kernel_rem_pio2f).  For NaN or infinite X both outputs are set to
+   x - x and 0 is returned.
+
+   The __float_* helpers are declared in s_float_bitwise.h; their
+   behaviour as described here is taken from the comments in that
+   header.  */
+int32_t
+__ieee754_rem_pio2f (float x, float *y)
+{
+  float ax, z, n, r, w, t, e0;
+  float tx[3];
+  int32_t i, nx;
+
+  ax = __builtin_fabsf (x);
+  /* |x| <= ~pi/4: nothing to reduce.  */
+  if (ax <= pio4)
+    {
+      y[0] = x;
+      y[1] = 0;
+      return 0;
+    }
+  /* |x| < 3pi/4: a single step of +-pi/2 is enough.  */
+  if (ax < pio3_4)
+    {
+      if (x > 0)
+	{
+	  z = x - pio2_1;
+	  /* If the masked bits of |x| do not match pio2_24b the first
+	     level of pi/2 is accurate enough; otherwise x is close to
+	     pi/2 and the second-level constants are used as well.  */
+	  if (!__float_and_test28 (ax, pio2_24b))
+	    {
+	      y[0] = z - pio2_1t;
+	      y[1] = (z - y[0]) - pio2_1t;
+	    }
+	  else
+	    {
+	      z -= pio2_2;
+	      y[0] = z - pio2_2t;
+	      y[1] = (z - y[0]) - pio2_2t;
+	    }
+	  return 1;
+	}
+      else
+	{
+	  /* Mirror image of the positive case.  */
+	  z = x + pio2_1;
+	  if (!__float_and_test28 (ax, pio2_24b))
+	    {
+	      y[0] = z + pio2_1t;
+	      y[1] = (z - y[0]) + pio2_1t;
+	    }
+	  else
+	    {
+	      z += pio2_2;
+	      y[0] = z + pio2_2t;
+	      y[1] = (z - y[0]) + pio2_2t;
+	    }
+	  return -1;
+	}
+    }
+  /* Medium range, |x| <= ~2^7 * pi/2: subtract n * pi/2 directly.  */
+  if (ax <= pio2_2e7)
+    {
+      /* n is |x| * 2/pi rounded to the nearest integer.  */
+      n = floorf (ax * invpio2 + half);
+      i = (int32_t) n;
+      r = ax - n * pio2_1;
+      w = n * pio2_1t;		/* 1st round good to 40 bit */
+      /* Quick accept when |x| is not close to the tabulated n * pi/2.  */
+      if (i < 32 && !__float_and_test24 (ax, npio2_hw[i - 1]))
+	{
+	  y[0] = r - w;
+	}
+      else
+	{
+	  /* These floats shadow the outer int32_t i inside this block
+	     only.  j and i receive the exponent-only part of |x| and of
+	     y[0], so j / i is 2 raised to the exponent difference, i.e.
+	     a measure of the bits lost to cancellation.  */
+	  float i, j;
+	  j = __float_and8 (ax);
+	  y[0] = r - w;
+	  i = __float_and8 (y[0]);
+	  if (j / i > 256.0 || j / i < 3.9062500e-3)
+	    {			/* 2nd iterations needed, good to 57 */
+	      t = r;
+	      w = n * pio2_2;
+	      r = t - w;
+	      w = n * pio2_2t - ((t - r) - w);
+	      y[0] = r - w;
+	      i = __float_and8 (y[0]);
+	      if (j / i > 33554432 || j / i < 2.9802322e-8)
+		{		/* 3rd iteration needed, 74 bits acc */
+		  t = r;
+		  w = n * pio2_3;
+		  r = t - w;
+		  w = n * pio2_3t - ((t - r) - w);
+		  y[0] = r - w;
+		}
+	    }
+	}
+      y[1] = (r - y[0]) - w;
+      /* The reduction was done on |x|; restore the sign.  */
+      if (x < 0)
+	{
+	  y[0] = -y[0];
+	  y[1] = -y[1];
+	  return -i;
+	}
+      else
+	{
+	  return i;
+	}
+    }
+
+  /* all other (large) arguments */
+  if (isnanf (x) || isinff (x))
+    {
+      y[0] = y[1] = x - x;
+      return 0;
+    }
+
+  /* e0 is the power of two 2^(ilogb(x)-7), so that
+     z = scalbn(|x|,-(ilogb(x)-7)) lies in [128, 256) */
+  e0 = __float_and8 (ax / 128.0);
+  z = ax / e0;
+
+  /* Split z into up to three base-256 digits tx[0..2].  */
+  tx[0] = floorf (z);
+  z = (z - tx[0]) * two8;
+  tx[1] = floorf (z);
+  z = (z - tx[1]) * two8;
+  tx[2] = floorf (z);
+
+  /* Drop trailing zero digits; tx[0] >= 128 so nx stays >= 1.  */
+  nx = 3;
+  while (tx[nx - 1] == zero)
+    nx--;
+
+  i = __fp_kernel_rem_pio2f (tx, y, e0, nx);
+  if (x < 0)
+    {
+      y[0] = -y[0];
+      y[1] = -y[1];
+      return -i;
+    }
+  return i;
+}
+
+/* Reduce a large argument modulo pi/2 by multiplying its base-256 digits
+   with the base-256 digits of 2/pi in two_over_pi.
+
+   X    holds NX digits of the scaled argument (most significant first),
+	as prepared by __ieee754_rem_pio2f.
+   Y    receives the remainder as the pair y[0] + y[1].
+   E0   is the power of two by which the argument was scaled; only its
+	exponent is used, extracted with __float_get_exp.
+   NX   is the number of valid entries in X.
+
+   Returns the low three bits of the integer quotient (n & 7).
+
+   The work arrays hold 20 entries; this relies on jz staying small
+   (it starts at jk = 9 and only grows when recomputation is needed).  */
+static int32_t
+__fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx)
+{
+  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp;
+  float z, fw, f[20], fq[20], q[20];
+
+  /* initialize jk */
+  jp = jk = 9;
+
+  /* determine jx,jv,q0, note that 3>q0 */
+  jx = nx - 1;
+  /* Remove the IEEE single-precision exponent bias.  */
+  exp = __float_get_exp (e0) - 127;
+  jv = (exp - 3) / 8;
+  if (jv < 0)
+    jv = 0;
+  q0 = exp - 8 * (jv + 1);
+
+  /* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */
+  j = jv - jx;
+  m = jx + jk;
+  for (i = 0; i <= m; i++, j++)
+    f[i] = (j < 0) ? zero : two_over_pi[j];
+
+  /* compute q[0],q[1],...q[jk] */
+  for (i = 0; i <= jk; i++)
+    {
+      for (j = 0, fw = 0.0; j <= jx; j++)
+	fw += x[j] * f[jx + i - j];
+      q[i] = fw;
+    }
+
+  jz = jk;
+recompute:
+  /* distill q[] into iq[] reversingly */
+  for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
+    {
+      /* fw is the carry into the next digit; iq[i] the 8-bit digit.  */
+      fw = truncf (twon8 * z);
+      iq[i] = (int32_t) (z - two8 * fw);
+      z = q[j - 1] + fw;
+    }
+
+  /* compute n */
+  z = __scalbnf (z, q0);	/* actual value of z */
+  z -= 8.0 * floorf (z * 0.125);	/* trim off integer >= 8 */
+  n = (int32_t) z;
+  z -= truncf (z);
+  /* ih > 0 means the fraction is >= 0.5 and will be replaced by 1-q.  */
+  ih = 0;
+  if (q0 > 0)
+    {				/* need iq[jz-1] to determine n */
+      i = (iq[jz - 1] >> (8 - q0));
+      n += i;
+      iq[jz - 1] -= i << (8 - q0);
+      ih = iq[jz - 1] >> (7 - q0);
+    }
+  else if (q0 == 0)
+    ih = iq[jz - 1] >> 8;
+  else if (z >= 0.5)
+    ih = 2;
+
+  if (ih > 0)
+    {				/* q > 0.5 */
+      n += 1;
+      carry = 0;
+      for (i = 0; i < jz; i++)
+	{			/* compute 1-q */
+	  j = iq[i];
+	  if (carry == 0)
+	    {
+	      if (j != 0)
+		{
+		  carry = 1;
+		  iq[i] = 0x100 - j;
+		}
+	    }
+	  else
+	    iq[i] = 0xff - j;
+	}
+      if (q0 > 0)
+	{			/* rare case: chance is 1 in 12 */
+	  /* Only q0 == 1 and q0 == 2 need masking; other values fall
+	     through the switch unchanged.  */
+	  switch (q0)
+	    {
+	    case 1:
+	      iq[jz - 1] &= 0x7f;
+	      break;
+	    case 2:
+	      iq[jz - 1] &= 0x3f;
+	      break;
+	    }
+	}
+      if (ih == 2)
+	{
+	  z = one - z;
+	  if (carry != 0)
+	    z -= __scalbnf (one, q0);
+	}
+    }
+
+  /* check if recomputation is needed */
+  if (z == zero)
+    {
+      j = 0;
+      for (i = jz - 1; i >= jk; i--)
+	j |= iq[i];
+      if (j == 0)
+	{			/* need recomputation */
+	  for (k = 1; iq[jk - k] == 0; k++);	/* k = no. of terms needed */
+
+	  for (i = jz + 1; i <= jz + k; i++)
+	    {			/* add q[jz+1] to q[jz+k] */
+	      f[jx + i] = two_over_pi[jv + i];
+	      for (j = 0, fw = 0.0; j <= jx; j++)
+		fw += x[j] * f[jx + i - j];
+	      q[i] = fw;
+	    }
+	  jz += k;
+	  goto recompute;
+	}
+    }
+
+  /* chop off zero terms */
+  if (z == 0.0)
+    {
+      jz -= 1;
+      q0 -= 8;
+      while (iq[jz] == 0)
+	{
+	  jz--;
+	  q0 -= 8;
+	}
+    }
+  else
+    {				/* break z into 8-bit if necessary */
+      z = __scalbnf (z, -q0);
+      if (z >= two8)
+	{
+	  fw = truncf (twon8 * z);
+	  iq[jz] = (int32_t) (z - two8 * fw);
+	  jz += 1;
+	  q0 += 8;
+	  iq[jz] = (int32_t) fw;
+	}
+      else
+	iq[jz] = (int32_t) z;
+    }
+
+  /* convert integer "bit" chunk to floating-point value */
+  fw = __scalbnf (one, q0);
+  for (i = jz; i >= 0; i--)
+    {
+      q[i] = fw * (float) iq[i];
+      fw *= twon8;
+    }
+
+  /* compute PIo2[0,...,jp]*q[jz,...,0] */
+  for (i = jz; i >= 0; i--)
+    {
+      for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
+	fw += PIo2[k] * q[i + k];
+      fq[jz - i] = fw;
+    }
+
+  /* compress fq[] into y[] */
+  /* y[0] is the sum from the smallest term up; y[1] is what rounding
+     left behind.  Both are negated when 1-q was taken above.  */
+  fw = 0.0;
+  for (i = jz; i >= 0; i--)
+    fw += fq[i];
+  y[0] = (ih == 0) ? fw : -fw;
+  fw = fq[0] - fw;
+  for (i = 1; i <= jz; i++)
+    fw += fq[i];
+  y[1] = (ih == 0) ? fw : -fw;
+
+  return n & 7;
+}
diff --git a/sysdeps/powerpc/fpu/k_cosf.c b/sysdeps/powerpc/fpu/k_cosf.c
new file mode 100644
index 0000000000..cb0e34e398
--- /dev/null
+++ b/sysdeps/powerpc/fpu/k_cosf.c
@@ -0,0 +1,65 @@
+/* k_cosf.c -- float version of k_cos.c
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "math.h"
+#include "math_private.h"
+
+/* Thresholds on |x| that select the evaluation path.  */
+static const float twom27   = 7.4505806e-09;
+static const float dot3     = 3.0000001e-01;
+static const float dot78125 = 7.8125000e-01;
+
+/* C1..C6 are the polynomial coefficients applied to z = x*x.  */
+static const float one =  1.0000000000e+00;
+static const float C1  =  4.1666667908e-02;
+static const float C2  = -1.3888889225e-03;
+static const float C3  =  2.4801587642e-05;
+static const float C4  = -2.7557314297e-07;
+static const float C5  =  2.0875723372e-09;
+static const float C6  = -1.1359647598e-11;
+
+/* Cosine kernel for a reduced argument.  X is the leading part of the
+   argument and Y its tail (x + y is the value whose cosine is wanted).
+   Uses FP comparisons on |x| only, without FP->INT conversion.  */
+float
+__kernel_cosf (float x, float y)
+{
+  float absx, xsq, poly, quarter, halfsq, base;
+
+  absx = __builtin_fabsf (x);
+
+  /* Of the arguments below 2**-27 only an exact zero returns early.  */
+  if (absx < twom27 && x == 0.0)
+    return one;
+
+  xsq = x * x;
+  poly = xsq * (C1 + xsq * (C2 + xsq * (C3 + xsq * (C4 + xsq
+						      * (C5 + xsq * C6)))));
+
+  /* |x| < 0.3: evaluate directly around 1.  */
+  if (absx < dot3)
+    return one - ((float) 0.5 * xsq - (xsq * poly - x * y));
+
+  /* Otherwise split off qx ~ |x|/4 (capped at 0.28125 above 0.78125)
+     so that 1 - qx is computed without rounding trouble.  */
+  if (absx > dot78125)
+    quarter = (float) 0.28125;
+  else
+    quarter = absx / 4.0;
+
+  halfsq = (float) 0.5 * xsq - quarter;
+  base = one - quarter;
+  return base - (halfsq - (xsq * poly - x * y));
+}
diff --git a/sysdeps/powerpc/fpu/k_sinf.c b/sysdeps/powerpc/fpu/k_sinf.c
new file mode 100644
index 0000000000..f93a478712
--- /dev/null
+++ b/sysdeps/powerpc/fpu/k_sinf.c
@@ -0,0 +1,53 @@
+/* k_sinf.c -- float version of k_sin.c
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "math.h"
+#include "math_private.h"
+
+
+/* twom27 is the 2**-27 threshold on |x|; S1..S6 are the polynomial
+   coefficients applied to z = x*x.  */
+static const float twom27 =  7.4505806000e-09;
+static const float half   =  5.0000000000e-01;
+static const float S1     = -1.6666667163e-01;
+static const float S2     =  8.3333337680e-03;
+static const float S3     = -1.9841270114e-04;
+static const float S4     =  2.7557314297e-06;
+static const float S5     = -2.5050759689e-08;
+static const float S6     =  1.5896910177e-10;
+
+
+/* Sine kernel for a reduced argument.  X is the leading part of the
+   argument and Y its tail; IY is zero when Y is to be ignored and
+   nonzero when it must be folded into the result.  Uses FP comparisons
+   only, without FP->INT conversion.  */
+float
+__kernel_sinf (float x, float y, int iy)
+{
+  float xsq, xcube, poly;
+
+  /* An exact zero is returned as is, which keeps its sign.  */
+  if (__builtin_fabsf (x) < twom27 && x == 0.0)
+    return x;
+
+  xsq = x * x;
+  xcube = xsq * x;
+  poly = S2 + xsq * (S3 + xsq * (S4 + xsq * (S5 + xsq * S6)));
+
+  if (iy != 0)
+    return x - ((xsq * (half * y - xcube * poly) - y) - xcube * S1);
+
+  return x + xcube * (S1 + xsq * poly);
+}
diff --git a/sysdeps/powerpc/fpu/s_cosf.c b/sysdeps/powerpc/fpu/s_cosf.c
new file mode 100644
index 0000000000..8b8778a4a8
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_cosf.c
@@ -0,0 +1,71 @@
+/* s_cosf.c -- float version of s_cos.c.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <errno.h>
+#include "math.h"
+#include "math_private.h"
+
+/* Largest |x| (about pi/4) handled without argument reduction.  */
+static const float pio4 = 7.8539801e-1;
+
+/* Return the cosine of X.
+
+   Small arguments go straight to __kernel_cosf.  NaN yields NaN;
+   +-Inf yields NaN and sets errno to EDOM.  Every other argument is
+   reduced modulo pi/2 by __ieee754_rem_pio2f, and the low two bits of
+   the returned quadrant count select the kernel and the sign.  */
+float
+__cosf (float x)
+{
+  float y[2], z = 0.0;
+  float ix;
+  int32_t n;
+
+  ix = __builtin_fabsf (x);
+
+  /* |x| ~< pi/4 */
+  if (ix <= pio4)
+    return __kernel_cosf (x, z);
+
+  /* cos(NaN) is NaN */
+  if (isnanf (ix))
+    return x - x;
+
+  /* cos(Inf) is NaN and a domain error */
+  if (isinff (ix))
+    {
+      __set_errno (EDOM);
+      return x - x;
+    }
+
+  /* argument reduction needed */
+  n = __ieee754_rem_pio2f (x, y);
+  switch (n & 3)
+    {
+    case 0:
+      return __kernel_cosf (y[0], y[1]);
+    case 1:
+      return -__kernel_sinf (y[0], y[1], 1);
+    case 2:
+      return -__kernel_cosf (y[0], y[1]);
+    default:
+      return __kernel_sinf (y[0], y[1], 1);
+    }
+}
+
+weak_alias (__cosf, cosf)
diff --git a/sysdeps/powerpc/fpu/s_float_bitwise.h b/sysdeps/powerpc/fpu/s_float_bitwise.h
new file mode 100644
index 0000000000..25892eea33
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_float_bitwise.h
@@ -0,0 +1,35 @@
+/* Bitwise manipulation over float. Function prototypes.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _FLOAT_BITWISE_
+#define _FLOAT_BITWISE_ 1
+
+/* For int32_t, used in the prototypes below.  */
+#include <stdint.h>
+
+/* In the descriptions below "&" and "|" act on the 32-bit pattern of
+   the float arguments, not on their numeric values.  The functions are
+   implemented in assembly (s_float_bitwise.S).  */
+
+/* Returns ((num & 0x7FFFFFF0) == value) */
+int __float_and_test28 (float num, float value);
+/* Returns ((num & 0x7FFFFF00) == value) */
+int __float_and_test24 (float num, float value);
+/* Returns (num & 0x7F800000), i.e. NUM with only its exponent bits kept */
+float __float_and8 (float num);
+/* Returns ((int32_t)(num & 0x7F800000) >> 23), the biased exponent */
+int32_t __float_get_exp (float num);
+/* Returns ((num & 0x807FFFFF) | exp) */
+float __float_set_exp (float num, float exp);
+
+#endif /* s_float_bitwise.h */
diff --git a/sysdeps/powerpc/fpu/s_scalbnf.c b/sysdeps/powerpc/fpu/s_scalbnf.c
new file mode 100644
index 0000000000..b49dd07b88
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_scalbnf.c
@@ -0,0 +1,135 @@
+/* s_scalbnf.c -- float version of s_scalbn.c.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "math.h"
+#include "math_private.h"
+
+#include "s_float_bitwise.h"
+
+static const float huge   = 1.0e+30;	/* huge * huge is the overflow result */
+static const float tiny   = 1.0e-30;	/* tiny * tiny is the underflow result */
+static const float two25  = 3.35544320e+07;	/* 2**25 */
+static const float twom25 = 2.98023224e-08;	/* 2**-25 */
+
+/* Float with bit pattern (i << 23), 0<=i<255 - [ 0.0, 2**-126, ... , 2**127 ] */
+static const float pow2[] = {
+  0.00000000e+00, 1.17549435e-38, 2.35098870e-38, 4.70197740e-38,
+  9.40395481e-38, 1.88079096e-37, 3.76158192e-37, 7.52316385e-37,
+  1.50463277e-36, 3.00926554e-36, 6.01853108e-36, 1.20370622e-35,
+  2.40741243e-35, 4.81482486e-35, 9.62964972e-35, 1.92592994e-34,
+  3.85185989e-34, 7.70371978e-34, 1.54074396e-33, 3.08148791e-33,
+  6.16297582e-33, 1.23259516e-32, 2.46519033e-32, 4.93038066e-32,
+  9.86076132e-32, 1.97215226e-31, 3.94430453e-31, 7.88860905e-31,
+  1.57772181e-30, 3.15544362e-30, 6.31088724e-30, 1.26217745e-29,
+  2.52435490e-29, 5.04870979e-29, 1.00974196e-28, 2.01948392e-28,
+  4.03896783e-28, 8.07793567e-28, 1.61558713e-27, 3.23117427e-27,
+  6.46234854e-27, 1.29246971e-26, 2.58493941e-26, 5.16987883e-26,
+  1.03397577e-25, 2.06795153e-25, 4.13590306e-25, 8.27180613e-25,
+  1.65436123e-24, 3.30872245e-24, 6.61744490e-24, 1.32348898e-23,
+  2.64697796e-23, 5.29395592e-23, 1.05879118e-22, 2.11758237e-22,
+  4.23516474e-22, 8.47032947e-22, 1.69406589e-21, 3.38813179e-21,
+  6.77626358e-21, 1.35525272e-20, 2.71050543e-20, 5.42101086e-20,
+  1.08420217e-19, 2.16840434e-19, 4.33680869e-19, 8.67361738e-19,
+  1.73472348e-18, 3.46944695e-18, 6.93889390e-18, 1.38777878e-17,
+  2.77555756e-17, 5.55111512e-17, 1.11022302e-16, 2.22044605e-16,
+  4.44089210e-16, 8.88178420e-16, 1.77635684e-15, 3.55271368e-15,
+  7.10542736e-15, 1.42108547e-14, 2.84217094e-14, 5.68434189e-14,
+  1.13686838e-13, 2.27373675e-13, 4.54747351e-13, 9.09494702e-13,
+  1.81898940e-12, 3.63797881e-12, 7.27595761e-12, 1.45519152e-11,
+  2.91038305e-11, 5.82076609e-11, 1.16415322e-10, 2.32830644e-10,
+  4.65661287e-10, 9.31322575e-10, 1.86264515e-09, 3.72529030e-09,
+  7.45058060e-09, 1.49011612e-08, 2.98023224e-08, 5.96046448e-08,
+  1.19209290e-07, 2.38418579e-07, 4.76837158e-07, 9.53674316e-07,
+  1.90734863e-06, 3.81469727e-06, 7.62939453e-06, 1.52587891e-05,
+  3.05175781e-05, 6.10351562e-05, 1.22070312e-04, 2.44140625e-04,
+  4.88281250e-04, 9.76562500e-04, 1.95312500e-03, 3.90625000e-03,
+  7.81250000e-03, 1.56250000e-02, 3.12500000e-02, 6.25000000e-02,
+  1.25000000e-01, 2.50000000e-01, 5.00000000e-01, 1.00000000e+00,
+  2.00000000e+00, 4.00000000e+00, 8.00000000e+00, 1.60000000e+01,
+  3.20000000e+01, 6.40000000e+01, 1.28000000e+02, 2.56000000e+02,
+  5.12000000e+02, 1.02400000e+03, 2.04800000e+03, 4.09600000e+03,
+  8.19200000e+03, 1.63840000e+04, 3.27680000e+04, 6.55360000e+04,
+  1.31072000e+05, 2.62144000e+05, 5.24288000e+05, 1.04857600e+06,
+  2.09715200e+06, 4.19430400e+06, 8.38860800e+06, 1.67772160e+07,
+  3.35544320e+07, 6.71088640e+07, 1.34217728e+08, 2.68435456e+08,
+  5.36870912e+08, 1.07374182e+09, 2.14748365e+09, 4.29496730e+09,
+  8.58993459e+09, 1.71798692e+10, 3.43597384e+10, 6.87194767e+10,
+  1.37438953e+11, 2.74877907e+11, 5.49755814e+11, 1.09951163e+12,
+  2.19902326e+12, 4.39804651e+12, 8.79609302e+12, 1.75921860e+13,
+  3.51843721e+13, 7.03687442e+13, 1.40737488e+14, 2.81474977e+14,
+  5.62949953e+14, 1.12589991e+15, 2.25179981e+15, 4.50359963e+15,
+  9.00719925e+15, 1.80143985e+16, 3.60287970e+16, 7.20575940e+16,
+  1.44115188e+17, 2.88230376e+17, 5.76460752e+17, 1.15292150e+18,
+  2.30584301e+18, 4.61168602e+18, 9.22337204e+18, 1.84467441e+19,
+  3.68934881e+19, 7.37869763e+19, 1.47573953e+20, 2.95147905e+20,
+  5.90295810e+20, 1.18059162e+21, 2.36118324e+21, 4.72236648e+21,
+  9.44473297e+21, 1.88894659e+22, 3.77789319e+22, 7.55578637e+22,
+  1.51115727e+23, 3.02231455e+23, 6.04462910e+23, 1.20892582e+24,
+  2.41785164e+24, 4.83570328e+24, 9.67140656e+24, 1.93428131e+25,
+  3.86856262e+25, 7.73712525e+25, 1.54742505e+26, 3.09485010e+26,
+  6.18970020e+26, 1.23794004e+27, 2.47588008e+27, 4.95176016e+27,
+  9.90352031e+27, 1.98070406e+28, 3.96140813e+28, 7.92281625e+28,
+  1.58456325e+29, 3.16912650e+29, 6.33825300e+29, 1.26765060e+30,
+  2.53530120e+30, 5.07060240e+30, 1.01412048e+31, 2.02824096e+31,
+  4.05648192e+31, 8.11296384e+31, 1.62259277e+32, 3.24518554e+32,
+  6.49037107e+32, 1.29807421e+33, 2.59614843e+33, 5.19229686e+33,
+  1.03845937e+34, 2.07691874e+34, 4.15383749e+34, 8.30767497e+34,
+  1.66153499e+35, 3.32306999e+35, 6.64613998e+35, 1.32922800e+36,
+  2.65845599e+36, 5.31691198e+36, 1.06338240e+37, 2.12676479e+37,
+  4.25352959e+37, 8.50705917e+37, 1.70141183e+38,
+};
+
+
+/* Return x * 2**n by rewriting the exponent field of x.  Zero comes back
+   unchanged, Inf and NaN as x + x; results out of range are produced
+   as huge * huge or tiny * tiny with the sign of x.  */
+float
+__scalbnf (float x, int n)
+{
+  int32_t ke;
+
+  ke = __float_get_exp (x);
+  if (ke == 0)
+    {
+      if (x == 0.0)
+	return x;
+      /* Subnormal x: scale it into the normal range and take the 2**25
+	 back out of the exponent, so that ke may go negative here.  */
+      x *= two25;
+      ke = __float_get_exp (x) - 25;
+    }
+  if (ke == 0xff)		/* NaN or Inf */
+    return x + x;
+  /* Bound n before it is added so that ke + n cannot overflow.  */
+  if (n < -50000)
+    return tiny * __copysignf (tiny, x);
+  if (n > 50000 || ke + n > 0xfe)
+    return huge * __copysignf (huge, x);
+  ke += n;
+  if (ke > 0)			/* normal result, 1 <= ke <= 254 */
+    return __float_set_exp (x, pow2[ke]);
+  if (ke <= -25)
+    return tiny * __copysignf (tiny, x);
+  /* Subnormal result, -24 <= ke <= 0: build x * 2**25, then scale down.  */
+  ke += 25;
+  x = __float_set_exp (x, pow2[ke]);
+  return x * twom25;
+}
+weak_alias (__scalbnf, scalbnf)
diff --git a/sysdeps/powerpc/fpu/s_sinf.c b/sysdeps/powerpc/fpu/s_sinf.c
new file mode 100644
index 0000000000..dd724aaa81
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_sinf.c
@@ -0,0 +1,70 @@
+/* s_sinf.c -- float version of s_sin.c.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <errno.h>
+#include "math.h"
+#include "math_private.h"
+
+static const float pio4 = 7.8539801e-1;
+
+/* Single-precision sine.  Inputs with |x| <= pio4 go straight to
+   __kernel_sinf; NaN and Inf give x - x (Inf also sets errno to EDOM);
+   all other inputs are reduced by __ieee754_rem_pio2f first and then
+   handed to __kernel_sinf or __kernel_cosf.  */
+float
+__sinf (float x)
+{
+  const float magnitude = __builtin_fabsf (x);
+  float reduced[2];
+  int32_t quadrant;
+
+  /* |x| ~< pi/4: no argument reduction.  */
+  if (magnitude <= pio4)
+    return __kernel_sinf (x, 0.0f, 0);
+
+  /* sin(NaN) is NaN.  */
+  if (isnanf (magnitude))
+    return x - x;
+
+  /* sin(Inf) is NaN, reported as a domain error.  */
+  if (isinff (magnitude))
+    {
+      __set_errno (EDOM);
+      return x - x;
+    }
+
+  /* Argument reduction needed: the low two bits of the value returned
+     by __ieee754_rem_pio2f select the kernel and the sign.  */
+  quadrant = __ieee754_rem_pio2f (x, reduced) & 3;
+
+  if (quadrant == 0)
+    return __kernel_sinf (reduced[0], reduced[1], 1);
+
+  if (quadrant == 1)
+    return __kernel_cosf (reduced[0], reduced[1]);
+
+  if (quadrant == 2)
+    return -__kernel_sinf (reduced[0], reduced[1], 1);
+
+  /* quadrant == 3 */
+  return -__kernel_cosf (reduced[0], reduced[1]);
+}
+
+weak_alias (__sinf, sinf)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_float_bitwise.S b/sysdeps/powerpc/powerpc32/fpu/s_float_bitwise.S
new file mode 100644
index 0000000000..2a7b64eee2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/s_float_bitwise.S
@@ -0,0 +1,96 @@
+/* Bitwise manipulation over float. PowerPC32 version.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/* int [r3] __float_and_test28(float [f1] num, float [f2] value)
+   Returns ((num & 0xFFFFFFF0) == value) as 1 or 0, on the raw float bits */
+ENTRY(__float_and_test28)
+    stwu    r1,-32(r1)          /* scratch frame for the FPR->GPR moves */
+    cfi_adjust_cfa_offset(32)
+    stfs    fp1,16(r1)
+    stfs    fp2,8(r1)
+    lwz     r0,16(r1)
+    rlwinm  r3,r0,0,0,27        /* keep the top 28 bits of num */
+    lwz     r0,8(r1)
+    xor     r3,r3,r0            /* zero iff masked num equals value */
+    cntlzw  r3,r3               /* 32 only when r3 was zero */
+    srwi    r3,r3,5             /* 1 if equal, else 0; no 64-bit extsw needed */
+    addi    r1,r1,32
+    cfi_adjust_cfa_offset(-32)
+    blr
+END(__float_and_test28)
+
+/* int [r3] __float_and_test24(float [f1] num, float [f2] value)
+   Returns ((num & 0xFFFFFF00) == value) as 1 or 0, on the raw float bits */
+ENTRY(__float_and_test24)
+    stwu    r1,-32(r1)          /* scratch frame for the FPR->GPR moves */
+    cfi_adjust_cfa_offset(32)
+    stfs    fp1,16(r1)
+    stfs    fp2,8(r1)
+    lwz     r0,16(r1)
+    rlwinm  r3,r0,0,0,23        /* keep the top 24 bits of num */
+    lwz     r0,8(r1)
+    xor     r3,r3,r0            /* zero iff masked num equals value */
+    cntlzw  r3,r3               /* 32 only when r3 was zero */
+    srwi    r3,r3,5             /* 1 if equal, else 0; no 64-bit extsw needed */
+    addi    r1,r1,32
+    cfi_adjust_cfa_offset(-32)
+    blr
+END(__float_and_test24)
+
+/* float [f1] __float_and8(float [f1] num)
+   Returns (num & 0x7F800000), i.e. only the exponent bits, as a float */
+ENTRY(__float_and8)
+    stfs   fp1,-16(r1)   /* NOTE(review): slot below r1, siblings allocate a frame -- confirm safe on the 32-bit ABI */
+    lwz    r0,-16(r1)    /* raw bits of num */
+    rlwinm r0,r0,0,1,8   /* keep bits 1..8 (MSB-first numbering): mask 0x7F800000 */
+    stw    r0,-16(r1)
+    lfs    fp1,-16(r1)   /* masked bits back into fp1 */
+    blr
+END(__float_and8)
+
+/* int32_t [r3] __float_get_exp(float [f1] num)
+   Returns ((int32_t)(num & 0x7F800000) >> 23) */
+ENTRY(__float_get_exp)
+    stfs   fp1,-16(r1)   /* NOTE(review): slot below r1, siblings allocate a frame -- confirm safe on the 32-bit ABI */
+    lwz    r0,-16(r1)    /* raw bits of num */
+    rlwinm r3,r0,0,1,8   /* isolate the exponent field: mask 0x7F800000 */
+    srwi   r3,r3,23      /* 32-bit shift; rldicl is a 64-bit-only instruction */
+    blr
+END(__float_get_exp)
+
+/* float [fp1] __float_set_exp(float [fp1] num, float [fp2] exp)
+   Returns ((num & 0x807FFFFF) | exp): the sign and mantissa of num
+   combined with the bits of exp */
+ENTRY(__float_set_exp)
+    stwu   r1,-32(r1)    /* scratch frame for the FPR<->GPR moves */
+    cfi_adjust_cfa_offset(32)
+    stfs   fp1,16(r1)
+    stfs   fp2,8(r1)
+    lwz    r9,16(r1)     /* raw bits of num */
+    rlwinm r0,r9,0,9,0   /* wrap-around mask 0x807FFFFF: drop the exponent */
+    lwz    r9,8(r1)      /* raw bits of exp */
+    or     r9,r0,r9
+    stw    r9,16(r1)
+    lfs    fp1,16(r1)
+    addi   r1,r1,32
+    cfi_adjust_cfa_offset(-32)
+    blr
+END(__float_set_exp)
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_float_bitwise.S b/sysdeps/powerpc/powerpc32/power7/fpu/s_float_bitwise.S
new file mode 100644
index 0000000000..02644d99d0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_float_bitwise.S
@@ -0,0 +1,149 @@
+/* Bitwise manipulation over float. PowerPC32 version using VSX.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 1 Franklin Street, Fifth Floor, Boston MA
+   02110-1301 USA. */
+
+#include <sysdep.h>
+
+    .section    .rodata.cst8,"aM",@progbits,8
+    .align  3
+.LC0: /* 28-bits mask (0x7FFFFFF0 as a float) extended to double */
+    .4byte 0x7ffffffe
+    .4byte 0x00000000
+.LC1: /* 24-bits mask (0x7FFFFF00 as a float) extended to double */
+    .4byte 0x7fffffe0
+    .4byte 0x00000000
+.LC2: /* exponent-only mask (0x7F800000 as a float) extended to double */
+    .4byte 0x7ff00000
+    .4byte 0x00000000
+.LC3: /* mask that clears the exponent field, used by __float_set_exp */
+    .4byte 0x800FFFFF
+    .4byte 0xFFFFFFFF
+
+    .section    ".text"
+    .machine    power7
+/* int [r3] __float_and_test28(float [f1] num, float [f2] value)
+   Returns ((num & 0x7FFFFFF0) == value) */
+ENTRY(__float_and_test28)
+#ifdef SHARED
+    mflr    r11                 /* preserve the caller's LR */
+    cfi_register(lr,r11)
+    bcl     20,31,1f            /* PIC: put the address of label 1 in LR */
+1:  mflr    r9
+    addis   r9,r9,.LC0-1b@ha
+    lfd     fp3,.LC0-1b@l(r9)   /* fp3 = .LC0 mask */
+    mtlr    r11
+    cfi_same_value (lr)
+#else
+    lis     r9,.LC0@ha
+    lfd     fp3,.LC0@l(r9)      /* fp3 = .LC0 mask */
+#endif
+    xxland  v4,v1,v3            /* masked num, read below as fp4 */
+    fcmpu   cr7,fp4,fp2         /* floating-point compare against value */
+    mfcr    r3,cr1              /* NOTE(review): fcmpu writes cr7 but cr1 is named here -- confirm encoding */
+    rlwinm  r3,r3,31,1          /* rotate right 1, keep bit 0: presumably the EQ bit of cr7 */
+    blr
+END(__float_and_test28)
+
+/* int [r3] __float_and_test24(float [f1] num, float [f2] value)
+   Returns ((num & 0x7FFFFF00) == value) */
+ENTRY(__float_and_test24)
+#ifdef SHARED
+    mflr    r11                 /* preserve the caller's LR */
+    cfi_register(lr,r11)
+    bcl     20,31,1f            /* PIC: put the address of label 1 in LR */
+1:  mflr    r9
+    addis   r9,r9,.LC1-1b@ha
+    lfd     fp3,.LC1-1b@l(r9)   /* fp3 = .LC1 mask */
+    mtlr    r11
+    cfi_same_value (lr)
+#else
+    lis     r9,.LC1@ha
+    lfd     fp3,.LC1@l(r9)      /* fp3 = .LC1 mask */
+#endif
+    xxland  v4,v1,v3            /* masked num, read below as fp4 */
+    fcmpu   cr7,fp4,fp2         /* floating-point compare against value */
+    mfcr    r3,cr1              /* NOTE(review): fcmpu writes cr7 but cr1 is named here -- confirm encoding */
+    rlwinm  r3,r3,31,1          /* rotate right 1, keep bit 0: presumably the EQ bit of cr7 */
+    blr
+END(__float_and_test24)
+
+/* float [f1] __float_and8(float [f1] num)
+   Returns (num & 0x7F800000), i.e. only the exponent bits, as a float */
+EALIGN(__float_and8, 4, 1)
+#ifdef SHARED
+    mflr    r11                 /* preserve the caller's LR */
+    cfi_register(lr,r11)
+    bcl     20,31,1f            /* PIC: put the address of label 1 in LR */
+1:  mflr    r9
+    addis   r9,r9,.LC2-1b@ha
+    lfd     fp3,.LC2-1b@l(r9)   /* fp3 = .LC2 exponent mask */
+    mtlr    r11
+    cfi_same_value (lr)
+#else
+    lis     r9,.LC2@ha
+    lfd     fp3,.LC2@l(r9)      /* fp3 = .LC2 exponent mask */
+#endif
+    xxland  v1,v1,v3            /* mask in place; result is returned in f1 */
+    blr
+END(__float_and8)
+
+/* int32_t [r3] __float_get_exp(float [f1] num)
+   Returns ((int32_t)(num & 0x7F800000) >> 23) */
+ENTRY(__float_get_exp)
+#ifdef SHARED
+    mflr    r11                 /* preserve the caller's LR */
+    cfi_register(lr,r11)
+    bcl     20,31,1f            /* PIC: put the address of label 1 in LR */
+1:  mflr    r9
+    addis   r9,r9,.LC2-1b@ha
+    lfd     fp3,.LC2-1b@l(r9)   /* fp3 = .LC2 exponent mask */
+    mtlr    r11
+    cfi_same_value (lr)
+#else
+    lis     r9,.LC2@ha
+    lfd     fp3,.LC2@l(r9)      /* fp3 = .LC2 exponent mask */
+#endif
+    xxland  v4,v1,v3            /* masked num, stored below as fp4 */
+    stfs    fp4,-16(r1)         /* NOTE(review): slot below r1 with no frame -- confirm safe on the 32-bit ABI */
+    lwz     r3,-16(r1)          /* single-precision bits of the masked value */
+    srawi   r3,r3,23            /* move the exponent field down to bit 0 */
+    clrldi  r3,r3,32            /* clear the upper 32 bits of r3 */
+    blr
+END(__float_get_exp)
+
+/* float [fp1] __float_set_exp(float [fp1] num, float [fp2] exp)
+   Returns ((num & 0x807FFFFF) | exp): num with its exponent bits replaced */
+EALIGN(__float_set_exp, 4, 1)
+#ifdef SHARED
+    mflr    r11                 /* preserve the caller's LR */
+    cfi_register(lr,r11)
+    bcl     20,31,1f            /* PIC: put the address of label 1 in LR */
+1:  mflr    r9
+    addis   r9,r9,.LC3-1b@ha
+    lfd     fp3,.LC3-1b@l(r9)   /* fp3 = .LC3, all bits except the exponent */
+    mtlr    r11
+    cfi_same_value (lr)
+#else
+    lis     r9,.LC3@ha
+    lfd     fp3,.LC3@l(r9)      /* fp3 = .LC3, all bits except the exponent */
+#endif
+    xxland  v4,v1,v3            /* num with the exponent field cleared */
+    xxlor   v1,v4,v2            /* OR in the bits of exp; result in f1 */
+    blr
+END(__float_set_exp)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_float_bitwise.S b/sysdeps/powerpc/powerpc64/fpu/s_float_bitwise.S
new file mode 100644
index 0000000000..6cea572c0d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/s_float_bitwise.S
@@ -0,0 +1,87 @@
+/* Bitwise manipulation over float. PowerPC64 version.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/* int [r3] __float_and_test28(float [f1] num, float [f2] value)
+   Returns ((num & 0xFFFFFFF0) == value) as 1 or 0, on the raw float bits */
+ENTRY(__float_and_test28)
+    stfs    fp1,-32(r1)         /* scratch slots below r1, no frame is set up */
+    stfs    fp2,-16(r1)
+    lwz     r0,-32(r1)          /* raw bits of num */
+    rlwinm  r3,r0,0,0,27        /* keep the top 28 bits (sign bit included) */
+    lwz     r0,-16(r1)          /* raw bits of value */
+    xor     r3,r3,r0            /* zero iff masked num equals value */
+    cntlzw  r3,r3               /* 32 only when r3 was zero */
+    srwi    r3,r3,5             /* 1 if equal, else 0 */
+    extsw   r3,r3
+    blr
+END(__float_and_test28)
+
+/* int [r3] __float_and_test24(float [f1] num, float [f2] value)
+   Returns ((num & 0xFFFFFF00) == value) as 1 or 0, on the raw float bits */
+ENTRY(__float_and_test24)
+    stfs    fp1,-32(r1)         /* scratch slots below r1, no frame is set up */
+    stfs    fp2,-16(r1)
+    lwz     r0,-32(r1)          /* raw bits of num */
+    rlwinm  r3,r0,0,0,23        /* keep the top 24 bits (sign bit included) */
+    lwz     r0,-16(r1)          /* raw bits of value */
+    xor     r3,r3,r0            /* zero iff masked num equals value */
+    cntlzw  r3,r3               /* 32 only when r3 was zero */
+    srwi    r3,r3,5             /* 1 if equal, else 0 */
+    extsw   r3,r3
+    blr
+END(__float_and_test24)
+
+/* float [f1] __float_and8(float [f1] num)
+   Returns (num & 0x7F800000), i.e. only the exponent bits, as a float */
+ENTRY(__float_and8)
+    stfs   fp1,-16(r1)   /* scratch slot below r1, no frame is set up */
+    lwz    r0,-16(r1)    /* raw bits of num */
+    rlwinm r0,r0,0,1,8   /* keep bits 1..8 (MSB-first numbering): mask 0x7F800000 */
+    stw    r0,-16(r1)
+    lfs    fp1,-16(r1)   /* masked bits back into fp1 */
+    blr
+END(__float_and8)
+
+/* int32_t [r3] __float_get_exp(float [f1] num)
+   Returns ((int32_t)(num & 0x7F800000) >> 23) */
+ENTRY(__float_get_exp)
+    stfs   fp1,-16(r1)   /* scratch slot below r1, no frame is set up */
+    lwz    r0,-16(r1)    /* raw bits of num */
+    rlwinm r3,r0,0,1,8   /* isolate the exponent field: mask 0x7F800000 */
+    rldicl r3,r3,41,56   /* rotate left 41 (= right 23), keep the low 8 bits */
+    blr
+END(__float_get_exp)
+
+/* float [fp1] __float_set_exp(float [fp1] num, float [fp2] exp)
+   Returns ((num & 0x807FFFFF) | exp): num with its exponent bits replaced */
+ENTRY(__float_set_exp)
+    stfs   fp1,-32(r1)   /* scratch slots below r1, no frame is set up */
+    stfs   fp2,-16(r1)
+    lwz    r9,-32(r1)    /* raw bits of num */
+    rlwinm r0,r9,1,8,31  /* rotate left 1, keep low 24 bits: sign + mantissa */
+    rlwinm r0,r0,31,0xffffffff  /* rotate back: r0 = num & 0x807FFFFF */
+    lwz    r9,-16(r1)    /* raw bits of exp */
+    or     r9,r0,r9
+    stw    r9,-16(r1)
+    lfs    fp1,-16(r1)   /* combined bits back into fp1 */
+    blr
+END(__float_set_exp)
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_float_bitwise.S b/sysdeps/powerpc/powerpc64/power7/fpu/s_float_bitwise.S
new file mode 100644
index 0000000000..fb3e9f7b20
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_float_bitwise.S
@@ -0,0 +1,83 @@
+/* Bitwise manipulation over float. PowerPC64 version using VSX.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+    .section    ".toc","aw"
+.LC0:
+    .tc FS_28MASK_0[TC],0x7ffffffe00000000  /* 28-bits mask, loaded by __float_and_test28 */
+.LC1:
+    .tc FS_24MASK_0[TC],0x7fffffe000000000  /* 24-bits mask, loaded by __float_and_test24 */
+.LC2:
+    .tc FL_09MASK_0[TC],0x7ff0000000000000  /* exponent-only mask, loaded by __float_and8 and __float_get_exp */
+.LC3:
+    .tc FL_EXP_MASK[TC],0x800FFFFFFFFFFFFF  /* clears the exponent field, loaded by __float_set_exp */
+    .section    ".text"
+    .machine    power7
+/* int [r3] __float_and_test28(float [f1] num, float [f2] value)
+   Returns ((num & 0x7FFFFFF0) == value) */
+ENTRY(__float_and_test28)
+    lfd     fp3,.LC0@toc(2)     /* fp3 = .LC0, the 28-bits mask */
+    xxland  v4,v1,v3            /* masked num, read below as fp4 */
+    fcmpu   cr7,fp4,fp2         /* floating-point compare against value */
+    mfcr    r3,cr1              /* NOTE(review): fcmpu writes cr7 but cr1 is named here -- confirm encoding */
+    rlwinm  r3,r3,31,1          /* rotate right 1, keep bit 0: presumably the EQ bit of cr7 */
+    blr
+END(__float_and_test28)
+
+/* int [r3] __float_and_test24(float [f1] num, float [f2] value)
+   Returns ((num & 0x7FFFFF00) == value) */
+ENTRY(__float_and_test24)
+    lfd     fp3,.LC1@toc(2)     /* fp3 = .LC1, the 24-bits mask */
+    xxland  v4,v1,v3            /* masked num, read below as fp4 */
+    fcmpu   cr7,fp4,fp2         /* floating-point compare against value */
+    mfcr    r3,cr1              /* NOTE(review): fcmpu writes cr7 but cr1 is named here -- confirm encoding */
+    rlwinm  r3,r3,31,1          /* rotate right 1, keep bit 0: presumably the EQ bit of cr7 */
+    blr
+END(__float_and_test24)
+
+/* float [f1] __float_and8(float [f1] num)
+   Returns (num & 0x7F800000), i.e. only the exponent bits, as a float */
+EALIGN(__float_and8, 4, 2)
+    lfd     fp3,.LC2@toc(2)     /* fp3 = .LC2, the exponent-only mask */
+    xxland  v1,v1,v3            /* mask in place; result is returned in f1 */
+    blr
+END(__float_and8)
+
+/* int32_t [r3] __float_get_exp(float [f1] num)
+   Returns ((int32_t)(num & 0x7F800000) >> 23) */
+ENTRY(__float_get_exp)
+    lfd     fp3,.LC2@toc(2)     /* fp3 = .LC2, the exponent-only mask */
+    xxland  v4,v1,v3            /* masked num, stored below as fp4 */
+    stfs    fp4,-16(r1)         /* scratch slot below r1, no frame is set up */
+    lwz     r3,-16(r1)          /* single-precision bits of the masked value */
+    srawi   r3,r3,23            /* move the exponent field down to bit 0 */
+    clrldi  r3,r3,32            /* clear the upper 32 bits of r3 */
+    blr
+END(__float_get_exp)
+
+/* float [fp1] __float_set_exp(float [fp1] num, float [fp2] exp)
+   Returns ((num & 0x807FFFFF) | exp): num with its exponent bits replaced */
+EALIGN(__float_set_exp, 4, 1)
+    lfd     fp3,.LC3@toc(2)     /* fp3 = .LC3, all bits except the exponent */
+    xxland  v4,v1,v3            /* num with the exponent field cleared */
+    xxlor   v1,v4,v2            /* OR in the bits of exp; result in f1 */
+    blr
+END(__float_set_exp)
author	Adhemerval Zanella <azanella@linux.vnet.ibm.com>	2011-06-17 13:30:52 -0500
committer	Ryan S. Arnold <rsa@us.ibm.com>	2011-06-17 14:12:07 -0500
commit	6559b01648c2a08ea862ae923ce7d86c6e8cca45 (patch)
tree	456119f629e086a9a279edc57a440e912c20846f
parent	4749a0058b27274a95c5a798e339c7299cdf890e (diff)
download	glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.tar glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.tar.gz glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.tar.bz2 glibc-6559b01648c2a08ea862ae923ce7d86c6e8cca45.zip