aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@redhat.com>2013-03-26 19:28:50 +0530
committerSiddhesh Poyarekar <siddhesh@redhat.com>2013-03-26 19:28:50 +0530
commit6f2e90e78f151bab153c2b38492505ae2012db06 (patch)
tree03965f28878b90725ad89572ae61993bff4ebdfd
parentfce14d4e9c6e08ad8c825fe88d8cbdac5c739565 (diff)
downloadglibc-6f2e90e78f151bab153c2b38492505ae2012db06.tar
glibc-6f2e90e78f151bab153c2b38492505ae2012db06.tar.gz
glibc-6f2e90e78f151bab153c2b38492505ae2012db06.tar.bz2
glibc-6f2e90e78f151bab153c2b38492505ae2012db06.zip
Make mantissa type of mp_no configurable
The mantissa of mp_no is intended to take only integral values. This is a relatively good choice for powerpc due to its 4 fpus, but not for other architectures, which suffer due to this choice. This change makes the default mantissa a long integer and allows powerpc to override it. Additionally, some operations have been optimized for integer manipulation, resulting in a significant improvement in performance.
-rw-r--r--ChangeLog19
-rw-r--r--sysdeps/ieee754/dbl-64/mpa-arch.h47
-rw-r--r--sysdeps/ieee754/dbl-64/mpa.c96
-rw-r--r--sysdeps/ieee754/dbl-64/mpa.h27
-rw-r--r--sysdeps/powerpc/power4/fpu/mpa-arch.h56
5 files changed, 176 insertions, 69 deletions
diff --git a/ChangeLog b/ChangeLog
index 2fab9b0551..4448647357 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2013-03-26 Siddhesh Poyarekar <siddhesh@redhat.com>
+
+ * sysdeps/ieee754/dbl-64/mpa-arch.h: New file.
+ * sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T and
+ MANTISSA_STORE_T to store computations on mantissa. Use
+ macros for rounding and division.
+ (denorm): Likewise.
+ (__dbl_mp): Likewise.
+ (add_magnitudes): Likewise.
+ (sub_magnitudes): Likewise.
+ (__mul): Likewise.
+ (__sqr): Likewise.
+ * sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h. Define
+ powers of two in terms of TWOPOW macro.
+ (mp_no): Make type of mantissa as MANTISSA_T.
+ [!RADIXI]: Define RADIXI.
+ [!TWO52]: Define TWO52.
+ * sysdeps/powerpc/power4/fpu/mpa-arch.h: New file.
+
2013-03-25 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/fpu/s_llround.c: Fix libm ABI issue with missing
diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h
new file mode 100644
index 0000000000..7de9d51ae2
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -0,0 +1,47 @@
+/* Overridable constants and operations.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+
+typedef long mantissa_t;
+typedef int64_t mantissa_store_t;
+
+#define TWOPOW(i) (1L << i)
+
+#define RADIX_EXP 24
+#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
+
+/* Divide D by RADIX and put the remainder in R. D must be a non-negative
+ integral value. */
+#define DIV_RADIX(d, r) \
+ ({ \
+ r = d & (RADIX - 1); \
+ d >>= RADIX_EXP; \
+ })
+
+/* Put the integer component of a double X in R and retain the fraction in
+ X. This is used in extracting mantissa digits for MP_NO by using the
+ integer portion of the current value of the number as the current mantissa
+ digit and then scaling by RADIX to get the next mantissa digit in the same
+ manner. */
+#define INTEGER_OF(x, i) \
+ ({ \
+ i = (mantissa_t) x; \
+ x -= i; \
+ })
+
+/* Align IN down to F. The code assumes that F is a power of two. */
+#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 0766476544..c238ccf2af 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p)
{
#define R RADIXI
long i;
- double a, c, u, v, z[5];
+ double c;
+ mantissa_t a, u, v, z[5];
if (p < 5)
{
if (p == 1)
@@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p)
for (i = 2; i < 5; i++)
{
- z[i] = X[i] * a;
- u = (z[i] + CUTTER) - CUTTER;
- if (u > z[i])
- u -= RADIX;
- z[i] -= u;
- z[i - 1] += u * RADIXI;
+ mantissa_store_t d, r;
+ d = X[i] * (mantissa_store_t) a;
+ DIV_RADIX (d, r);
+ z[i] = r;
+ z[i - 1] += d;
}
- u = (z[3] + TWO71) - TWO71;
- if (u > z[3])
- u -= TWO19;
+ u = ALIGN_DOWN_TO (z[3], TWO19);
v = z[3] - u;
if (v == TWO18)
@@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p)
{
long i, k;
long p2 = p;
- double c, u, z[5];
+ double c;
+ mantissa_t u, z[5];
#define R RADIXI
if (EX < -44 || (EX == -44 && X[1] < TWO5))
@@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p)
z[3] = X[k];
}
- u = (z[3] + TWO57) - TWO57;
- if (u > z[3])
- u -= TWO5;
+ u = ALIGN_DOWN_TO (z[3], TWO5);
if (u == z[3])
{
@@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p)
{
long i, n;
long p2 = p;
- double u;
/* Sign. */
if (x == ZERO)
@@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p)
n = MIN (p2, 4);
for (i = 1; i <= n; i++)
{
- u = (x + TWO52) - TWO52;
- if (u > x)
- u -= ONE;
- Y[i] = u;
- x -= u;
+ INTEGER_OF (x, Y[i]);
x *= RADIX;
}
for (; i <= p2; i++)
@@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k;
long p2 = p;
- double zk;
+ mantissa_t zk;
EZ = EX;
@@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k;
long p2 = p;
- double zk;
+ mantissa_t zk;
EZ = EX;
i = p2;
@@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
long i, j, k, ip, ip2;
long p2 = p;
- double u, zk;
+ mantissa_store_t zk;
const mp_no *a;
- double *diag;
+ mantissa_store_t *diag;
/* Is z=0? */
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -680,11 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
/* Precompute sums of diagonal elements so that we can directly use them
later. See the next comment to know we why need them. */
- diag = alloca (k * sizeof (double));
- double d = ZERO;
+ diag = alloca (k * sizeof (mantissa_store_t));
+ mantissa_store_t d = ZERO;
for (i = 1; i <= ip; i++)
{
- d += X[i] * Y[i];
+ d += X[i] * (mantissa_store_t) Y[i];
diag[i] = d;
}
while (i < k)
@@ -697,18 +689,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
if (k % 2 == 0)
/* We want to add this only once, but since we subtract it in the sum
of products above, we add twice. */
- zk += 2 * X[lim] * Y[lim];
+ zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
for (i = k - p2, j = p2; i < j; i++, j--)
- zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
zk -= diag[k - 1];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k--] = zk - u;
- zk = u * RADIXI;
+ DIV_RADIX (zk, Z[k]);
+ k--;
}
/* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
@@ -731,18 +720,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
if (k % 2 == 0)
/* We want to add this only once, but since we subtract it in the sum
of products above, we add twice. */
- zk += 2 * X[lim] * Y[lim];
+ zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
for (i = 1, j = k - 1; i < j; i++, j--)
- zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+ zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
zk -= diag[k - 1];
- u = (zk + CUTTER) - CUTTER;
- if (u > zk)
- u -= RADIX;
- Z[k--] = zk - u;
- zk = u * RADIXI;
+ DIV_RADIX (zk, Z[k]);
+ k--;
}
Z[k] = zk;
@@ -774,7 +760,7 @@ SECTION
__sqr (const mp_no *x, mp_no *y, int p)
{
long i, j, k, ip;
- double u, yk;
+ mantissa_store_t yk;
/* Is z=0? */
if (__glibc_unlikely (X[0] == ZERO))
@@ -798,11 +784,11 @@ __sqr (const mp_no *x, mp_no *y, int p)
while (k > p)
{
- double yk2 = 0.0;
+ mantissa_store_t yk2 = 0;
long lim = k / 2;
if (k % 2 == 0)
- yk += X[lim] * X[lim];
+ yk += X[lim] * (mantissa_store_t) X[lim];
/* In __mul, this loop (and the one within the next while loop) run
between a range to calculate the mantissa as follows:
@@ -814,36 +800,30 @@ __sqr (const mp_no *x, mp_no *y, int p)
result. For cases where the range size is even, the mid-point needs
to be added separately (above). */
for (i = k - p, j = p; i < j; i++, j--)
- yk2 += X[i] * X[j];
+ yk2 += X[i] * (mantissa_store_t) X[j];
yk += 2.0 * yk2;
- u = (yk + CUTTER) - CUTTER;
- if (u > yk)
- u -= RADIX;
- Y[k--] = yk - u;
- yk = u * RADIXI;
+ DIV_RADIX (yk, Y[k]);
+ k--;
}
while (k > 1)
{
- double yk2 = 0.0;
+ mantissa_store_t yk2 = 0;
long lim = k / 2;
if (k % 2 == 0)
- yk += X[lim] * X[lim];
+ yk += X[lim] * (mantissa_store_t) X[lim];
/* Likewise for this loop. */
for (i = 1, j = k - 1; i < j; i++, j--)
- yk2 += X[i] * X[j];
+ yk2 += X[i] * (mantissa_store_t) X[j];
yk += 2.0 * yk2;
- u = (yk + CUTTER) - CUTTER;
- if (u > yk)
- u -= RADIX;
- Y[k--] = yk - u;
- yk = u * RADIXI;
+ DIV_RADIX (yk, Y[k]);
+ k--;
}
Y[k] = yk;
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 168b334ed0..54044a0586 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -35,6 +35,7 @@
/* Common types and definition */
/************************************************************************/
+#include <mpa-arch.h>
/* The mp_no structure holds the details of a multi-precision floating point
number.
@@ -61,7 +62,7 @@
typedef struct
{
int e;
- double d[40];
+ mantissa_t d[40];
} mp_no;
typedef union
@@ -82,9 +83,13 @@ extern const mp_no mptwo;
#define ABS(x) ((x) < 0 ? -(x) : (x))
-#define RADIX 0x1.0p24 /* 2^24 */
-#define RADIXI 0x1.0p-24 /* 2^-24 */
-#define CUTTER 0x1.0p76 /* 2^76 */
+#ifndef RADIXI
+# define RADIXI 0x1.0p-24 /* 2^-24 */
+#endif
+
+#ifndef TWO52
+# define TWO52 0x1.0p52 /* 2^52 */
+#endif
#define ZERO 0.0 /* 0 */
#define MZERO -0.0 /* 0 with the sign bit set */
@@ -92,13 +97,13 @@ extern const mp_no mptwo;
#define MONE -1.0 /* -1 */
#define TWO 2.0 /* 2 */
-#define TWO5 0x1.0p5 /* 2^5 */
-#define TWO8 0x1.0p8 /* 2^52 */
-#define TWO10 0x1.0p10 /* 2^10 */
-#define TWO18 0x1.0p18 /* 2^18 */
-#define TWO19 0x1.0p19 /* 2^19 */
-#define TWO23 0x1.0p23 /* 2^23 */
-#define TWO52 0x1.0p52 /* 2^52 */
+#define TWO5 TWOPOW (5) /* 2^5 */
+#define TWO8 TWOPOW (8) /* 2^52 */
+#define TWO10 TWOPOW (10) /* 2^10 */
+#define TWO18 TWOPOW (18) /* 2^18 */
+#define TWO19 TWOPOW (19) /* 2^19 */
+#define TWO23 TWOPOW (23) /* 2^23 */
+
#define TWO57 0x1.0p57 /* 2^57 */
#define TWO71 0x1.0p71 /* 2^71 */
#define TWOM1032 0x1.0p-1032 /* 2^-1032 */
diff --git a/sysdeps/powerpc/power4/fpu/mpa-arch.h b/sysdeps/powerpc/power4/fpu/mpa-arch.h
new file mode 100644
index 0000000000..9007c9d0ca
--- /dev/null
+++ b/sysdeps/powerpc/power4/fpu/mpa-arch.h
@@ -0,0 +1,56 @@
+/* Overridable constants and operations.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+typedef double mantissa_t;
+typedef double mantissa_store_t;
+
+#define TWOPOW(i) (0x1.0p##i)
+
+#define RADIX TWOPOW (24) /* 2^24 */
+#define CUTTER TWOPOW (76) /* 2^76 */
+#define RADIXI 0x1.0p-24 /* 2^-24 */
+#define TWO52 TWOPOW (52) /* 2^52 */
+
+/* Divide D by RADIX and put the remainder in R. */
+#define DIV_RADIX(d,r) \
+ ({ \
+ double u = ((d) + CUTTER) - CUTTER; \
+ if (u > (d)) \
+ u -= RADIX; \
+ r = (d) - u; \
+ (d) = u * RADIXI; \
+ })
+
+/* Put the integer component of a double X in R and retain the fraction in
+ X. */
+#define INTEGER_OF(x, r) \
+ ({ \
+ double u = ((x) + TWO52) - TWO52; \
+ if (u > (x)) \
+ u -= ONE; \
+ (r) = u; \
+ (x) -= u; \
+ })
+
+/* Align IN down to a multiple of F, where F is a power of two. */
+#define ALIGN_DOWN_TO(in, f) \
+ ({ \
+ double factor = f * TWO52; \
+ double u = (in + factor) - factor; \
+ if (u > in) \
+ u -= f; \
+ u; \
+ })