diff options
author | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-03-26 19:28:50 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@redhat.com> | 2013-03-26 19:28:50 +0530 |
commit | 6f2e90e78f151bab153c2b38492505ae2012db06 (patch) | |
tree | 03965f28878b90725ad89572ae61993bff4ebdfd | |
parent | fce14d4e9c6e08ad8c825fe88d8cbdac5c739565 (diff) | |
download | glibc-6f2e90e78f151bab153c2b38492505ae2012db06.tar glibc-6f2e90e78f151bab153c2b38492505ae2012db06.tar.gz glibc-6f2e90e78f151bab153c2b38492505ae2012db06.tar.bz2 glibc-6f2e90e78f151bab153c2b38492505ae2012db06.zip |
Make mantissa type of mp_no configurable
The mantissa of mp_no is intended to take only integral values. This
is a relatively good choice for powerpc due to its 4 fpus, but not for
other architectures, which suffer due to this choice. This change
makes the default mantissa a long integer and allows powerpc to
override it. Additionally, some operations have been optimized for
integer manipulation, resulting in a significant improvement in
performance.
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/mpa-arch.h | 47 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/mpa.c | 96 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/mpa.h | 27 | ||||
-rw-r--r-- | sysdeps/powerpc/power4/fpu/mpa-arch.h | 56 |
5 files changed, 176 insertions, 69 deletions
@@ -1,3 +1,22 @@ +2013-03-26 Siddhesh Poyarekar <siddhesh@redhat.com> + + * sysdeps/ieee754/dbl-64/mpa-arch.h: New file. + * sysdeps/ieee754/dbl-64/mpa.c (norm): Use MANTISSA_T and + MANTISSA_STORE_T to store computations on mantissa. Use + macros for rounding and division. + (denorm): Likewise. + (__dbl_mp): Likewise. + (add_magnitudes): Likewise. + (sub_magnitudes): Likewise. + (__mul): Likewise. + (__sqr): Likewise. + * sysdeps/ieee754/dbl-64/mpa.h: Include mpa-arch.h. Define + powers of two in terms of TWOPOW macro. + (mp_no): Make type of mantissa as MANTISSA_T. + [!RADIXI]: Define RADIXI. + [!TWO52]: Define TWO52. + * sysdeps/powerpc/power4/fpu/mpa-arch.h: New file. + 2013-03-25 Adhemerval Zanella <azanella@linux.vnet.ibm.com> * sysdeps/powerpc/fpu/s_llround.c: Fix libm ABI issue with missing diff --git a/sysdeps/ieee754/dbl-64/mpa-arch.h b/sysdeps/ieee754/dbl-64/mpa-arch.h new file mode 100644 index 0000000000..7de9d51ae2 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/mpa-arch.h @@ -0,0 +1,47 @@ +/* Overridable constants and operations. + Copyright (C) 2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +typedef long mantissa_t; +typedef int64_t mantissa_store_t; + +#define TWOPOW(i) (1L << i) + +#define RADIX_EXP 24 +#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */ + +/* Divide D by RADIX and put the remainder in R. D must be a non-negative + integral value. */ +#define DIV_RADIX(d, r) \ + ({ \ + r = d & (RADIX - 1); \ + d >>= RADIX_EXP; \ + }) + +/* Put the integer component of a double X in R and retain the fraction in + X. This is used in extracting mantissa digits for MP_NO by using the + integer portion of the current value of the number as the current mantissa + digit and then scaling by RADIX to get the next mantissa digit in the same + manner. */ +#define INTEGER_OF(x, i) \ + ({ \ + i = (mantissa_t) x; \ + x -= i; \ + }) + +/* Align IN down to F. The code assumes that F is a power of two. */ +#define ALIGN_DOWN_TO(in, f) ((in) & -(f)) diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c index 0766476544..c238ccf2af 100644 --- a/sysdeps/ieee754/dbl-64/mpa.c +++ b/sysdeps/ieee754/dbl-64/mpa.c @@ -125,7 +125,8 @@ norm (const mp_no *x, double *y, int p) { #define R RADIXI long i; - double a, c, u, v, z[5]; + double c; + mantissa_t a, u, v, z[5]; if (p < 5) { if (p == 1) @@ -147,17 +148,14 @@ norm (const mp_no *x, double *y, int p) for (i = 2; i < 5; i++) { - z[i] = X[i] * a; - u = (z[i] + CUTTER) - CUTTER; - if (u > z[i]) - u -= RADIX; - z[i] -= u; - z[i - 1] += u * RADIXI; + mantissa_store_t d, r; + d = X[i] * (mantissa_store_t) a; + DIV_RADIX (d, r); + z[i] = r; + z[i - 1] += d; } - u = (z[3] + TWO71) - TWO71; - if (u > z[3]) - u -= TWO19; + u = ALIGN_DOWN_TO (z[3], TWO19); v = z[3] - u; if (v == TWO18) @@ -200,7 +198,8 @@ denorm (const mp_no *x, double *y, int p) { long i, k; long p2 = p; - double c, u, z[5]; + double c; + mantissa_t u, z[5]; #define R RADIXI if (EX < -44 || (EX == -44 && X[1] < TWO5)) @@ -280,9 +279,7 @@ denorm (const mp_no *x, double *y, int p) z[3] = X[k]; } - u = (z[3] + TWO57) - TWO57; - if (u > z[3]) - u -= TWO5; + u = ALIGN_DOWN_TO (z[3], TWO5); if (u == z[3]) { @@ -330,7 +327,6 @@ __dbl_mp (double x, mp_no *y, int p) { long i, n; long p2 = p; - double u; /* Sign. */ if (x == ZERO) @@ -356,11 +352,7 @@ __dbl_mp (double x, mp_no *y, int p) n = MIN (p2, 4); for (i = 1; i <= n; i++) { - u = (x + TWO52) - TWO52; - if (u > x) - u -= ONE; - Y[i] = u; - x -= u; + INTEGER_OF (x, Y[i]); x *= RADIX; } for (; i <= p2; i++) @@ -377,7 +369,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) { long i, j, k; long p2 = p; - double zk; + mantissa_t zk; EZ = EX; @@ -445,7 +437,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) { long i, j, k; long p2 = p; - double zk; + mantissa_t zk; EZ = EX; i = p2; @@ -621,9 +613,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) { long i, j, k, ip, ip2; long p2 = p; - double u, zk; + mantissa_store_t zk; const mp_no *a; - double *diag; + mantissa_store_t *diag; /* Is z=0? */ if (__glibc_unlikely (X[0] * Y[0] == ZERO)) @@ -680,11 +672,11 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) /* Precompute sums of diagonal elements so that we can directly use them later. See the next comment to know we why need them. */ - diag = alloca (k * sizeof (double)); - double d = ZERO; + diag = alloca (k * sizeof (mantissa_store_t)); + mantissa_store_t d = ZERO; for (i = 1; i <= ip; i++) { - d += X[i] * Y[i]; + d += X[i] * (mantissa_store_t) Y[i]; diag[i] = d; } while (i < k) @@ -697,18 +689,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) if (k % 2 == 0) /* We want to add this only once, but since we subtract it in the sum of products above, we add twice. */ - zk += 2 * X[lim] * Y[lim]; + zk += 2 * X[lim] * (mantissa_store_t) Y[lim]; for (i = k - p2, j = p2; i < j; i++, j--) - zk += (X[i] + X[j]) * (Y[i] + Y[j]); + zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]); zk -= diag[k - 1]; - u = (zk + CUTTER) - CUTTER; - if (u > zk) - u -= RADIX; - Z[k--] = zk - u; - zk = u * RADIXI; + DIV_RADIX (zk, Z[k]); + k--; } /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i @@ -731,18 +720,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) if (k % 2 == 0) /* We want to add this only once, but since we subtract it in the sum of products above, we add twice. */ - zk += 2 * X[lim] * Y[lim]; + zk += 2 * X[lim] * (mantissa_store_t) Y[lim]; for (i = 1, j = k - 1; i < j; i++, j--) - zk += (X[i] + X[j]) * (Y[i] + Y[j]); + zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]); zk -= diag[k - 1]; - u = (zk + CUTTER) - CUTTER; - if (u > zk) - u -= RADIX; - Z[k--] = zk - u; - zk = u * RADIXI; + DIV_RADIX (zk, Z[k]); + k--; } Z[k] = zk; @@ -774,7 +760,7 @@ SECTION __sqr (const mp_no *x, mp_no *y, int p) { long i, j, k, ip; - double u, yk; + mantissa_store_t yk; /* Is z=0? */ if (__glibc_unlikely (X[0] == ZERO)) @@ -798,11 +784,11 @@ __sqr (const mp_no *x, mp_no *y, int p) while (k > p) { - double yk2 = 0.0; + mantissa_store_t yk2 = 0; long lim = k / 2; if (k % 2 == 0) - yk += X[lim] * X[lim]; + yk += X[lim] * (mantissa_store_t) X[lim]; /* In __mul, this loop (and the one within the next while loop) run between a range to calculate the mantissa as follows: @@ -814,36 +800,30 @@ __sqr (const mp_no *x, mp_no *y, int p) result. For cases where the range size is even, the mid-point needs to be added separately (above). */ for (i = k - p, j = p; i < j; i++, j--) - yk2 += X[i] * X[j]; + yk2 += X[i] * (mantissa_store_t) X[j]; yk += 2.0 * yk2; - u = (yk + CUTTER) - CUTTER; - if (u > yk) - u -= RADIX; - Y[k--] = yk - u; - yk = u * RADIXI; + DIV_RADIX (yk, Y[k]); + k--; } while (k > 1) { - double yk2 = 0.0; + mantissa_store_t yk2 = 0; long lim = k / 2; if (k % 2 == 0) - yk += X[lim] * X[lim]; + yk += X[lim] * (mantissa_store_t) X[lim]; /* Likewise for this loop. */ for (i = 1, j = k - 1; i < j; i++, j--) - yk2 += X[i] * X[j]; + yk2 += X[i] * (mantissa_store_t) X[j]; yk += 2.0 * yk2; - u = (yk + CUTTER) - CUTTER; - if (u > yk) - u -= RADIX; - Y[k--] = yk - u; - yk = u * RADIXI; + DIV_RADIX (yk, Y[k]); + k--; } Y[k] = yk; diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h index 168b334ed0..54044a0586 100644 --- a/sysdeps/ieee754/dbl-64/mpa.h +++ b/sysdeps/ieee754/dbl-64/mpa.h @@ -35,6 +35,7 @@ /* Common types and definition */ /************************************************************************/ +#include <mpa-arch.h> /* The mp_no structure holds the details of a multi-precision floating point number. @@ -61,7 +62,7 @@ typedef struct { int e; - double d[40]; + mantissa_t d[40]; } mp_no; typedef union @@ -82,9 +83,13 @@ extern const mp_no mptwo; #define ABS(x) ((x) < 0 ? -(x) : (x)) -#define RADIX 0x1.0p24 /* 2^24 */ -#define RADIXI 0x1.0p-24 /* 2^-24 */ -#define CUTTER 0x1.0p76 /* 2^76 */ +#ifndef RADIXI +# define RADIXI 0x1.0p-24 /* 2^-24 */ +#endif + +#ifndef TWO52 +# define TWO52 0x1.0p52 /* 2^52 */ +#endif #define ZERO 0.0 /* 0 */ #define MZERO -0.0 /* 0 with the sign bit set */ @@ -92,13 +97,13 @@ extern const mp_no mptwo; #define MONE -1.0 /* -1 */ #define TWO 2.0 /* 2 */ -#define TWO5 0x1.0p5 /* 2^5 */ -#define TWO8 0x1.0p8 /* 2^52 */ -#define TWO10 0x1.0p10 /* 2^10 */ -#define TWO18 0x1.0p18 /* 2^18 */ -#define TWO19 0x1.0p19 /* 2^19 */ -#define TWO23 0x1.0p23 /* 2^23 */ -#define TWO52 0x1.0p52 /* 2^52 */ +#define TWO5 TWOPOW (5) /* 2^5 */ +#define TWO8 TWOPOW (8) /* 2^52 */ +#define TWO10 TWOPOW (10) /* 2^10 */ +#define TWO18 TWOPOW (18) /* 2^18 */ +#define TWO19 TWOPOW (19) /* 2^19 */ +#define TWO23 TWOPOW (23) /* 2^23 */ + #define TWO57 0x1.0p57 /* 2^57 */ #define TWO71 0x1.0p71 /* 2^71 */ #define TWOM1032 0x1.0p-1032 /* 2^-1032 */ diff --git a/sysdeps/powerpc/power4/fpu/mpa-arch.h b/sysdeps/powerpc/power4/fpu/mpa-arch.h new file mode 100644 index 0000000000..9007c9d0ca --- /dev/null +++ b/sysdeps/powerpc/power4/fpu/mpa-arch.h @@ -0,0 +1,56 @@ +/* Overridable constants and operations. + Copyright (C) 2013 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +typedef double mantissa_t; +typedef double mantissa_store_t; + +#define TWOPOW(i) (0x1.0p##i) + +#define RADIX TWOPOW (24) /* 2^24 */ +#define CUTTER TWOPOW (76) /* 2^76 */ +#define RADIXI 0x1.0p-24 /* 2^-24 */ +#define TWO52 TWOPOW (52) /* 2^52 */ + +/* Divide D by RADIX and put the remainder in R. */ +#define DIV_RADIX(d,r) \ + ({ \ + double u = ((d) + CUTTER) - CUTTER; \ + if (u > (d)) \ + u -= RADIX; \ + r = (d) - u; \ + (d) = u * RADIXI; \ + }) + +/* Put the integer component of a double X in R and retain the fraction in + X. */ +#define INTEGER_OF(x, r) \ + ({ \ + double u = ((x) + TWO52) - TWO52; \ + if (u > (x)) \ + u -= ONE; \ + (r) = u; \ + (x) -= u; \ + }) + +/* Align IN down to a multiple of F, where F is a power of two. */ +#define ALIGN_DOWN_TO(in, f) \ + ({ \ + double factor = f * TWO52; \ + double u = (in + factor) - factor; \ + if (u > in) \ + u -= f; \ + u; \ + }) |