aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiddhesh Poyarekar <siddhesh@redhat.com>2013-02-26 21:28:16 +0530
committerSiddhesh Poyarekar <siddhesh@redhat.com>2013-02-26 21:28:16 +0530
commit45f058844c33f670475bd02f266942746bcb332b (patch)
tree516c213f668b79a29216965086a6116f8cc6df78
parent2236d3595af6e19d57cf9ff4d93b18614fc9436a (diff)
downloadglibc-45f058844c33f670475bd02f266942746bcb332b.tar
glibc-45f058844c33f670475bd02f266942746bcb332b.tar.gz
glibc-45f058844c33f670475bd02f266942746bcb332b.tar.bz2
glibc-45f058844c33f670475bd02f266942746bcb332b.zip
Another tweak to the multiplication algorithm
Reduce the formula to calculate mantissa so that we reduce the net number of multiplications performed.
-rw-r--r--ChangeLog3
-rw-r--r--sysdeps/ieee754/dbl-64/mpa.c56
2 files changed, 53 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index c633a60c0b..99afa0963c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2013-02-26 Siddhesh Poyarekar <siddhesh@redhat.com>
+ * sysdeps/ieee754/dbl-64/mpa.c: Include alloca.h.
+ (__mul): Reduce iterations for calculating mantissa.
+
* sysdeps/ieee754/dbl-64/sincos32.c (__c32): Use MPONE and
MPTWO.
(__mpranred): Likewise.
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 7a6f01854b..8fc2626f76 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -43,6 +43,7 @@
#include "endian.h"
#include "mpa.h"
#include <sys/param.h>
+#include <alloca.h>
#ifndef SECTION
# define SECTION
@@ -621,6 +622,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
long p2 = p;
double u, zk;
const mp_no *a;
+ double *diag;
/* Is z=0? */
if (__glibc_unlikely (X[0] * Y[0] == ZERO))
@@ -673,12 +675,33 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
while (k > ip + ip2 + 1)
Z[k--] = ZERO;
- zk = Z[k] = ZERO;
+ zk = ZERO;
+
+ /* Precompute sums of diagonal elements so that we can directly use them
+ later. See the next comment to know we why need them. */
+ diag = alloca (k * sizeof (double));
+ double d = ZERO;
+ for (i = 1; i <= ip; i++)
+ {
+ d += X[i] * Y[i];
+ diag[i] = d;
+ }
+ while (i < k)
+ diag[i++] = d;
while (k > p2)
{
- for (i = k - p2, j = p2; i < p2 + 1; i++, j--)
- zk += X[i] * Y[j];
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ /* We want to add this only once, but since we subtract it in the sum
+ of products above, we add twice. */
+ zk += 2 * X[lim] * Y[lim];
+
+ for (i = k - p2, j = p2; i < j; i++, j--)
+ zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+
+ zk -= diag[k - 1];
u = (zk + CUTTER) - CUTTER;
if (u > zk)
@@ -687,11 +710,32 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = u * RADIXI;
}
- /* The real deal. */
+ /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
+ goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the
+ number of multiplications, we halve the range and if k is an even number,
+ add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute
+ X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
+
+ This reduction tells us that we're summing two things, the first term
+ through the half range and the negative of the sum of the product of all
+ terms of X and Y in the full range. i.e.
+
+ SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in
+ a single loop so that it completes in O(n) time and can hence be directly
+ used in the loop below. */
while (k > 1)
{
- for (i = 1, j = k - 1; i < k; i++, j--)
- zk += X[i] * Y[j];
+ long lim = k / 2;
+
+ if (k % 2 == 0)
+ /* We want to add this only once, but since we subtract it in the sum
+ of products above, we add twice. */
+ zk += 2 * X[lim] * Y[lim];
+
+ for (i = 1, j = k - 1; i < j; i++, j--)
+ zk += (X[i] + X[j]) * (Y[i] + Y[j]);
+
+ zk -= diag[k - 1];
u = (zk + CUTTER) - CUTTER;
if (u > zk)