aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/libm-ieee754
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1998-04-04 07:46:55 +0000
committerUlrich Drepper <drepper@redhat.com>1998-04-04 07:46:55 +0000
commit923609d1497f3116d57b297e3e84fc07b2b15b20 (patch)
tree2d49bc0960358fd0d21e4545905c43e1d9e258d8 /sysdeps/libm-ieee754
parent0d9f67937f0c9329c35c2c0d15848ab8316dc520 (diff)
downloadglibc-923609d1497f3116d57b297e3e84fc07b2b15b20.tar
glibc-923609d1497f3116d57b297e3e84fc07b2b15b20.tar.gz
glibc-923609d1497f3116d57b297e3e84fc07b2b15b20.tar.bz2
glibc-923609d1497f3116d57b297e3e84fc07b2b15b20.zip
Update.
1998-04-03 23:38 Ulrich Drepper <drepper@cygnus.com> * sysdeps/libm-ieee754/e_acos.c: Optimize by splitting large expressions and using array variables. * sysdeps/libm-ieee754/e_asin.c: Likewise. * sysdeps/libm-ieee754/e_j0.c: Likewise. * sysdeps/libm-ieee754/e_j1.c: Likewise. * sysdeps/libm-ieee754/e_log.c: Likewise. * sysdeps/libm-ieee754/e_pow.c: Likewise. * sysdeps/libm-ieee754/k_cos.c: Likewise. * sysdeps/libm-ieee754/k_sin.c: Likewise. * sysdeps/libm-ieee754/k_tan.c: Likewise. * sysdeps/libm-ieee754/s_atan.c: Likewise. * sysdeps/libm-ieee754/s_erf.c: Likewise. * sysdeps/libm-ieee754/s_expm1.c: Likewise. * sysdeps/libm-ieee754/s_log1p.c: Likewise. Patch by Naohiko Shimizu <nshimizu@et.u-tokai.ac.jp>.
Diffstat (limited to 'sysdeps/libm-ieee754')
-rw-r--r--sysdeps/libm-ieee754/e_acos.c67
-rw-r--r--sysdeps/libm-ieee754/e_asin.c48
-rw-r--r--sysdeps/libm-ieee754/e_j0.c92
-rw-r--r--sysdeps/libm-ieee754/e_j1.c72
-rw-r--r--sysdeps/libm-ieee754/e_log.c40
-rw-r--r--sysdeps/libm-ieee754/e_pow.c160
-rw-r--r--sysdeps/libm-ieee754/k_cos.c55
-rw-r--r--sysdeps/libm-ieee754/k_sin.c46
-rw-r--r--sysdeps/libm-ieee754/k_tan.c36
-rw-r--r--sysdeps/libm-ieee754/s_atan.c21
-rw-r--r--sysdeps/libm-ieee754/s_erf.c213
-rw-r--r--sysdeps/libm-ieee754/s_expm1.c24
-rw-r--r--sysdeps/libm-ieee754/s_log1p.c27
13 files changed, 644 insertions, 257 deletions
diff --git a/sysdeps/libm-ieee754/e_acos.c b/sysdeps/libm-ieee754/e_acos.c
index 78bdae9f83..fa858defc5 100644
--- a/sysdeps/libm-ieee754/e_acos.c
+++ b/sysdeps/libm-ieee754/e_acos.c
@@ -5,24 +5,27 @@
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
+ * software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+ */
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $";
#endif
/* __ieee754_acos(x)
- * Method :
+ * Method :
* acos(x) = pi/2 - asin(x)
* acos(-x) = pi/2 + asin(x)
* For |x|<=0.5
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
* For x>0.5
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
- * = 2asin(sqrt((1-x)/2))
+ * = 2asin(sqrt((1-x)/2))
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
* = 2f + (2c + 2s*z*R(z))
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
@@ -40,26 +43,26 @@ static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $";
#include "math.h"
#include "math_private.h"
+#define one qS[0]
#ifdef __STDC__
-static const double
+static const double
#else
-static double
+static double
#endif
-one= 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
-pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
-pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
-pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
-pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
-pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
-pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
-qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
-qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
-qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
-qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
+pS[] = {1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
+ -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
+ 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
+ -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
+ 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
+ 3.47933107596021167570e-05}, /* 0x3F023DE1, 0x0DFDF709 */
+qS[] ={1.0, -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
+ 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
+ -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
+ 7.70381505559019352791e-02}; /* 0x3FB3B8C5, 0xB12E9282 */
#ifdef __STDC__
double __ieee754_acos(double x)
@@ -68,7 +71,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
double x;
#endif
{
- double z,p,q,r,w,s,c,df;
+ double z,p,q,r,w,s,c,df,p1,p2,p3,q1,q2,q3,z2,z4,z6;
int32_t hx,ix;
GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff;
@@ -84,14 +87,34 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
if(ix<0x3fe00000) { /* |x| < 0.5 */
if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
z = x*x;
+#ifdef DO_NOT_USE_THIS
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+#else
+ p1 = z*pS[0]; z2=z*z;
+ p2 = pS[1]+z*pS[2]; z4=z2*z2;
+ p3 = pS[3]+z*pS[4]; z6=z4*z2;
+ q1 = one+z*qS[1];
+ q2 = qS[2]+z*qS[3];
+ p = p1 + z2*p2 + z4*p3 + z6*pS[5];
+ q = q1 + z2*q2 + z4*qS[4];
+#endif
r = p/q;
return pio2_hi - (x - (pio2_lo-x*r));
} else if (hx<0) { /* x < -0.5 */
z = (one+x)*0.5;
+#ifdef DO_NOT_USE_THIS
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+#else
+ p1 = z*pS[0]; z2=z*z;
+ p2 = pS[1]+z*pS[2]; z4=z2*z2;
+ p3 = pS[3]+z*pS[4]; z6=z4*z2;
+ q1 = one+z*qS[1];
+ q2 = qS[2]+z*qS[3];
+ p = p1 + z2*p2 + z4*p3 + z6*pS[5];
+ q = q1 + z2*q2 + z4*qS[4];
+#endif
s = __ieee754_sqrt(z);
r = p/q;
w = r*s-pio2_lo;
@@ -102,8 +125,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
df = s;
SET_LOW_WORD(df,0);
c = (z-df*df)/(s+df);
+#ifdef DO_NOT_USE_THIS
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+#else
+ p1 = z*pS[0]; z2=z*z;
+ p2 = pS[1]+z*pS[2]; z4=z2*z2;
+ p3 = pS[3]+z*pS[4]; z6=z4*z2;
+ q1 = one+z*qS[1];
+ q2 = qS[2]+z*qS[3];
+ p = p1 + z2*p2 + z4*p3 + z6*pS[5];
+ q = q1 + z2*q2 + z4*qS[4];
+#endif
r = p/q;
w = r*s+c;
return 2.0*(df+w);
diff --git a/sysdeps/libm-ieee754/e_asin.c b/sysdeps/libm-ieee754/e_asin.c
index 09d06a9158..65dc318db3 100644
--- a/sysdeps/libm-ieee754/e_asin.c
+++ b/sysdeps/libm-ieee754/e_asin.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $";
@@ -47,28 +50,27 @@ static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $";
#include "math.h"
#include "math_private.h"
-
+#define one qS[0]
#ifdef __STDC__
static const double
#else
static double
#endif
-one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
huge = 1.000e+300,
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
/* coefficient for R(x^2) */
-pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
-pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
-pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
-pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
-pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
-pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
-qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
-qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
-qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
-qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
+pS[] = {1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
+ -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
+ 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
+ -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
+ 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
+ 3.47933107596021167570e-05}, /* 0x3F023DE1, 0x0DFDF709 */
+qS[] = {1.0, -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
+ 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
+ -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
+ 7.70381505559019352791e-02}; /* 0x3FB3B8C5, 0xB12E9282 */
#ifdef __STDC__
double __ieee754_asin(double x)
@@ -77,7 +79,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
double x;
#endif
{
- double t,w,p,q,c,r,s;
+ double t,w,p,q,c,r,s,p1,p2,p3,q1,q2,q3,z2,z4,z6;
int32_t hx,ix;
GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff;
@@ -93,8 +95,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
if(huge+x>one) return x;/* return x with inexact if x!=0*/
} else {
t = x*x;
+#ifdef DO_NOT_USE_THIS
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
+#else
+ p1 = t*pS[0]; z2=t*t;
+ p2 = pS[1]+t*pS[2]; z4=z2*z2;
+ p3 = pS[3]+t*pS[4]; z6=z4*z2;
+ q1 = one+t*qS[1];
+ q2 = qS[2]+t*qS[3];
+ p = p1 + z2*p2 + z4*p3 + z6*pS[5];
+ q = q1 + z2*q2 + z4*qS[4];
+#endif
w = p/q;
return x+x*w;
}
@@ -102,8 +114,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
/* 1> |x|>= 0.5 */
w = one-fabs(x);
t = w*0.5;
+#ifdef DO_NOT_USE_THIS
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
+#else
+ p1 = t*pS[0]; z2=t*t;
+ p2 = pS[1]+t*pS[2]; z4=z2*z2;
+ p3 = pS[3]+t*pS[4]; z6=z4*z2;
+ q1 = one+t*qS[1];
+ q2 = qS[2]+t*qS[3];
+ p = p1 + z2*p2 + z4*p3 + z6*pS[5];
+ q = q1 + z2*q2 + z4*qS[4];
+#endif
s = __ieee754_sqrt(t);
if(ix>=0x3FEF3333) { /* if |x| > 0.975 */
w = p/q;
diff --git a/sysdeps/libm-ieee754/e_j0.c b/sysdeps/libm-ieee754/e_j0.c
index ac7e00a412..ff4c73bceb 100644
--- a/sysdeps/libm-ieee754/e_j0.c
+++ b/sysdeps/libm-ieee754/e_j0.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_j0.c,v 1.8 1995/05/10 20:45:23 jtc Exp $";
@@ -78,14 +81,14 @@ one = 1.0,
invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
/* R0/S0 on [0, 2.00] */
-R02 = 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
-R03 = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
-R04 = 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
-R05 = -4.61832688532103189199e-09, /* 0xBE33D5E7, 0x73D63FCE */
-S01 = 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
-S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
-S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
-S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
+R[] = {0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
+ -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
+ 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
+ -4.61832688532103189199e-09}, /* 0xBE33D5E7, 0x73D63FCE */
+S[] = {0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
+ 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
+ 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
+ 1.16614003333790000205e-09}; /* 0x3E1408BC, 0xF4745D8F */
#ifdef __STDC__
static const double zero = 0.0;
@@ -100,7 +103,7 @@ static double zero = 0.0;
double x;
#endif
{
- double z, s,c,ss,cc,r,u,v;
+ double z, s,c,ss,cc,r,u,v,r1,r2,s1,s2,z2,z4;
int32_t hx,ix;
GET_HIGH_WORD(hx,x);
@@ -135,8 +138,17 @@ static double zero = 0.0;
}
}
z = x*x;
+#ifdef DO_NOT_USE_THIS
r = z*(R02+z*(R03+z*(R04+z*R05)));
s = one+z*(S01+z*(S02+z*(S03+z*S04)));
+#else
+ r1 = z*R[2]; z2=z*z;
+ r2 = R[3]+z*R[4]; z4=z2*z2;
+ r = r1 + z2*r2 + z4*R[5];
+ s1 = one+z*S[1];
+ s2 = S[2]+z*S[3];
+ s = s1 + z2*s2 + z4*S[4];
+#endif
if(ix < 0x3FF00000) { /* |x| < 1.00 */
return one + z*(-0.25+(r/s));
} else {
@@ -150,17 +162,17 @@ static const double
#else
static double
#endif
-u00 = -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
-u01 = 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
-u02 = -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
-u03 = 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
-u04 = -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
-u05 = 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
-u06 = -3.98205194132103398453e-11, /* 0xBDC5E43D, 0x693FB3C8 */
-v01 = 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
-v02 = 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
-v03 = 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
-v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
+U[] = {-7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
+ 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
+ -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
+ 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
+ -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
+ 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
+ -3.98205194132103398453e-11}, /* 0xBDC5E43D, 0x693FB3C8 */
+V[] = {1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
+ 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
+ 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
+ 4.41110311332675467403e-10}; /* 0x3DFE5018, 0x3BD6D9EF */
#ifdef __STDC__
double __ieee754_y0(double x)
@@ -169,7 +181,7 @@ v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
double x;
#endif
{
- double z, s,c,ss,cc,u,v;
+ double z, s,c,ss,cc,u,v,z2,z4,z6,u1,u2,u3,v1,v2;
int32_t hx,ix,lx;
EXTRACT_WORDS(hx,lx,x);
@@ -211,11 +223,21 @@ v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
return z;
}
if(ix<=0x3e400000) { /* x < 2**-27 */
- return(u00 + tpi*__ieee754_log(x));
+ return(U[0] + tpi*__ieee754_log(x));
}
z = x*x;
+#ifdef DO_NOT_USE_THIS
u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
v = one+z*(v01+z*(v02+z*(v03+z*v04)));
+#else
+ u1 = U[0]+z*U[1]; z2=z*z;
+ u2 = U[2]+z*U[3]; z4=z2*z2;
+ u3 = U[4]+z*U[5]; z6=z4*z2;
+ u = u1 + z2*u2 + z4*u3 + z6*U[6];
+ v1 = one+z*V[1];
+ v2 = V[2]+z*V[3];
+ v = v1 + z2*v2 + z4*V[4];
+#endif
return(u/v + tpi*(__ieee754_j0(x)*__ieee754_log(x)));
}
@@ -336,7 +358,7 @@ static double pS2[5] = {
#else
double *p,*q;
#endif
- double z,r,s;
+ double z,r,s,z2,z4,r1,r2,r3,s1,s2,s3;
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
@@ -345,8 +367,19 @@ static double pS2[5] = {
else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
else if(ix>=0x40000000){p = pR2; q= pS2;}
z = one/(x*x);
+#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
+#else
+ r1 = p[0]+z*p[1]; z2=z*z;
+ r2 = p[2]+z*p[3]; z4=z2*z2;
+ r3 = p[4]+z*p[5];
+ r = r1 + z2*r2 + z4*r3;
+ s1 = one+z*q[0];
+ s2 = q[1]+z*q[2];
+ s3 = q[3]+z*q[4];
+ s = s1 + z2*s2 + z4*s3;
+#endif
return one+ r/s;
}
@@ -472,7 +505,7 @@ static double qS2[6] = {
#else
double *p,*q;
#endif
- double s,r,z;
+ double s,r,z,z2,z4,z6,r1,r2,r3,s1,s2,s3;
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
@@ -481,7 +514,18 @@ static double qS2[6] = {
else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
else if(ix>=0x40000000){p = qR2; q= qS2;}
z = one/(x*x);
+#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
+#else
+ r1 = p[0]+z*p[1]; z2=z*z;
+ r2 = p[2]+z*p[3]; z4=z2*z2;
+ r3 = p[4]+z*p[5]; z6=z4*z2;
+ r= r1 + z2*r2 + z4*r3;
+ s1 = one+z*q[0];
+ s2 = q[1]+z*q[2];
+ s3 = q[3]+z*q[4];
+ s = s1 + z2*s2 + z4*s3 +z6*q[5];
+#endif
return (-.125 + r/s)/x;
}
diff --git a/sysdeps/libm-ieee754/e_j1.c b/sysdeps/libm-ieee754/e_j1.c
index cdc18dd4bd..daf025fdb7 100644
--- a/sysdeps/libm-ieee754/e_j1.c
+++ b/sysdeps/libm-ieee754/e_j1.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_j1.c,v 1.8 1995/05/10 20:45:27 jtc Exp $";
@@ -78,15 +81,15 @@ one = 1.0,
invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
/* R0/S0 on [0,2] */
-r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
-r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
-r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
-r03 = 4.96727999609584448412e-08, /* 0x3E6AAAFA, 0x46CA0BD9 */
-s01 = 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
-s02 = 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
-s03 = 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
-s04 = 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
-s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
+R[] = {-6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
+ 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
+ -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
+ 4.96727999609584448412e-08}, /* 0x3E6AAAFA, 0x46CA0BD9 */
+S[] = {0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
+ 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
+ 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
+ 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
+ 1.23542274426137913908e-11}; /* 0x3DAB2ACF, 0xCFB97ED8 */
#ifdef __STDC__
static const double zero = 0.0;
@@ -101,7 +104,7 @@ static double zero = 0.0;
double x;
#endif
{
- double z, s,c,ss,cc,r,u,v,y;
+ double z, s,c,ss,cc,r,u,v,y,r1,r2,s1,s2,s3,z2,z4;
int32_t hx,ix;
GET_HIGH_WORD(hx,x);
@@ -134,9 +137,20 @@ static double zero = 0.0;
if(huge+x>one) return 0.5*x;/* inexact if x!=0 necessary */
}
z = x*x;
+#ifdef DO_NOT_USE_THIS
r = z*(r00+z*(r01+z*(r02+z*r03)));
s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
r *= x;
+#else
+ r1 = z*R[0]; z2=z*z;
+ r2 = R[1]+z*R[2]; z4=z2*z2;
+ r = r1 + z2*r2 + z4*R[3];
+ r *= x;
+ s1 = one+z*S[1];
+ s2 = S[2]+z*S[3];
+ s3 = S[4]+z*S[5];
+ s = s1 + z2*s2 + z4*s3;
+#endif
return(x*0.5+r/s);
}
@@ -170,7 +184,7 @@ static double V0[5] = {
double x;
#endif
{
- double z, s,c,ss,cc,u,v;
+ double z, s,c,ss,cc,u,v,u1,u2,v1,v2,v3,z2,z4;
int32_t hx,ix,lx;
EXTRACT_WORDS(hx,lx,x);
@@ -211,8 +225,18 @@ static double V0[5] = {
return(-tpi/x);
}
z = x*x;
+#ifdef DO_NOT_USE_THIS
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
+#else
+ u1 = U0[0]+z*U0[1];z2=z*z;
+ u2 = U0[2]+z*U0[3];z4=z2*z2;
+ u = u1 + z2*u2 + z4*U0[4];
+ v1 = one+z*V0[0];
+ v2 = V0[1]+z*V0[2];
+ v3 = V0[3]+z*V0[4];
+ v = v1 + z2*v2 + z4*v3;
+#endif
return(x*(u/v) + tpi*(__ieee754_j1(x)*__ieee754_log(x)-one/x));
}
@@ -334,7 +358,7 @@ static double ps2[5] = {
#else
double *p,*q;
#endif
- double z,r,s;
+ double z,r,s,r1,r2,r3,s1,s2,s3,z2,z4;
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
@@ -343,8 +367,19 @@ static double ps2[5] = {
else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
else if(ix>=0x40000000){p = pr2; q= ps2;}
z = one/(x*x);
+#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
+#else
+ r1 = p[0]+z*p[1]; z2=z*z;
+ r2 = p[2]+z*p[3]; z4=z2*z2;
+ r3 = p[4]+z*p[5];
+ r = r1 + z2*r2 + z4*r3;
+ s1 = one+z*q[0];
+ s2 = q[1]+z*q[2];
+ s3 = q[3]+z*q[4];
+ s = s1 + z2*s2 + z4*s3;
+#endif
return one+ r/s;
}
@@ -471,7 +506,7 @@ static double qs2[6] = {
#else
double *p,*q;
#endif
- double s,r,z;
+ double s,r,z,r1,r2,r3,s1,s2,s3,z2,z4,z6;
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff;
@@ -480,7 +515,18 @@ static double qs2[6] = {
else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
else if(ix>=0x40000000){p = qr2; q= qs2;}
z = one/(x*x);
+#ifdef DO_NOT_USE_THIS
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
+#else
+ r1 = p[0]+z*p[1]; z2=z*z;
+ r2 = p[2]+z*p[3]; z4=z2*z2;
+ r3 = p[4]+z*p[5]; z6=z4*z2;
+ r = r1 + z2*r2 + z4*r3;
+ s1 = one+z*q[0];
+ s2 = q[1]+z*q[2];
+ s3 = q[3]+z*q[4];
+ s = s1 + z2*s2 + z4*s3 + z6*q[5];
+#endif
return (.375 + r/s)/x;
}
diff --git a/sysdeps/libm-ieee754/e_log.c b/sysdeps/libm-ieee754/e_log.c
index e4e6eab6c5..5b8c7b3ade 100644
--- a/sysdeps/libm-ieee754/e_log.c
+++ b/sysdeps/libm-ieee754/e_log.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
@@ -67,7 +70,8 @@ static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $";
#include "math.h"
#include "math_private.h"
-
+#define half Lg[8]
+#define two Lg[9]
#ifdef __STDC__
static const double
#else
@@ -76,14 +80,16 @@ static double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
-Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
-Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
-Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
-Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
-Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
-Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
-Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
-
+ Lg[] = {0.0,
+ 6.666666666666735130e-01, /* 3FE55555 55555593 */
+ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
+ 2.857142874366239149e-01, /* 3FD24924 94229359 */
+ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
+ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
+ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
+ 1.479819860511658591e-01, /* 3FC2F112 DF3E5244 */
+ 0.5,
+ 2.0};
#ifdef __STDC__
static const double zero = 0.0;
#else
@@ -97,7 +103,7 @@ static double zero = 0.0;
double x;
#endif
{
- double hfsq,f,s,z,R,w,t1,t2,dk;
+ double hfsq,f,s,z,R,w,t1,t2,dk,t11,t12,t21,t22,w2,zw2;
int32_t k,hx,i,j;
u_int32_t lx;
@@ -121,20 +127,28 @@ static double zero = 0.0;
if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */
if(f==zero) if(k==0) return zero; else {dk=(double)k;
return dk*ln2_hi+dk*ln2_lo;}
- R = f*f*(0.5-0.33333333333333333*f);
+ R = f*f*(half-0.33333333333333333*f);
if(k==0) return f-R; else {dk=(double)k;
return dk*ln2_hi-((R-dk*ln2_lo)-f);}
}
- s = f/(2.0+f);
+ s = f/(two+f);
dk = (double)k;
z = s*s;
i = hx-0x6147a;
w = z*z;
j = 0x6b851-hx;
+#ifdef DO_NOT_USE_THIS
t1= w*(Lg2+w*(Lg4+w*Lg6));
t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
- i |= j;
R = t2+t1;
+#else
+ t21 = Lg[5]+w*Lg[7]; w2=w*w;
+ t22 = Lg[1]+w*Lg[3]; zw2=z*w2;
+ t11 = Lg[4]+w*Lg[6];
+ t12 = w*Lg[2];
+ R = t12 + w2*t11 + z*t22 + zw2*t21;
+#endif
+ i |= j;
if(i>0) {
hfsq=0.5*f*f;
if(k==0) return f-(hfsq-s*(hfsq+R)); else
diff --git a/sysdeps/libm-ieee754/e_pow.c b/sysdeps/libm-ieee754/e_pow.c
index 4b9ba3d5eb..02b16c5171 100644
--- a/sysdeps/libm-ieee754/e_pow.c
+++ b/sysdeps/libm-ieee754/e_pow.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
@@ -61,6 +64,33 @@ static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
#include "math.h"
#include "math_private.h"
+#define zero C[0]
+#define one C[1]
+#define two C[2]
+#define two53 C[3]
+#define huge C[4]
+#define tiny C[5]
+#define L1 C[6]
+#define L2 C[7]
+#define L3 C[8]
+#define L4 C[9]
+#define L5 C[10]
+#define L6 C[11]
+#define P1 C[12]
+#define P2 C[13]
+#define P3 C[14]
+#define P4 C[15]
+#define P5 C[16]
+#define lg2 C[17]
+#define lg2_h C[18]
+#define lg2_l C[19]
+#define ovt C[20]
+#define cp C[21]
+#define cp_h C[22]
+#define cp_l C[23]
+#define ivln2 C[24]
+#define ivln2_h C[25]
+#define ivln2_l C[26]
#ifdef __STDC__
static const double
@@ -70,34 +100,34 @@ static double
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
-zero = 0.0,
-one = 1.0,
-two = 2.0,
-two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */
-huge = 1.0e300,
-tiny = 1.0e-300,
- /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
-L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
-L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
-L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
-L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
-L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
-L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
-P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
-lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
-lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
-ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */
-cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
-cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
-cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
-ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
-ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
-ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
+C[] = {
+0.0,
+1.0,
+2.0,
+9007199254740992.0 ,
+1.0e300,
+1.0e-300,
+5.99999999999994648725e-01 ,
+4.28571428578550184252e-01 ,
+3.33333329818377432918e-01 ,
+2.72728123808534006489e-01 ,
+2.30660745775561754067e-01 ,
+2.06975017800338417784e-01 ,
+1.66666666666666019037e-01 ,
+-2.77777777770155933842e-03 ,
+6.61375632143793436117e-05 ,
+-1.65339022054652515390e-06 ,
+4.13813679705723846039e-08 ,
+6.93147180559945286227e-01 ,
+6.93147182464599609375e-01 ,
+-1.90465429995776804525e-09 ,
+8.0085662595372944372e-0017 ,
+9.61796693925975554329e-01 ,
+9.61796700954437255859e-01 ,
+-7.02846165095275826516e-09 ,
+1.44269504088896338700e+00 ,
+1.44269502162933349609e+00 ,
+1.92596299112661746887e-08 };
#ifdef __STDC__
double __ieee754_pow(double x, double y)
@@ -107,7 +137,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
#endif
{
double z,ax,z_h,z_l,p_h,p_l;
- double y1,t1,t2,r,s,t,u,v,w;
+ double y1,t1,t2,r,s,t,u,v,w, t12,t14,r_1,r_2,r_3;
int32_t i,j,k,yisint,n;
int32_t hx,hy,ix,iy;
u_int32_t lx,ly;
@@ -117,7 +147,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
ix = hx&0x7fffffff; iy = hy&0x7fffffff;
/* y==zero: x**0 = 1 */
- if((iy|ly)==0) return one;
+ if((iy|ly)==0) return C[1];
/* +-NaN return x+y */
if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
@@ -150,12 +180,12 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
if(((ix-0x3ff00000)|lx)==0)
return y - y; /* inf**+-1 is NaN */
else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
- return (hy>=0)? y: zero;
+ return (hy>=0)? y: C[0];
else /* (|x|<1)**-,+inf = inf,0 */
- return (hy<0)?-y: zero;
+ return (hy<0)?-y: C[0];
}
if(iy==0x3ff00000) { /* y is +-1 */
- if(hy<0) return one/x; else return x;
+ if(hy<0) return C[1]/x; else return x;
}
if(hy==0x40000000) return x*x; /* y is 2 */
if(hy==0x3fe00000) { /* y is 0.5 */
@@ -169,7 +199,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
if(lx==0) {
if(ix==0x7ff00000||ix==0||ix==0x3ff00000){
z = ax; /*x is +-0,+-inf,+-1*/
- if(hy<0) z = one/z; /* z = (1/|x|) */
+ if(hy<0) z = C[1]/z; /* z = (1/|x|) */
if(hx<0) {
if(((ix-0x3ff00000)|yisint)==0) {
z = (z-z)/(z-z); /* (-1)**non-int is NaN */
@@ -186,27 +216,27 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
/* |y| is huge */
if(iy>0x41e00000) { /* if |y| > 2**31 */
if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */
- if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
- if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
+ if(ix<=0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5];
+ if(ix>=0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5];
}
/* over/underflow if x is not close to one */
- if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
- if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
+ if(ix<0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5];
+ if(ix>0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5];
/* now |1-x| is tiny <= 2**-20, suffice to compute
log(x) by x-x^2/2+x^3/3-x^4/4 */
t = x-1; /* t has 20 trailing zeros */
w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
- u = ivln2_h*t; /* ivln2_h has 21 sig. bits */
- v = t*ivln2_l-w*ivln2;
+ u = C[25]*t; /* ivln2_h has 21 sig. bits */
+ v = t*C[26]-w*C[24];
t1 = u+v;
SET_LOW_WORD(t1,0);
t2 = v-(t1-u);
} else {
- double s2,s_h,s_l,t_h,t_l;
+ double s2,s_h,s_l,t_h,t_l,s22,s24,s26,r1,r2,r3;
n = 0;
/* take care subnormal number */
if(ix<0x00100000)
- {ax *= two53; n -= 53; GET_HIGH_WORD(ix,ax); }
+ {ax *= C[3]; n -= 53; GET_HIGH_WORD(ix,ax); }
n += ((ix)>>20)-0x3ff;
j = ix&0x000fffff;
/* determine interval */
@@ -218,18 +248,25 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
- v = one/(ax+bp[k]);
+ v = C[1]/(ax+bp[k]);
s = u*v;
s_h = s;
SET_LOW_WORD(s_h,0);
/* t_h=ax+bp[k] High */
- t_h = zero;
+ t_h = C[0];
SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18));
t_l = ax - (t_h-bp[k]);
s_l = v*((u-s_h*t_h)-s_h*t_l);
/* compute log(ax) */
s2 = s*s;
+#ifdef DO_NOT_USE_THIS
r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
+#else
+ r1 = C[10]+s2*C[11]; s22=s2*s2;
+ r2 = C[8]+s2*C[9]; s24=s22*s22;
+ r3 = C[6]+s2*C[7]; s26=s24*s22;
+ r = r3*s22 + r2*s24 + r1*s26;
+#endfi
r += s_l*(s_h+s);
s2 = s_h*s_h;
t_h = 3.0+s2+r;
@@ -242,8 +279,8 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
p_h = u+v;
SET_LOW_WORD(p_h,0);
p_l = v-(p_h-u);
- z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
- z_l = cp_l*p_h+p_l*cp+dp_l[k];
+ z_h = C[22]*p_h; /* cp_h+cp_l = 2/(3*log2) */
+ z_l = C[23]*p_h+p_l*C[21]+dp_l[k];
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
t = (double)n;
t1 = (((z_h+z_l)+dp_h[k])+t);
@@ -251,9 +288,9 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
t2 = z_l-(((t1-t)-dp_h[k])-z_h);
}
- s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
+ s = C[1]; /* s (sign of result -ve**odd) = -1 else = 1 */
if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0)
- s = -one;/* (-ve)**(odd int) */
+ s = -C[1];/* (-ve)**(odd int) */
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
y1 = y;
@@ -264,15 +301,15 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
EXTRACT_WORDS(j,i,z);
if (j>=0x40900000) { /* z >= 1024 */
if(((j-0x40900000)|i)!=0) /* if z > 1024 */
- return s*huge*huge; /* overflow */
+ return s*C[4]*C[4]; /* overflow */
else {
- if(p_l+ovt>z-p_h) return s*huge*huge; /* overflow */
+ if(p_l+C[20]>z-p_h) return s*C[4]*C[4]; /* overflow */
}
} else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */
if(((j-0xc090cc00)|i)!=0) /* z < -1075 */
- return s*tiny*tiny; /* underflow */
+ return s*C[5]*C[5]; /* underflow */
else {
- if(p_l<=z-p_h) return s*tiny*tiny; /* underflow */
+ if(p_l<=z-p_h) return s*C[5]*C[5]; /* underflow */
}
}
/*
@@ -284,7 +321,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */
n = j+(0x00100000>>(k+1));
k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */
- t = zero;
+ t = C[0];
SET_HIGH_WORD(t,n&~(0x000fffff>>k));
n = ((n&0x000fffff)|0x00100000)>>(20-k);
if(j<0) n = -n;
@@ -292,14 +329,21 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
}
t = p_l+p_h;
SET_LOW_WORD(t,0);
- u = t*lg2_h;
- v = (p_l-(t-p_h))*lg2+t*lg2_l;
+ u = t*C[18];
+ v = (p_l-(t-p_h))*C[17]+t*C[19];
z = u+v;
w = v-(z-u);
t = z*z;
- t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
- r = (z*t1)/(t1-two)-(w+z*w);
- z = one-(r-z);
+#ifdef DO_NOT_USE_THIS
+ t1 = z - t*(C[12]+t*(C[13]+t*(C[14]+t*(C[15]+t*C[16]))));
+#else
+ r_1 = C[15]+t*C[16]; t12 = t*t;
+ r_2 = C[13]+t*C[14]; t14 = t12*t12;
+ r_3 = t*C[12];
+ t1 = z - r_3 - t12*r_2 - t14*r_1;
+#endif
+ r = (z*t1)/(t1-C[2])-(w+z*w);
+ z = C[1]-(r-z);
GET_HIGH_WORD(j,z);
j += (n<<20);
if((j>>20)<=0) z = __scalbn(z,n); /* subnormal output */
diff --git a/sysdeps/libm-ieee754/k_cos.c b/sysdeps/libm-ieee754/k_cos.c
index d8740b3508..7e38ef7915 100644
--- a/sysdeps/libm-ieee754/k_cos.c
+++ b/sysdeps/libm-ieee754/k_cos.c
@@ -5,10 +5,13 @@
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
+ * software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
@@ -18,7 +21,7 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
* __kernel_cos( x, y )
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
* Input x is assumed to be bounded by ~pi/4 in magnitude.
- * Input y is the tail of x.
+ * Input y is the tail of x.
*
* Algorithm
* 1. Since cos(-x) = cos(x), we need only to consider positive x.
@@ -28,15 +31,15 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
* 4 14
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
* where the remez error is
- *
+ *
* | 2 4 6 8 10 12 14 | -58
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
- * | |
- *
- * 4 6 8 10 12 14
+ * | |
+ *
+ * 4 6 8 10 12 14
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
* cos(x) = 1 - x*x/2 + r
- * since cos(x+y) ~ cos(x) - sin(x)*y
+ * since cos(x+y) ~ cos(x) - sin(x)*y
* ~ cos(x) - x*y,
* a correction term is necessary in cos(x) and hence
* cos(x+y) = 1 - (x*x/2 - (r - x*y))
@@ -53,17 +56,18 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $";
#include "math_private.h"
#ifdef __STDC__
-static const double
+static const double
#else
-static double
+static double
#endif
-one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
-C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
-C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
-C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
-C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
-C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
+C[] = {
+ 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
+ -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
+ 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
+ -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
+ 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
+ -1.13596475577881948265e-11}; /* 0xBDA8FAE9, 0xBE8838D4 */
#ifdef __STDC__
double __kernel_cos(double x, double y)
@@ -72,17 +76,24 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
double x,y;
#endif
{
- double a,hz,z,r,qx;
+ double a,hz,z,r,qx,r1,r2,r3,z1,z2,z3;
int32_t ix;
+ z = x*x;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; /* ix = |x|'s high word*/
if(ix<0x3e400000) { /* if x < 2**27 */
- if(((int)x)==0) return one; /* generate inexact */
+ if(((int)x)==0) return C[0]; /* generate inexact */
}
- z = x*x;
+#ifdef DO_NOT_USE_THIS
r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
- if(ix < 0x3FD33333) /* if |x| < 0.3 */
- return one - (0.5*z - (z*r - x*y));
+#else
+ r1=z*C[6];r1=r1+C[5];z1=z*z;
+ r2=z*C[4];r2=r2+C[3];z2=z1*z;
+ r3=z*C[2];r3=r3+C[1];z3=z2*z1;
+ r=z3*r1+z2*r2+z*r3;
+#endif
+ if(ix < 0x3FD33333) /* if |x| < 0.3 */
+ return C[0] - (0.5*z - (z*r - x*y));
else {
if(ix > 0x3fe90000) { /* x > 0.78125 */
qx = 0.28125;
@@ -90,7 +101,7 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
INSERT_WORDS(qx,ix-0x00200000,0); /* x/4 */
}
hz = 0.5*z-qx;
- a = one-qx;
+ a = C[0]-qx;
return a - (hz - (z*r-x*y));
}
}
diff --git a/sysdeps/libm-ieee754/k_sin.c b/sysdeps/libm-ieee754/k_sin.c
index 86b95529bc..49c59228e0 100644
--- a/sysdeps/libm-ieee754/k_sin.c
+++ b/sysdeps/libm-ieee754/k_sin.c
@@ -5,10 +5,13 @@
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
+ * software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
@@ -18,24 +21,24 @@ static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
* kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
- * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
+ * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
*
* Algorithm
- * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
+ * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
* 3. sin(x) is approximated by a polynomial of degree 13 on
* [0,pi/4]
* 3 13
* sin(x) ~ x + S1*x + ... + S6*x
* where
- *
+ *
* |sin(x) 2 4 6 8 10 12 | -58
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
- * | x |
- *
+ * | x |
+ *
* 4. sin(x+y) = sin(x) + sin'(x')*y
* ~ sin(x) + (1-x*x/2)*y
- * For better accuracy, let
+ * For better accuracy, let
* 3 2 2 2 2
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
* then 3 2
@@ -46,17 +49,18 @@ static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $";
#include "math_private.h"
#ifdef __STDC__
-static const double
+static const double
#else
-static double
+static double
#endif
-half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
-S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
-S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
-S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
-S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
-S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
-S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
+S[] = {
+ 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+ -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
+ 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
+ -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
+ 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
+ -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
+ 1.58969099521155010221e-10}; /* 0x3DE5D93A, 0x5ACFD57C */
#ifdef __STDC__
double __kernel_sin(double x, double y, int iy)
@@ -65,7 +69,7 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
double x,y; int iy; /* iy=0 if y is zero */
#endif
{
- double z,r,v;
+ double z,r,v,z1,r1,r2;
int32_t ix;
GET_HIGH_WORD(ix,x);
ix &= 0x7fffffff; /* high word of x */
@@ -73,7 +77,15 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
{if((int)x==0) return x;} /* generate inexact */
z = x*x;
v = z*x;
+#ifdef DO_NOT_USE_THIS
r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
if(iy==0) return x+v*(S1+z*r);
else return x-((z*(half*y-v*r)-y)-v*S1);
+#else
+ r1 = S[5]+z*S[6]; z1 = z*z*z;
+ r2 = S[3]+z*S[4];
+ r = S[2] + z*r2 + z1*r1;
+ if(iy==0) return x+v*(S[1]+z*r);
+ else return x-((z*(S[0]*y-v*r)-y)-v*S[1]);
+#endif
}
diff --git a/sysdeps/libm-ieee754/k_tan.c b/sysdeps/libm-ieee754/k_tan.c
index aa9c67c9d0..55dafb8ebc 100644
--- a/sysdeps/libm-ieee754/k_tan.c
+++ b/sysdeps/libm-ieee754/k_tan.c
@@ -5,10 +5,13 @@
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
+ * software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $";
@@ -18,25 +21,25 @@ static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $";
* kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
- * Input k indicates whether tan (if k=1) or
+ * Input k indicates whether tan (if k=1) or
* -1/tan (if k= -1) is returned.
*
* Algorithm
- * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
+ * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
* 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
* 3. tan(x) is approximated by a odd polynomial of degree 27 on
* [0,0.67434]
* 3 27
* tan(x) ~ x + T1*x + ... + T13*x
* where
- *
+ *
* |tan(x) 2 4 26 | -59.2
* |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
- * | x |
- *
+ * | x |
+ *
* Note: tan(x+y) = tan(x) + tan'(x)*y
* ~ tan(x) + (1+x*x)*y
- * Therefore, for better accuracy in computing tan(x+y), let
+ * Therefore, for better accuracy in computing tan(x+y), let
* 3 2 2 2 2
* r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
* then
@@ -51,9 +54,9 @@ static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $";
#include "math.h"
#include "math_private.h"
#ifdef __STDC__
-static const double
+static const double
#else
-static double
+static double
#endif
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
@@ -81,7 +84,7 @@ T[] = {
double x,y; int iy;
#endif
{
- double z,r,v,w,s;
+ double z,r,v,w,s,r1,r2,r3,v1,v2,v3,w2,w4;
int32_t ix,hx;
GET_HIGH_WORD(hx,x);
ix = hx&0x7fffffff; /* high word of |x| */
@@ -105,8 +108,19 @@ T[] = {
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
*/
+#ifdef DO_NOT_USE_THIS
r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
+#else
+ v1 = T[10]+w*T[12]; w2=w*w;
+ v2 = T[6]+w*T[8]; w4=w2*w2;
+ v3 = T[2]+w*T[4]; v1=z*v1;
+ r1 = T[9]+w*T[11]; v2=z*v2;
+ r2 = T[5]+w*T[7]; v3=z*v3;
+ r3 = T[1]+w*T[3];
+ v = v3 + w2*v2 + w4*v1;
+ r = r3 + w2*r2 + w4*r1;
+#endif
s = z*x;
r = y + z*(s*(r+v)+y);
r += T[0]*s;
@@ -116,7 +130,7 @@ T[] = {
return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
}
if(iy==1) return w;
- else { /* if allow error up to 2 ulp,
+ else { /* if allow error up to 2 ulp,
simply return -1.0/(x+r) here */
/* compute -1.0/(x+r) accurately */
double a,t;
diff --git a/sysdeps/libm-ieee754/s_atan.c b/sysdeps/libm-ieee754/s_atan.c
index 6657b15cda..cad3ba12a8 100644
--- a/sysdeps/libm-ieee754/s_atan.c
+++ b/sysdeps/libm-ieee754/s_atan.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_atan.c,v 1.8 1995/05/10 20:46:45 jtc Exp $";
@@ -92,7 +95,7 @@ huge = 1.0e300;
double x;
#endif
{
- double w,s1,s2,z;
+ double w,s1,z,s,w2,w4,s11,s12,s13,s21,s22,s23;
int32_t ix,hx,id;
GET_HIGH_WORD(hx,x);
@@ -129,6 +132,7 @@ huge = 1.0e300;
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
+#ifdef DO_NOT_USE_THIS
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
if (id<0) return x - x*(s1+s2);
@@ -136,6 +140,21 @@ huge = 1.0e300;
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
return (hx<0)? -z:z;
}
+#else
+ s11 = aT[8]+w*aT[10]; w2=w*w;
+ s12 = aT[4]+w*aT[6]; w4=w2*w2;
+ s13 = aT[0]+w*aT[2];
+ s21 = aT[7]+w*aT[9];
+ s22 = aT[3]+w*aT[5];
+ s23 = w*aT[1];
+ s1 = s13 + w2*s12 + w4*s11;
+ s = s23 + w2*s22 + w4*s21 + z*s1;
+ if (id<0) return x - x*(s);
+ else {
+ z = atanhi[id] - ((x*(s) - atanlo[id]) - x);
+ return (hx<0)? -z:z;
+ }
+#endif
}
weak_alias (__atan, atan)
#ifdef NO_LONG_DOUBLE
diff --git a/sysdeps/libm-ieee754/s_erf.c b/sysdeps/libm-ieee754/s_erf.c
index f64f0fcefd..d8b6629a72 100644
--- a/sysdeps/libm-ieee754/s_erf.c
+++ b/sysdeps/libm-ieee754/s_erf.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_erf.c,v 1.8 1995/05/10 20:47:05 jtc Exp $";
@@ -128,68 +131,68 @@ erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
*/
efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
-pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
-pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
-pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
-pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
-pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
-qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
-qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
-qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
-qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
-qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
+pp[] = {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
+ -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
+ -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
+ -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
+ -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */
+qq[] = {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
+ 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
+ 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
+ 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
+ -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
-pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
-pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
-pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
-pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
-pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
-pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
-pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
-qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
-qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
-qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
-qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
-qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
-qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
+pa[] = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
+ 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
+ -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
+ 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
+ -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
+ 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
+ -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */
+qa[] = {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
+ 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
+ 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
+ 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
+ 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
+ 1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
-ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
-ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
-ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
-ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
-ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
-ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
-ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
-ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
-sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
-sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
-sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
-sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
-sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
-sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
-sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
-sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
+ra[] = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
+ -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
+ -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
+ -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
+ -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
+ -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
+ -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
+ -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */
+sa[] = {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
+ 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
+ 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
+ 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
+ 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
+ 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
+ 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
+ -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
-rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
-rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
-rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
-rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
-rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
-rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
-rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
-sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
-sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
-sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
-sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
-sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
-sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
-sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
+rb[] = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
+ -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
+ -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
+ -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
+ -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
+ -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
+ -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */
+sb[] = {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
+ 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
+ 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
+ 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
+ 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
+ 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
+ -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */
#ifdef __STDC__
double __erf(double x)
@@ -208,21 +211,46 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
}
if(ix < 0x3feb0000) { /* |x|<0.84375 */
+ double r1,r2,s1,s2,s3,z2,z4;
if(ix < 0x3e300000) { /* |x|<2**-28 */
if (ix < 0x00800000)
return 0.125*(8.0*x+efx8*x); /*avoid underflow */
return x + efx*x;
}
z = x*x;
+#ifdef DO_NOT_USE_THIS
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+#else
+ r1 = pp[0]+z*pp[1]; z2=z*z;
+ r2 = pp[2]+z*pp[3]; z4=z2*z2;
+ s1 = one+z*qq[1];
+ s2 = qq[2]+z*qq[3];
+ s3 = qq[4]+z*qq[5];
+ r = r1 + z2*r2 + z4*pp[4];
+ s = s1 + z2*s2 + z4*s3;
+#endif
y = r/s;
return x + x*y;
}
if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
+ double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
s = fabs(x)-one;
+#ifdef DO_NOT_USE_THIS
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+#else
+ P1 = pa[0]+s*pa[1]; s2=s*s;
+ Q1 = one+s*qa[1]; s4=s2*s2;
+ P2 = pa[2]+s*pa[3]; s6=s4*s2;
+ Q2 = qa[2]+s*qa[3];
+ P3 = pa[4]+s*pa[5];
+ Q3 = qa[4]+s*qa[5];
+ P4 = s6*pa[6];
+ Q4 = s6*qa[6];
+ P = P1 + s2*P2 + s4*P3 + s6*P4;
+ Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
+#endif
if(hx>=0) return erx + P/Q; else return -erx - P/Q;
}
if (ix >= 0x40180000) { /* inf>|x|>=6 */
@@ -231,15 +259,42 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
x = fabs(x);
s = one/(x*x);
if(ix< 0x4006DB6E) { /* |x| < 1/0.35 */
+#ifdef DO_NOT_USE_THIS
R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
+#else
+ double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
+ R1 = ra[0]+s*ra[1];s2 = s*s;
+ S1 = one+s*sa[1]; s4 = s2*s2;
+ R2 = ra[2]+s*ra[3];s6 = s4*s2;
+ S2 = sa[2]+s*sa[3];s8 = s4*s4;
+ R3 = ra[4]+s*ra[5];
+ S3 = sa[4]+s*sa[5];
+ R4 = ra[6]+s*ra[7];
+ S4 = sa[6]+s*sa[7];
+ R = R1 + s2*R2 + s4*R3 + s6*R4;
+ S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
+#endif
} else { /* |x| >= 1/0.35 */
+#ifdef DO_NOT_USE_THIS
R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
+#else
+ double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
+ R1 = rb[0]+s*rb[1];s2 = s*s;
+ S1 = one+s*sb[1]; s4 = s2*s2;
+ R2 = rb[2]+s*rb[3];s6 = s4*s2;
+ S2 = sb[2]+s*sb[3];
+ R3 = rb[4]+s*rb[5];
+ S3 = sb[4]+s*sb[5];
+ S4 = sb[6]+s*sb[7];
+ R = R1 + s2*R2 + s4*R3 + s6*rb[6];
+ S = S1 + s2*S2 + s4*S3 + s6*S4;
+#endif
}
z = x;
SET_LOW_WORD(z,0);
@@ -269,11 +324,22 @@ weak_alias (__erf, erfl)
}
if(ix < 0x3feb0000) { /* |x|<0.84375 */
+ double r1,r2,s1,s2,s3,z2,z4;
if(ix < 0x3c700000) /* |x|<2**-56 */
return one-x;
z = x*x;
+#ifdef DO_NOT_USE_THIS
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+#else
+ r1 = pp[0]+z*pp[1]; z2=z*z;
+ r2 = pp[2]+z*pp[3]; z4=z2*z2;
+ s1 = one+z*qq[1];
+ s2 = qq[2]+z*qq[3];
+ s3 = qq[4]+z*qq[5];
+ r = r1 + z2*r2 + z4*pp[4];
+ s = s1 + z2*s2 + z4*s3;
+#endif
y = r/s;
if(hx < 0x3fd00000) { /* x<1/4 */
return one-(x+x*y);
@@ -284,9 +350,23 @@ weak_alias (__erf, erfl)
}
}
if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
+ double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
s = fabs(x)-one;
+#ifdef DO_NOT_USE_THIS
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+#else
+ P1 = pa[0]+s*pa[1]; s2=s*s;
+ Q1 = one+s*qa[1]; s4=s2*s2;
+ P2 = pa[2]+s*pa[3]; s6=s4*s2;
+ Q2 = qa[2]+s*qa[3];
+ P3 = pa[4]+s*pa[5];
+ Q3 = qa[4]+s*qa[5];
+ P4 = s6*pa[6];
+ Q4 = s6*qa[6];
+ P = P1 + s2*P2 + s4*P3 + s6*P4;
+ Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
+#endif
if(hx>=0) {
z = one-erx; return z - P/Q;
} else {
@@ -297,16 +377,43 @@ weak_alias (__erf, erfl)
x = fabs(x);
s = one/(x*x);
if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/
+#ifdef DO_NOT_USE_THIS
R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
+#else
+ double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
+ R1 = ra[0]+s*ra[1];s2 = s*s;
+ S1 = one+s*sa[1]; s4 = s2*s2;
+ R2 = ra[2]+s*ra[3];s6 = s4*s2;
+ S2 = sa[2]+s*sa[3];s8 = s4*s4;
+ R3 = ra[4]+s*ra[5];
+ S3 = sa[4]+s*sa[5];
+ R4 = ra[6]+s*ra[7];
+ S4 = sa[6]+s*sa[7];
+ R = R1 + s2*R2 + s4*R3 + s6*R4;
+ S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
+#endif
} else { /* |x| >= 1/.35 ~ 2.857143 */
+ double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
+#ifdef DO_NOT_USE_THIS
R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
+#else
+ R1 = rb[0]+s*rb[1];s2 = s*s;
+ S1 = one+s*sb[1]; s4 = s2*s2;
+ R2 = rb[2]+s*rb[3];s6 = s4*s2;
+ S2 = sb[2]+s*sb[3];
+ R3 = rb[4]+s*rb[5];
+ S3 = sb[4]+s*sb[5];
+ S4 = sb[6]+s*sb[7];
+ R = R1 + s2*R2 + s4*R3 + s6*rb[6];
+ S = S1 + s2*S2 + s4*S3 + s6*S4;
+#endif
}
z = x;
SET_LOW_WORD(z,0);
diff --git a/sysdeps/libm-ieee754/s_expm1.c b/sysdeps/libm-ieee754/s_expm1.c
index c8354b7262..ed1aba527b 100644
--- a/sysdeps/libm-ieee754/s_expm1.c
+++ b/sysdeps/libm-ieee754/s_expm1.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $";
@@ -111,7 +114,7 @@ static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $";
#include "math.h"
#include "math_private.h"
-
+#define one Q[0]
#ifdef __STDC__
static const double
#else
@@ -125,11 +128,11 @@ ln2_hi = 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */
ln2_lo = 1.90821492927058770002e-10,/* 0x3dea39ef, 0x35793c76 */
invln2 = 1.44269504088896338700e+00,/* 0x3ff71547, 0x652b82fe */
/* scaled coefficients related to expm1 */
-Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
-Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
-Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
-Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
-Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
+Q[] = {1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
+ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
+ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
+ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
+ -2.01099218183624371326e-07}; /* BE8AFDB7 6E09C32D */
#ifdef __STDC__
double __expm1(double x)
@@ -138,7 +141,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
double x;
#endif
{
- double y,hi,lo,c,t,e,hxs,hfx,r1;
+ double y,hi,lo,c,t,e,hxs,hfx,r1,h2,h4,R1,R2,R3;
int32_t k,xsb;
u_int32_t hx;
@@ -190,7 +193,14 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
/* x is now in primary range */
hfx = 0.5*x;
hxs = x*hfx;
+#ifdef DO_NOT_USE_THIS
r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
+#else
+ R1 = one+hxs*Q[1]; h2 = hxs*hxs;
+ R2 = Q[2]+hxs*Q[3]; h4 = h2*h2;
+ R3 = Q[4]+hxs*Q[5];
+ r1 = R1 + h2*R2 + h4*R3;
+#endif
t = 3.0-r1*hfx;
e = hxs*((r1-t)/(6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */
diff --git a/sysdeps/libm-ieee754/s_log1p.c b/sysdeps/libm-ieee754/s_log1p.c
index 086c0dce6c..4ca01b1ff5 100644
--- a/sysdeps/libm-ieee754/s_log1p.c
+++ b/sysdeps/libm-ieee754/s_log1p.c
@@ -9,6 +9,9 @@
* is preserved.
* ====================================================
*/
+/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
+ for performance improvement on pipelined processors.
+*/
#if defined(LIBM_SCCS) && !defined(lint)
static char rcsid[] = "$NetBSD: s_log1p.c,v 1.8 1995/05/10 20:47:46 jtc Exp $";
@@ -90,13 +93,13 @@ static double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
-Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
-Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
-Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
-Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
-Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
-Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
-Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+Lp[] = {0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
+ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
+ 2.857142874366239149e-01, /* 3FD24924 94229359 */
+ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
+ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
+ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
+ 1.479819860511658591e-01}; /* 3FC2F112 DF3E5244 */
#ifdef __STDC__
static const double zero = 0.0;
@@ -111,7 +114,7 @@ static double zero = 0.0;
double x;
#endif
{
- double hfsq,f,c,s,z,R,u;
+ double hfsq,f,c,s,z,R,u,z2,z4,z6,R1,R2,R3,R4;
int32_t k,hx,hu,ax;
GET_HIGH_WORD(hx,x);
@@ -167,7 +170,15 @@ static double zero = 0.0;
}
s = f/(2.0+f);
z = s*s;
+#ifdef DO_NOT_USE_THIS
R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
+#else
+ R1 = z*Lp[1]; z2=z*z;
+ R2 = Lp[2]+z*Lp[3]; z4=z2*z2;
+ R3 = Lp[4]+z*Lp[5]; z6=z4*z2;
+ R4 = Lp[6]+z*Lp[7];
+ R = R1 + z2*R2 + z4*R3 + z6*R4;
+#endif
if(k==0) return f-(hfsq-s*(hfsq+R)); else
return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
}