From 476d692e8a80bb770a3f74876f64bcfa2998a09d Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 10 Mar 2021 12:40:26 +0000 Subject: math: Remove slow paths in tan [BZ #15267] Remove slow paths in tan. Add ULP annotations. Merge 'number' into 'mynumber'. Remove unused entries from tan constants. Reviewed-By: Paul Zimmermann --- sysdeps/ieee754/dbl-64/e_atan2.c | 1 + sysdeps/ieee754/dbl-64/mydefs.h | 2 +- sysdeps/ieee754/dbl-64/s_tan.c | 610 +++++---------------------------------- sysdeps/ieee754/dbl-64/utan.h | 172 +---------- sysdeps/ieee754/dbl-64/utan.tbl | 4 +- 5 files changed, 81 insertions(+), 708 deletions(-) (limited to 'sysdeps/ieee754') diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c index b14d911ebf..e6b98142fb 100644 --- a/sysdeps/ieee754/dbl-64/e_atan2.c +++ b/sysdeps/ieee754/dbl-64/e_atan2.c @@ -39,6 +39,7 @@ #include #include "mpa.h" #include "MathLib.h" +#include "mydefs.h" #include "uatan.tbl" #include "atnat2.h" #include diff --git a/sysdeps/ieee754/dbl-64/mydefs.h b/sysdeps/ieee754/dbl-64/mydefs.h index cdbcabf8a7..a73c4de2c1 100644 --- a/sysdeps/ieee754/dbl-64/mydefs.h +++ b/sysdeps/ieee754/dbl-64/mydefs.h @@ -28,7 +28,7 @@ #define MY_H typedef int int4; -typedef union { int4 i[2]; double x; } mynumber; +typedef union { int4 i[2]; double x; double d; } mynumber; #define max(x, y) (((y) > (x)) ? (y) : (x)) #define min(x, y) (((y) < (x)) ? (y) : (x)) diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c index 6aa5a5a8e3..a4f21fde4e 100644 --- a/sysdeps/ieee754/dbl-64/s_tan.c +++ b/sysdeps/ieee754/dbl-64/s_tan.c @@ -20,40 +20,30 @@ /* MODULE_NAME: utan.c */ /* */ /* FUNCTIONS: utan */ -/* tanMp */ /* */ -/* FILES NEEDED:dla.h endian.h mpa.h mydefs.h utan.h */ -/* branred.c sincos32.c mptan.c */ +/* FILES NEEDED:dla.h endian.h mydefs.h utan.h */ +/* branred.c */ /* utan.tbl */ /* */ -/* An ultimate tan routine. Given an IEEE double machine number x */ -/* it computes the correctly rounded (to nearest) value of tan(x). */ -/* Assumption: Machine arithmetic operations are performed in */ -/* round to nearest mode of IEEE 754 standard. */ -/* */ /*********************************************************************/ #include #include #include "endian.h" #include -#include "mpa.h" -#include "MathLib.h" +#include "mydefs.h" #include #include #include #include #include #include -#include #ifndef SECTION # define SECTION #endif -static double tanMp (double); -void __mptan (double, mp_no *, int); - +/* tan with max ULP of ~0.619 based on random sampling. */ double SECTION __tan (double x) @@ -62,17 +52,14 @@ __tan (double x) #include "utan.tbl" int ux, i, n; - double a, da, a2, b, db, c, dc, c1, cc1, c2, cc2, c3, cc3, fi, ffi, gi, pz, - s, sy, t, t1, t2, t3, t4, w, x2, xn, xx2, y, ya, - yya, z0, z, zz, z2, zz2; - int p; - number num, v; - mp_no mpa, mpt1, mpt2; + double a, da, a2, b, db, c, dc, fi, gi, pz, + s, sy, t, t1, t2, t3, t4, w, x2, xn, y, ya, + yya, z0, z, z2; + mynumber num, v; double retval; int __branred (double, double *, double *); - int __mpranred (double, mp_no *, int); SET_RESTORE_ROUND_53BIT (FE_TONEAREST); @@ -100,7 +87,6 @@ __tan (double x) /* (II) The case 1.259e-8 < abs(x) <= 0.0608 */ if (w <= g2.d) { - /* First stage */ x2 = x * x; t2 = d9.d + x2 * d11.d; @@ -109,50 +95,16 @@ __tan (double x) t2 = d3.d + x2 * t2; t2 *= x * x2; - if ((y = x + (t2 - u1.d * t2)) == x + (t2 + u1.d * t2)) - { - retval = y; - goto ret; - } - - /* Second stage */ - c1 = a25.d + x2 * a27.d; - c1 = a23.d + x2 * c1; - c1 = a21.d + x2 * c1; - c1 = a19.d + x2 * c1; - c1 = a17.d + x2 * c1; - c1 = a15.d + x2 * c1; - c1 *= x2; - - EMULV (x, x, x2, xx2); - ADD2 (a13.d, aa13.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a11.d, aa11.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a9.d, aa9.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a7.d, aa7.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a5.d, aa5.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - MUL2 (x, 0.0, c1, cc1, c2, cc2, t1, t2); - ADD2 (x, 0.0, c2, cc2, c1, cc1, t1, t2); - if ((y = c1 + (cc1 - u2.d * c1)) == c1 + (cc1 + u2.d * c1)) - { - retval = y; - goto ret; - } - retval = tanMp (x); + y = x + t2; + retval = y; + /* Max ULP is 0.504. */ goto ret; } /* (III) The case 0.0608 < abs(x) <= 0.787 */ if (w <= g3.d) { - /* First stage */ - i = ((int) (mfftnhf.d + TWO8 * w)); + i = ((int) (mfftnhf.d + 256 * w)); z = w - xfg[i][0].d; z2 = z * z; s = (x < 0.0) ? -1 : 1; @@ -160,41 +112,9 @@ __tan (double x) fi = xfg[i][1].d; gi = xfg[i][2].d; t2 = pz * (gi + fi) / (gi - pz); - if ((y = fi + (t2 - fi * u3.d)) == fi + (t2 + fi * u3.d)) - { - retval = (s * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = fi * ua3.d + t3 * ub3.d; - if ((y = fi + (t2 - t4)) == fi + (t2 + t4)) - { - retval = (s * y); - goto ret; - } - - /* Second stage */ - ffi = xfg[i][3].d; - c1 = z2 * (a7.d + z2 * (a9.d + z2 * a11.d)); - EMULV (z, z, z2, zz2); - ADD2 (a5.d, aa5.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - MUL2 (z, 0.0, c1, cc1, c2, cc2, t1, t2); - ADD2 (z, 0.0, c2, cc2, c1, cc1, t1, t2); - - ADD2 (fi, ffi, c1, cc1, c2, cc2, t1, t2); - MUL2 (fi, ffi, c1, cc1, c3, cc3, t1, t2); - SUB2 (1.0, 0.0, c3, cc3, c1, cc1, t1, t2); - DIV2 (c2, cc2, c1, cc1, c3, cc3, t1, t2, t3, t4); - - if ((y = c3 + (cc3 - u4.d * c3)) == c3 + (cc3 + u4.d * c3)) - { - retval = (s * y); - goto ret; - } - retval = tanMp (x); + y = fi + t2; + retval = (s * y); + /* Max ULP is 0.60. */ goto ret; } @@ -223,14 +143,7 @@ __tan (double x) sy = 1; } - /* (IV),(V) The case 0.787 < abs(x) <= 25, abs(y) <= 1e-7 */ - if (ya <= gy1.d) - { - retval = tanMp (x); - goto ret; - } - - /* (VI) The case 0.787 < abs(x) <= 25, 1e-7 < abs(y) <= 0.0608 */ + /* (VI) The case 0.787 < abs(x) <= 25, 0 < abs(y) <= 0.0608 */ if (ya <= gy2.d) { a2 = a * a; @@ -242,94 +155,27 @@ __tan (double x) if (n) { - /* First stage -cot */ + /* -cot */ EADD (a, t2, b, db); DIV2 (1.0, 0.0, b, db, c, dc, t1, t2, t3, t4); - if ((y = c + (dc - u6.d * c)) == c + (dc + u6.d * c)) - { - retval = (-y); - goto ret; - } - } - else - { - /* First stage tan */ - if ((y = a + (t2 - u5.d * a)) == a + (t2 + u5.d * a)) - { - retval = y; - goto ret; - } - } - /* Second stage */ - /* Range reduction by algorithm ii */ - t = (x * hpinv.d + toint.d); - xn = t - toint.d; - v.d = t; - t1 = (x - xn * mp1.d) - xn * mp2.d; - n = v.i[LOW_HALF] & 0x00000001; - da = xn * pp3.d; - t = t1 - da; - da = (t1 - t) - da; - t1 = xn * pp4.d; - a = t - t1; - da = ((t - a) - t1) + da; - - /* Second stage */ - EADD (a, da, t1, t2); - a = t1; - da = t2; - MUL2 (a, da, a, da, x2, xx2, t1, t2); - - c1 = a25.d + x2 * a27.d; - c1 = a23.d + x2 * c1; - c1 = a21.d + x2 * c1; - c1 = a19.d + x2 * c1; - c1 = a17.d + x2 * c1; - c1 = a15.d + x2 * c1; - c1 *= x2; - - ADD2 (a13.d, aa13.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a11.d, aa11.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a9.d, aa9.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a7.d, aa7.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a5.d, aa5.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - MUL2 (a, da, c1, cc1, c2, cc2, t1, t2); - ADD2 (a, da, c2, cc2, c1, cc1, t1, t2); - - if (n) - { - /* Second stage -cot */ - DIV2 (1.0, 0.0, c1, cc1, c2, cc2, t1, t2, t3, t4); - if ((y = c2 + (cc2 - u8.d * c2)) == c2 + (cc2 + u8.d * c2)) - { - retval = (-y); - goto ret; - } + y = c + dc; + retval = (-y); + /* Max ULP is 0.506. */ + goto ret; } else { - /* Second stage tan */ - if ((y = c1 + (cc1 - u7.d * c1)) == c1 + (cc1 + u7.d * c1)) - { - retval = y; - goto ret; - } + /* tan */ + y = a + t2; + retval = y; + /* Max ULP is 0.506. */ + goto ret; } - retval = tanMp (x); - goto ret; } /* (VII) The case 0.787 < abs(x) <= 25, 0.0608 < abs(y) <= 0.787 */ - /* First stage */ - i = ((int) (mfftnhf.d + TWO8 * ya)); + i = ((int) (mfftnhf.d + 256 * ya)); z = (z0 = (ya - xfg[i][0].d)) + yya; z2 = z * z; pz = z + z * z2 * (e0.d + z2 * e1.d); @@ -340,76 +186,20 @@ __tan (double x) { /* -cot */ t2 = pz * (fi + gi) / (fi + pz); - if ((y = gi - (t2 - gi * u10.d)) == gi - (t2 + gi * u10.d)) - { - retval = (-sy * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = gi * ua10.d + t3 * ub10.d; - if ((y = gi - (t2 - t4)) == gi - (t2 + t4)) - { - retval = (-sy * y); - goto ret; - } + y = gi - t2; + retval = (-sy * y); + /* Max ULP is 0.62. */ + goto ret; } else { /* tan */ t2 = pz * (gi + fi) / (gi - pz); - if ((y = fi + (t2 - fi * u9.d)) == fi + (t2 + fi * u9.d)) - { - retval = (sy * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = fi * ua9.d + t3 * ub9.d; - if ((y = fi + (t2 - t4)) == fi + (t2 + t4)) - { - retval = (sy * y); - goto ret; - } - } - - /* Second stage */ - ffi = xfg[i][3].d; - EADD (z0, yya, z, zz) - MUL2 (z, zz, z, zz, z2, zz2, t1, t2); - c1 = z2 * (a7.d + z2 * (a9.d + z2 * a11.d)); - ADD2 (a5.d, aa5.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - MUL2 (z, zz, c1, cc1, c2, cc2, t1, t2); - ADD2 (z, zz, c2, cc2, c1, cc1, t1, t2); - - ADD2 (fi, ffi, c1, cc1, c2, cc2, t1, t2); - MUL2 (fi, ffi, c1, cc1, c3, cc3, t1, t2); - SUB2 (1.0, 0.0, c3, cc3, c1, cc1, t1, t2); - - if (n) - { - /* -cot */ - DIV2 (c1, cc1, c2, cc2, c3, cc3, t1, t2, t3, t4); - if ((y = c3 + (cc3 - u12.d * c3)) == c3 + (cc3 + u12.d * c3)) - { - retval = (-sy * y); - goto ret; - } - } - else - { - /* tan */ - DIV2 (c2, cc2, c1, cc1, c3, cc3, t1, t2, t3, t4); - if ((y = c3 + (cc3 - u11.d * c3)) == c3 + (cc3 + u11.d * c3)) - { - retval = (sy * y); - goto ret; - } + y = fi + t2; + retval = (sy * y); + /* Max ULP is 0.62. */ + goto ret; } - - retval = tanMp (x); - goto ret; } /* (---) The case 25 < abs(x) <= 1e8 */ @@ -443,14 +233,7 @@ __tan (double x) sy = 1; } - /* (+++) The case 25 < abs(x) <= 1e8, abs(y) <= 1e-7 */ - if (ya <= gy1.d) - { - retval = tanMp (x); - goto ret; - } - - /* (VIII) The case 25 < abs(x) <= 1e8, 1e-7 < abs(y) <= 0.0608 */ + /* (VIII) The case 25 < abs(x) <= 1e8, 0 < abs(y) <= 0.0608 */ if (ya <= gy2.d) { a2 = a * a; @@ -462,76 +245,26 @@ __tan (double x) if (n) { - /* First stage -cot */ + /* -cot */ EADD (a, t2, b, db); DIV2 (1.0, 0.0, b, db, c, dc, t1, t2, t3, t4); - if ((y = c + (dc - u14.d * c)) == c + (dc + u14.d * c)) - { - retval = (-y); - goto ret; - } - } - else - { - /* First stage tan */ - if ((y = a + (t2 - u13.d * a)) == a + (t2 + u13.d * a)) - { - retval = y; - goto ret; - } - } - - /* Second stage */ - MUL2 (a, da, a, da, x2, xx2, t1, t2); - c1 = a25.d + x2 * a27.d; - c1 = a23.d + x2 * c1; - c1 = a21.d + x2 * c1; - c1 = a19.d + x2 * c1; - c1 = a17.d + x2 * c1; - c1 = a15.d + x2 * c1; - c1 *= x2; - - ADD2 (a13.d, aa13.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a11.d, aa11.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a9.d, aa9.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a7.d, aa7.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a5.d, aa5.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - MUL2 (a, da, c1, cc1, c2, cc2, t1, t2); - ADD2 (a, da, c2, cc2, c1, cc1, t1, t2); - - if (n) - { - /* Second stage -cot */ - DIV2 (1.0, 0.0, c1, cc1, c2, cc2, t1, t2, t3, t4); - if ((y = c2 + (cc2 - u16.d * c2)) == c2 + (cc2 + u16.d * c2)) - { - retval = (-y); - goto ret; - } + y = c + dc; + retval = (-y); + /* Max ULP is 0.506. */ + goto ret; } else { - /* Second stage tan */ - if ((y = c1 + (cc1 - u15.d * c1)) == c1 + (cc1 + u15.d * c1)) - { - retval = (y); - goto ret; - } + /* tan */ + y = a + t2; + retval = y; + /* Max ULP is 0.506. */ + goto ret; } - retval = tanMp (x); - goto ret; } /* (IX) The case 25 < abs(x) <= 1e8, 0.0608 < abs(y) <= 0.787 */ - /* First stage */ - i = ((int) (mfftnhf.d + TWO8 * ya)); + i = ((int) (mfftnhf.d + 256 * ya)); z = (z0 = (ya - xfg[i][0].d)) + yya; z2 = z * z; pz = z + z * z2 * (e0.d + z2 * e1.d); @@ -542,75 +275,20 @@ __tan (double x) { /* -cot */ t2 = pz * (fi + gi) / (fi + pz); - if ((y = gi - (t2 - gi * u18.d)) == gi - (t2 + gi * u18.d)) - { - retval = (-sy * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = gi * ua18.d + t3 * ub18.d; - if ((y = gi - (t2 - t4)) == gi - (t2 + t4)) - { - retval = (-sy * y); - goto ret; - } + y = gi - t2; + retval = (-sy * y); + /* Max ULP is 0.62. */ + goto ret; } else { /* tan */ t2 = pz * (gi + fi) / (gi - pz); - if ((y = fi + (t2 - fi * u17.d)) == fi + (t2 + fi * u17.d)) - { - retval = (sy * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = fi * ua17.d + t3 * ub17.d; - if ((y = fi + (t2 - t4)) == fi + (t2 + t4)) - { - retval = (sy * y); - goto ret; - } - } - - /* Second stage */ - ffi = xfg[i][3].d; - EADD (z0, yya, z, zz); - MUL2 (z, zz, z, zz, z2, zz2, t1, t2); - c1 = z2 * (a7.d + z2 * (a9.d + z2 * a11.d)); - ADD2 (a5.d, aa5.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - MUL2 (z, zz, c1, cc1, c2, cc2, t1, t2); - ADD2 (z, zz, c2, cc2, c1, cc1, t1, t2); - - ADD2 (fi, ffi, c1, cc1, c2, cc2, t1, t2); - MUL2 (fi, ffi, c1, cc1, c3, cc3, t1, t2); - SUB2 (1.0, 0.0, c3, cc3, c1, cc1, t1, t2); - - if (n) - { - /* -cot */ - DIV2 (c1, cc1, c2, cc2, c3, cc3, t1, t2, t3, t4); - if ((y = c3 + (cc3 - u20.d * c3)) == c3 + (cc3 + u20.d * c3)) - { - retval = (-sy * y); - goto ret; - } - } - else - { - /* tan */ - DIV2 (c2, cc2, c1, cc1, c3, cc3, t1, t2, t3, t4); - if ((y = c3 + (cc3 - u19.d * c3)) == c3 + (cc3 + u19.d * c3)) - { - retval = (sy * y); - goto ret; - } + y = fi + t2; + retval = (sy * y); + /* Max ULP is 0.62. */ + goto ret; } - retval = tanMp (x); - goto ret; } /* (---) The case 1e8 < abs(x) < 2**1024 */ @@ -632,14 +310,7 @@ __tan (double x) sy = 1; } - /* (+++) The case 1e8 < abs(x) < 2**1024, abs(y) <= 1e-7 */ - if (ya <= gy1.d) - { - retval = tanMp (x); - goto ret; - } - - /* (X) The case 1e8 < abs(x) < 2**1024, 1e-7 < abs(y) <= 0.0608 */ + /* (X) The case 1e8 < abs(x) < 2**1024, 0 < abs(y) <= 0.0608 */ if (ya <= gy2.d) { a2 = a * a; @@ -650,85 +321,26 @@ __tan (double x) t2 = da + a * a2 * t2; if (n) { - /* First stage -cot */ + /* -cot */ EADD (a, t2, b, db); DIV2 (1.0, 0.0, b, db, c, dc, t1, t2, t3, t4); - if ((y = c + (dc - u22.d * c)) == c + (dc + u22.d * c)) - { - retval = (-y); - goto ret; - } - } - else - { - /* First stage tan */ - if ((y = a + (t2 - u21.d * a)) == a + (t2 + u21.d * a)) - { - retval = y; - goto ret; - } - } - - /* Second stage */ - /* Reduction by algorithm iv */ - p = 10; - n = (__mpranred (x, &mpa, p)) & 0x00000001; - __mp_dbl (&mpa, &a, p); - __dbl_mp (a, &mpt1, p); - __sub (&mpa, &mpt1, &mpt2, p); - __mp_dbl (&mpt2, &da, p); - - MUL2 (a, da, a, da, x2, xx2, t1, t2); - - c1 = a25.d + x2 * a27.d; - c1 = a23.d + x2 * c1; - c1 = a21.d + x2 * c1; - c1 = a19.d + x2 * c1; - c1 = a17.d + x2 * c1; - c1 = a15.d + x2 * c1; - c1 *= x2; - - ADD2 (a13.d, aa13.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a11.d, aa11.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a9.d, aa9.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a7.d, aa7.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a5.d, aa5.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (x2, xx2, c2, cc2, c1, cc1, t1, t2); - MUL2 (a, da, c1, cc1, c2, cc2, t1, t2); - ADD2 (a, da, c2, cc2, c1, cc1, t1, t2); - - if (n) - { - /* Second stage -cot */ - DIV2 (1.0, 0.0, c1, cc1, c2, cc2, t1, t2, t3, t4); - if ((y = c2 + (cc2 - u24.d * c2)) == c2 + (cc2 + u24.d * c2)) - { - retval = (-y); - goto ret; - } + y = c + dc; + retval = (-y); + /* Max ULP is 0.506. */ + goto ret; } else { - /* Second stage tan */ - if ((y = c1 + (cc1 - u23.d * c1)) == c1 + (cc1 + u23.d * c1)) - { - retval = y; - goto ret; - } + /* tan */ + y = a + t2; + retval = y; + /* Max ULP is 0.507. */ + goto ret; } - retval = tanMp (x); - goto ret; } /* (XI) The case 1e8 < abs(x) < 2**1024, 0.0608 < abs(y) <= 0.787 */ - /* First stage */ - i = ((int) (mfftnhf.d + TWO8 * ya)); + i = ((int) (mfftnhf.d + 256 * ya)); z = (z0 = (ya - xfg[i][0].d)) + yya; z2 = z * z; pz = z + z * z2 * (e0.d + z2 * e1.d); @@ -739,97 +351,25 @@ __tan (double x) { /* -cot */ t2 = pz * (fi + gi) / (fi + pz); - if ((y = gi - (t2 - gi * u26.d)) == gi - (t2 + gi * u26.d)) - { - retval = (-sy * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = gi * ua26.d + t3 * ub26.d; - if ((y = gi - (t2 - t4)) == gi - (t2 + t4)) - { - retval = (-sy * y); - goto ret; - } + y = gi - t2; + retval = (-sy * y); + /* Max ULP is 0.62. */ + goto ret; } else { /* tan */ t2 = pz * (gi + fi) / (gi - pz); - if ((y = fi + (t2 - fi * u25.d)) == fi + (t2 + fi * u25.d)) - { - retval = (sy * y); - goto ret; - } - t3 = (t2 < 0.0) ? -t2 : t2; - t4 = fi * ua25.d + t3 * ub25.d; - if ((y = fi + (t2 - t4)) == fi + (t2 + t4)) - { - retval = (sy * y); - goto ret; - } - } - - /* Second stage */ - ffi = xfg[i][3].d; - EADD (z0, yya, z, zz); - MUL2 (z, zz, z, zz, z2, zz2, t1, t2); - c1 = z2 * (a7.d + z2 * (a9.d + z2 * a11.d)); - ADD2 (a5.d, aa5.d, c1, 0.0, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - ADD2 (a3.d, aa3.d, c1, cc1, c2, cc2, t1, t2); - MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2); - MUL2 (z, zz, c1, cc1, c2, cc2, t1, t2); - ADD2 (z, zz, c2, cc2, c1, cc1, t1, t2); - - ADD2 (fi, ffi, c1, cc1, c2, cc2, t1, t2); - MUL2 (fi, ffi, c1, cc1, c3, cc3, t1, t2); - SUB2 (1.0, 0.0, c3, cc3, c1, cc1, t1, t2); - - if (n) - { - /* -cot */ - DIV2 (c1, cc1, c2, cc2, c3, cc3, t1, t2, t3, t4); - if ((y = c3 + (cc3 - u28.d * c3)) == c3 + (cc3 + u28.d * c3)) - { - retval = (-sy * y); - goto ret; - } - } - else - { - /* tan */ - DIV2 (c2, cc2, c1, cc1, c3, cc3, t1, t2, t3, t4); - if ((y = c3 + (cc3 - u27.d * c3)) == c3 + (cc3 + u27.d * c3)) - { - retval = (sy * y); - goto ret; - } + y = fi + t2; + retval = (sy * y); + /* Max ULP is 0.62. */ + goto ret; } - retval = tanMp (x); - goto ret; ret: return retval; } -/* multiple precision stage */ -/* Convert x to multi precision number,compute tan(x) by mptan() routine */ -/* and converts result back to double */ -static double -SECTION -tanMp (double x) -{ - int p; - double y; - mp_no mpy; - p = 32; - __mptan (x, &mpy, p); - __mp_dbl (&mpy, &y, p); - LIBC_PROBE (slowtan, 2, &x, &y); - return y; -} - #ifndef __tan libm_alias_double (__tan, tan) #endif diff --git a/sysdeps/ieee754/dbl-64/utan.h b/sysdeps/ieee754/dbl-64/utan.h index 85f4861b1f..37e4be87b3 100644 --- a/sysdeps/ieee754/dbl-64/utan.h +++ b/sysdeps/ieee754/dbl-64/utan.h @@ -28,7 +28,7 @@ #define UTAN_H #ifdef BIG_ENDI - static const number + static const mynumber /* polynomial I */ /**/ d3 = {{0x3FD55555, 0x55555555} }, /* 0.333... */ /**/ d5 = {{0x3FC11111, 0x111107C6} }, /* 0.133... */ @@ -36,46 +36,6 @@ /**/ d9 = {{0x3F9664ED, 0x49CFC666} }, /* . */ /**/ d11 = {{0x3F82385A, 0x3CF2E4EA} }, /* . */ /* polynomial II */ -/**/ a3 = {{0x3fd55555, 0x55555555} }, /* 1/3 */ -/**/ aa3 = {{0x3c755555, 0x55555555} }, /* 1/3-a3 */ -/**/ a5 = {{0x3fc11111, 0x11111111} }, /* 2/15 */ -/**/ aa5 = {{0x3c411111, 0x11111111} }, /* 2/15-a5 */ -/**/ a7 = {{0x3faba1ba, 0x1ba1ba1c} }, /* 17/315 */ -/**/ aa7 = {{0xbc479179, 0x17917918} }, /* ()-a7 */ -/**/ a9 = {{0x3f9664f4, 0x882c10fa} }, /* 62/2835 */ -/**/ aa9 = {{0xbc09a528, 0x8b6c44fd} }, /* ()-a9 */ -/**/ a11 = {{0x3f8226e3, 0x55e6c23d} }, /* . */ -/**/ aa11 = {{0xbc2c292b, 0x8f1a2c13} }, /* . */ -/**/ a13 = {{0x3f6d6d3d, 0x0e157de0} }, /* . */ -/**/ aa13 = {{0xbc0280cf, 0xc968d971} }, /* . */ -/**/ a15 = {{0x3f57da36, 0x452b75e3} }, /* . */ -#if 0 -/**/ aa15 = {{0xbbf25789, 0xb285d2ed} }, /* . */ -#endif -/**/ a17 = {{0x3f435582, 0x48036744} }, /* . */ -#if 0 -/**/ aa17 = {{0x3be488d9, 0x563f1f23} }, /* . */ -#endif -/**/ a19 = {{0x3f2f57d7, 0x734d1664} }, /* . */ -#if 0 -/**/ aa19 = {{0x3bb0d55a, 0x913ccb50} }, /* . */ -#endif -/**/ a21 = {{0x3f1967e1, 0x8afcafad} }, /* . */ -#if 0 -/**/ aa21 = {{0xbbbd7614, 0xa42d44e6} }, /* . */ -#endif -/**/ a23 = {{0x3f0497d8, 0xeea25259} }, /* . */ -#if 0 -/**/ aa23 = {{0x3b99f2d0, 0x2e4d2863} }, /* . */ -#endif -/**/ a25 = {{0x3ef0b132, 0xd39a6050} }, /* . */ -#if 0 -/**/ aa25 = {{0x3b93b274, 0xc2c19614} }, /* . */ -#endif -/**/ a27 = {{0x3edb0f72, 0xd3ee24e9} }, /* . */ -#if 0 -/**/ aa27 = {{0x3b61688d, 0xdd595609} }, /* . */ -#endif /* polynomial III */ /**/ e0 = {{0x3FD55555, 0x55554DBD} }, /* . */ /**/ e1 = {{0x3FC11112, 0xE0A6B45F} }, /* . */ @@ -88,52 +48,8 @@ /**/ g3 = {{0x3fe92f1a, 0x00000000} }, /* 0.787 */ /**/ g4 = {{0x40390000, 0x00000000} }, /* 25.0 */ /**/ g5 = {{0x4197d784, 0x00000000} }, /* 1e8 */ -/**/ gy1 = {{0x3e7ad7f2, 0x9abcaf48} }, /* 1e-7 */ /**/ gy2 = {{0x3faf212d, 0x00000000} }, /* 0.0608 */ -/**/ u1 = {{0x3cc8c33a, 0x00000000} }, /* 6.873e-16 */ -/**/ u2 = {{0x3983dc4d, 0x00000000} }, /* 1.224e-31 */ -/**/ u3 = {{0x3c78e14b, 0x00000000} }, /* 2.158e-17 */ -/**/ ua3 = {{0x3bfd8b58, 0x00000000} }, /* 1.001e-19 */ -/**/ ub3 = {{0x3cc81898, 0x00000000} }, /* 6.688e-16 */ -/**/ u4 = {{0x399856c2, 0x00000000} }, /* 3e-31 */ -/**/ u5 = {{0x3c39d80a, 0x00000000} }, /* 1.401e-18 */ -/**/ u6 = {{0x3c374c5a, 0x00000000} }, /* 1.263e-18 */ -/**/ u7 = {{0x39903beb, 0x00000000} }, /* 2.001e-31 */ -/**/ u8 = {{0x399c56ae, 0x00000000} }, /* 3.493e-31 */ -/**/ u9 = {{0x3c7d0ac7, 0x00000000} }, /* 2.519e-17 */ -/**/ ua9 = {{0x3bfd8b58, 0x00000000} }, /* 1.001e-19 */ -/**/ ub9 = {{0x3ccc2375, 0x00000000} }, /* 7.810e-16 */ -/**/ u10 = {{0x3c7e40af, 0x00000000} }, /* 2.624e-17 */ -/**/ ua10 = {{0x3bfd8b58, 0x00000000} }, /* 1.001e-19 */ -/**/ ub10 = {{0x3ccc6405, 0x00000000} }, /* 7.880e-16 */ -/**/ u11 = {{0x39e509b6, 0x00000000} }, /* 8.298e-30 */ -/**/ u12 = {{0x39e509b6, 0x00000000} }, /* 8.298e-30 */ -/**/ u13 = {{0x3c39d80a, 0x00000000} }, /* 1.401e-18 */ -/**/ u14 = {{0x3c374c5a, 0x00000000} }, /* 1.263e-18 */ -/**/ u15 = {{0x3ab5767a, 0x00000000} }, /* 6.935e-26 */ -/**/ u16 = {{0x3ab57744, 0x00000000} }, /* 6.936e-26 */ -/**/ u17 = {{0x3c7d0ac7, 0x00000000} }, /* 2.519e-17 */ -/**/ ua17 = {{0x3bfdb11f, 0x00000000} }, /* 1.006e-19 */ -/**/ ub17 = {{0x3ccc2375, 0x00000000} }, /* 7.810e-16 */ -/**/ u18 = {{0x3c7e40af, 0x00000000} }, /* 2.624e-17 */ -/**/ ua18 = {{0x3bfdb11f, 0x00000000} }, /* 1.006e-19 */ -/**/ ub18 = {{0x3ccc6405, 0x00000000} }, /* 7.880e-16 */ -/**/ u19 = {{0x39a13b61, 0x00000000} }, /* 4.248e-31 */ -/**/ u20 = {{0x39a13b61, 0x00000000} }, /* 4.248e-31 */ -/**/ u21 = {{0x3c3bb9b8, 0x00000000} }, /* 1.503e-18 */ -/**/ u22 = {{0x3c392e08, 0x00000000} }, /* 1.365e-18 */ -/**/ u23 = {{0x3a0ce706, 0x00000000} }, /* 4.560e-29 */ -/**/ u24 = {{0x3a0cff5d, 0x00000000} }, /* 4.575e-29 */ -/**/ u25 = {{0x3c7d0ac7, 0x00000000} }, /* 2.519e-17 */ -/**/ ua25 = {{0x3bfd8b58, 0x00000000} }, /* 1.001e-19 */ -/**/ ub25 = {{0x3ccc2375, 0x00000000} }, /* 7.810e-16 */ -/**/ u26 = {{0x3c7e40af, 0x00000000} }, /* 2.624e-17 */ -/**/ ua26 = {{0x3bfd8b58, 0x00000000} }, /* 1.001e-19 */ -/**/ ub26 = {{0x3ccc6405, 0x00000000} }, /* 7.880e-16 */ -/**/ u27 = {{0x3ad421cb, 0x00000000} }, /* 2.602e-25 */ -/**/ u28 = {{0x3ad421cb, 0x00000000} }, /* 2.602e-25 */ - /**/ mp1 = {{0x3FF921FB, 0x58000000} }, /**/ mp2 = {{0xBE4DDE97, 0x3C000000} }, /**/ mp3 = {{0xBC8CB3B3, 0x99D747F2} }, @@ -145,7 +61,7 @@ #else #ifdef LITTLE_ENDI - static const number + static const mynumber /* polynomial I */ /**/ d3 = {{0x55555555, 0x3FD55555} }, /* 0.333... */ /**/ d5 = {{0x111107C6, 0x3FC11111} }, /* 0.133... */ @@ -153,46 +69,6 @@ /**/ d9 = {{0x49CFC666, 0x3F9664ED} }, /* . */ /**/ d11 = {{0x3CF2E4EA, 0x3F82385A} }, /* . */ /* polynomial II */ -/**/ a3 = {{0x55555555, 0x3fd55555} }, /* 1/3 */ -/**/ aa3 = {{0x55555555, 0x3c755555} }, /* 1/3-a3 */ -/**/ a5 = {{0x11111111, 0x3fc11111} }, /* 2/15 */ -/**/ aa5 = {{0x11111111, 0x3c411111} }, /* 2/15-a5 */ -/**/ a7 = {{0x1ba1ba1c, 0x3faba1ba} }, /* 17/315 */ -/**/ aa7 = {{0x17917918, 0xbc479179} }, /* ()-a7 */ -/**/ a9 = {{0x882c10fa, 0x3f9664f4} }, /* 62/2835 */ -/**/ aa9 = {{0x8b6c44fd, 0xbc09a528} }, /* ()-a9 */ -/**/ a11 = {{0x55e6c23d, 0x3f8226e3} }, /* . */ -/**/ aa11 = {{0x8f1a2c13, 0xbc2c292b} }, /* . */ -/**/ a13 = {{0x0e157de0, 0x3f6d6d3d} }, /* . */ -/**/ aa13 = {{0xc968d971, 0xbc0280cf} }, /* . */ -/**/ a15 = {{0x452b75e3, 0x3f57da36} }, /* . */ -#if 0 -/**/ aa15 = {{0xb285d2ed, 0xbbf25789} }, /* . */ -#endif -/**/ a17 = {{0x48036744, 0x3f435582} }, /* . */ -#if 0 -/**/ aa17 = {{0x563f1f23, 0x3be488d9} }, /* . */ -#endif -/**/ a19 = {{0x734d1664, 0x3f2f57d7} }, /* . */ -#if 0 -/**/ aa19 = {{0x913ccb50, 0x3bb0d55a} }, /* . */ -#endif -/**/ a21 = {{0x8afcafad, 0x3f1967e1} }, /* . */ -#if 0 -/**/ aa21 = {{0xa42d44e6, 0xbbbd7614} }, /* . */ -#endif -/**/ a23 = {{0xeea25259, 0x3f0497d8} }, /* . */ -#if 0 -/**/ aa23 = {{0x2e4d2863, 0x3b99f2d0} }, /* . */ -#endif -/**/ a25 = {{0xd39a6050, 0x3ef0b132} }, /* . */ -#if 0 -/**/ aa25 = {{0xc2c19614, 0x3b93b274} }, /* . */ -#endif -/**/ a27 = {{0xd3ee24e9, 0x3edb0f72} }, /* . */ -#if 0 -/**/ aa27 = {{0xdd595609, 0x3b61688d} }, /* . */ -#endif /* polynomial III */ /**/ e0 = {{0x55554DBD, 0x3FD55555} }, /* . */ /**/ e1 = {{0xE0A6B45F, 0x3FC11112} }, /* . */ @@ -205,52 +81,8 @@ /**/ g3 = {{0x00000000, 0x3fe92f1a} }, /* 0.787 */ /**/ g4 = {{0x00000000, 0x40390000} }, /* 25.0 */ /**/ g5 = {{0x00000000, 0x4197d784} }, /* 1e8 */ -/**/ gy1 = {{0x9abcaf48, 0x3e7ad7f2} }, /* 1e-7 */ /**/ gy2 = {{0x00000000, 0x3faf212d} }, /* 0.0608 */ -/**/ u1 = {{0x00000000, 0x3cc8c33a} }, /* 6.873e-16 */ -/**/ u2 = {{0x00000000, 0x3983dc4d} }, /* 1.224e-31 */ -/**/ u3 = {{0x00000000, 0x3c78e14b} }, /* 2.158e-17 */ -/**/ ua3 = {{0x00000000, 0x3bfd8b58} }, /* 1.001e-19 */ -/**/ ub3 = {{0x00000000, 0x3cc81898} }, /* 6.688e-16 */ -/**/ u4 = {{0x00000000, 0x399856c2} }, /* 3e-31 */ -/**/ u5 = {{0x00000000, 0x3c39d80a} }, /* 1.401e-18 */ -/**/ u6 = {{0x00000000, 0x3c374c5a} }, /* 1.263e-18 */ -/**/ u7 = {{0x00000000, 0x39903beb} }, /* 2.001e-31 */ -/**/ u8 = {{0x00000000, 0x399c56ae} }, /* 3.493e-31 */ -/**/ u9 = {{0x00000000, 0x3c7d0ac7} }, /* 2.519e-17 */ -/**/ ua9 = {{0x00000000, 0x3bfd8b58} }, /* 1.001e-19 */ -/**/ ub9 = {{0x00000000, 0x3ccc2375} }, /* 7.810e-16 */ -/**/ u10 = {{0x00000000, 0x3c7e40af} }, /* 2.624e-17 */ -/**/ ua10 = {{0x00000000, 0x3bfd8b58} }, /* 1.001e-19 */ -/**/ ub10 = {{0x00000000, 0x3ccc6405} }, /* 7.880e-16 */ -/**/ u11 = {{0x00000000, 0x39e509b6} }, /* 8.298e-30 */ -/**/ u12 = {{0x00000000, 0x39e509b6} }, /* 8.298e-30 */ -/**/ u13 = {{0x00000000, 0x3c39d80a} }, /* 1.401e-18 */ -/**/ u14 = {{0x00000000, 0x3c374c5a} }, /* 1.263e-18 */ -/**/ u15 = {{0x00000000, 0x3ab5767a} }, /* 6.935e-26 */ -/**/ u16 = {{0x00000000, 0x3ab57744} }, /* 6.936e-26 */ -/**/ u17 = {{0x00000000, 0x3c7d0ac7} }, /* 2.519e-17 */ -/**/ ua17 = {{0x00000000, 0x3bfdb11f} }, /* 1.006e-19 */ -/**/ ub17 = {{0x00000000, 0x3ccc2375} }, /* 7.810e-16 */ -/**/ u18 = {{0x00000000, 0x3c7e40af} }, /* 2.624e-17 */ -/**/ ua18 = {{0x00000000, 0x3bfdb11f} }, /* 1.006e-19 */ -/**/ ub18 = {{0x00000000, 0x3ccc6405} }, /* 7.880e-16 */ -/**/ u19 = {{0x00000000, 0x39a13b61} }, /* 4.248e-31 */ -/**/ u20 = {{0x00000000, 0x39a13b61} }, /* 4.248e-31 */ -/**/ u21 = {{0x00000000, 0x3c3bb9b8} }, /* 1.503e-18 */ -/**/ u22 = {{0x00000000, 0x3c392e08} }, /* 1.365e-18 */ -/**/ u23 = {{0x00000000, 0x3a0ce706} }, /* 4.560e-29 */ -/**/ u24 = {{0x00000000, 0x3a0cff5d} }, /* 4.575e-29 */ -/**/ u25 = {{0x00000000, 0x3c7d0ac7} }, /* 2.519e-17 */ -/**/ ua25 = {{0x00000000, 0x3bfd8b58} }, /* 1.001e-19 */ -/**/ ub25 = {{0x00000000, 0x3ccc2375} }, /* 7.810e-16 */ -/**/ u26 = {{0x00000000, 0x3c7e40af} }, /* 2.624e-17 */ -/**/ ua26 = {{0x00000000, 0x3bfd8b58} }, /* 1.001e-19 */ -/**/ ub26 = {{0x00000000, 0x3ccc6405} }, /* 7.880e-16 */ -/**/ u27 = {{0x00000000, 0x3ad421cb} }, /* 2.602e-25 */ -/**/ u28 = {{0x00000000, 0x3ad421cb} }, /* 2.602e-25 */ - /**/ mp1 = {{0x58000000, 0x3FF921FB} }, /**/ mp2 = {{0x3C000000, 0xBE4DDE97} }, /**/ mp3 = {{0x99D747F2, 0xBC8CB3B3} }, diff --git a/sysdeps/ieee754/dbl-64/utan.tbl b/sysdeps/ieee754/dbl-64/utan.tbl index 325abdeab3..f05e67f488 100644 --- a/sysdeps/ieee754/dbl-64/utan.tbl +++ b/sysdeps/ieee754/dbl-64/utan.tbl @@ -23,7 +23,7 @@ #ifdef BIG_ENDI -static const number +static const mynumber xfg[186][4] = { /* xi,Fi,Gi,FFi, i=16..201 */ /**/ {{{0x3fb00000, 0x1e519d60} }, /**/ {{0x3fb00557, 0x96c4e240} }, @@ -773,7 +773,7 @@ static const number #else #ifdef LITTLE_ENDI -static const number +static const mynumber xfg[186][4] = { /* xi,Fi,Gi,FFi, i=16..201 */ /**/ {{{0x1e519d60, 0x3fb00000} }, /**/ {{0x96c4e240, 0x3fb00557} }, -- cgit v1.2.3