From db3f7bb5586392d9809fc6397c7184983aed6008 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 10 Mar 2021 12:39:56 +0000 Subject: math: Remove slow paths from asin and acos [BZ #15267] This patch series removes all remaining slow paths and related code. First asin/acos, tan, atan, atan2 implementations are updated, and the final patch removes the unused mpa files, headers and probes. Passes buildmanyglibc. Remove slow paths from asin/acos. Add ULP annotations based on previous slow path checks (which are approximate). Update AArch64 and x86_64 libm-test-ulps. Reviewed-By: Paul Zimmermann --- sysdeps/aarch64/libm-test-ulps | 2 +- sysdeps/ieee754/dbl-64/e_asin.c | 341 +++++--------------------------------- sysdeps/x86_64/fpu/libm-test-ulps | 4 +- 3 files changed, 47 insertions(+), 300 deletions(-) diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 780ee25c68..21ff7dc5ce 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -57,7 +57,7 @@ float: 1 ldouble: 1 Function: "asin_upward": -double: 1 +double: 2 float: 1 ldouble: 2 diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c index ae5344f720..4473ae5df4 100644 --- a/sysdeps/ieee754/dbl-64/e_asin.c +++ b/sysdeps/ieee754/dbl-64/e_asin.c @@ -21,8 +21,7 @@ /* */ /* FUNCTIONS: uasin */ /* uacos */ -/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h usncs.h */ -/* doasin.c sincos32.c dosincos.c mpa.c */ +/* FILES NEEDED: dla.h endian.h mydefs.h usncs.h */ /* sincos.tbl asincos.tbl powtwo.tbl root.tbl */ /* */ /******************************************************************/ @@ -31,7 +30,6 @@ #include "asincos.tbl" #include "root.tbl" #include "powtwo.tbl" -#include "MathLib.h" #include "uasncs.h" #include <float.h> #include <math.h> @@ -43,15 +41,11 @@ # define SECTION #endif -void __doasin(double x, double dx, double w[]); -void __dubsin(double x, double dx, double v[]); -void __dubcos(double x, double dx, double v[]); -void __docos(double x, 
double dx, double v[]); - +/* asin with max ULP of ~0.516 based on random sampling. */ double SECTION __ieee754_asin(double x){ - double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2]; + double x2,xx,res1,p,t,res,r,cor,cc,y,c,z; mynumber u,v; int4 k,m,n; @@ -70,27 +64,8 @@ __ieee754_asin(double x){ x2 = x*x; t = (((((f6*x2 + f5)*x2 + f4)*x2 + f3)*x2 + f2)*x2 + f1)*(x2*x); res = x+t; /* res=arcsin(x) according to Taylor series */ - cor = (x-res)+t; - if (res == res+1.025*cor) return res; - else { - x1 = x+big; - xx = x*x; - x1 -= big; - x2 = x - x1; - p = x1*x1*x1; - s1 = a1.x*p; - s2 = ((((((c7*xx + c6)*xx + c5)*xx + c4)*xx + c3)*xx + c2)*xx*xx*x + - ((a1.x+a2.x)*x2*x2+ 0.5*x1*x)*x2) + a2.x*p; - res1 = x+s1; - s2 = ((x-res1)+s1)+s2; - res = res1+s2; - cor = (res1-res)+s2; - if (res == res+1.00014*cor) return res; - else { - __doasin(x,0,w); - return w[0]; - } - } + /* Max ULP is 0.513. */ + return res; } /*---------------------0.125 <= |x| < 0.5 -----------------------------*/ else if (k < 0x3fe00000) { @@ -103,26 +78,8 @@ __ieee754_asin(double x){ +xx*asncs.x[n+6]))))+asncs.x[n+7]; t+=p; res =asncs.x[n+8] +t; - cor = (asncs.x[n+8]-res)+t; - if (res == res+1.05*cor) return (m>0)?res:-res; - else { - r=asncs.x[n+8]+xx*asncs.x[n+9]; - t=((asncs.x[n+8]-r)+xx*asncs.x[n+9])+(p+xx*asncs.x[n+10]); - res = r+t; - cor = (r-res)+t; - if (res == res+1.0005*cor) return (m>0)?res:-res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __dubsin(res,z,w); - z=(w[0]-fabs(x))+w[1]; - if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1); - else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1); - else { - return (m>0)?res:-res; - } - } - } + /* Max ULP is 0.524. 
*/ + return (m>0)?res:-res; } /* else if (k < 0x3fe00000) */ /*-------------------- 0.5 <= |x| < 0.75 -----------------------------*/ else @@ -135,26 +92,8 @@ __ieee754_asin(double x){ +xx*(asncs.x[n+6]+xx*asncs.x[n+7])))))+asncs.x[n+8]; t+=p; res =asncs.x[n+9] +t; - cor = (asncs.x[n+9]-res)+t; - if (res == res+1.01*cor) return (m>0)?res:-res; - else { - r=asncs.x[n+9]+xx*asncs.x[n+10]; - t=((asncs.x[n+9]-r)+xx*asncs.x[n+10])+(p+xx*asncs.x[n+11]); - res = r+t; - cor = (r-res)+t; - if (res == res+1.0005*cor) return (m>0)?res:-res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __dubsin(res,z,w); - z=(w[0]-fabs(x))+w[1]; - if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1); - else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1); - else { - return (m>0)?res:-res; - } - } - } + /* Max ULP is 0.505. */ + return (m>0)?res:-res; } /* else if (k < 0x3fe80000) */ /*--------------------- 0.75 <= |x|< 0.921875 ----------------------*/ else @@ -167,28 +106,8 @@ __ieee754_asin(double x){ +xx*(asncs.x[n+6]+xx*(asncs.x[n+7]+xx*asncs.x[n+8]))))))+asncs.x[n+9]; t+=p; res =asncs.x[n+10] +t; - cor = (asncs.x[n+10]-res)+t; - if (res == res+1.01*cor) return (m>0)?res:-res; - else { - r=asncs.x[n+10]+xx*asncs.x[n+11]; - t=((asncs.x[n+10]-r)+xx*asncs.x[n+11])+(p+xx*asncs.x[n+12]); - res = r+t; - cor = (r-res)+t; - if (res == res+1.0008*cor) return (m>0)?res:-res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - y=hp0.x-res; - z=((hp0.x-y)-res)+(hp1.x-z); - __dubcos(y,z,w); - z=(w[0]-fabs(x))+w[1]; - if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1); - else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1); - else { - return (m>0)?res:-res; - } - } - } + /* Max ULP is 0.505. 
*/ + return (m>0)?res:-res; } /* else if (k < 0x3fed8000) */ /*-------------------0.921875 <= |x| < 0.953125 ------------------------*/ else @@ -203,29 +122,8 @@ __ieee754_asin(double x){ xx*asncs.x[n+9])))))))+asncs.x[n+10]; t+=p; res =asncs.x[n+11] +t; - cor = (asncs.x[n+11]-res)+t; - if (res == res+1.01*cor) return (m>0)?res:-res; - else { - r=asncs.x[n+11]+xx*asncs.x[n+12]; - t=((asncs.x[n+11]-r)+xx*asncs.x[n+12])+(p+xx*asncs.x[n+13]); - res = r+t; - cor = (r-res)+t; - if (res == res+1.0007*cor) return (m>0)?res:-res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - y=(hp0.x-res)-z; - z=y+hp1.x; - y=(y-z)+hp1.x; - __dubcos(z,y,w); - z=(w[0]-fabs(x))+w[1]; - if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1); - else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1); - else { - return (m>0)?res:-res; - } - } - } + /* Max ULP is 0.505. */ + return (m>0)?res:-res; } /* else if (k < 0x3fee8000) */ /*--------------------0.953125 <= |x| < 0.96875 ------------------------*/ @@ -241,29 +139,8 @@ __ieee754_asin(double x){ xx*(asncs.x[n+9]+xx*asncs.x[n+10]))))))))+asncs.x[n+11]; t+=p; res =asncs.x[n+12] +t; - cor = (asncs.x[n+12]-res)+t; - if (res == res+1.01*cor) return (m>0)?res:-res; - else { - r=asncs.x[n+12]+xx*asncs.x[n+13]; - t=((asncs.x[n+12]-r)+xx*asncs.x[n+13])+(p+xx*asncs.x[n+14]); - res = r+t; - cor = (r-res)+t; - if (res == res+1.0007*cor) return (m>0)?res:-res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - y=(hp0.x-res)-z; - z=y+hp1.x; - y=(y-z)+hp1.x; - __dubcos(z,y,w); - z=(w[0]-fabs(x))+w[1]; - if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1); - else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1); - else { - return (m>0)?res:-res; - } - } - } + /* Max ULP is 0.505. 
*/ + return (m>0)?res:-res; } /* else if (k < 0x3fef0000) */ /*--------------------0.96875 <= |x| < 1 --------------------------------*/ else @@ -282,16 +159,8 @@ __ieee754_asin(double x){ cor = (hp1.x - 2.0*cc)-2.0*(y+cc)*p; res1 = hp0.x - 2.0*y; res =res1 + cor; - if (res == res+1.003*((res1-res)+cor)) return (m>0)?res:-res; - else { - c=y+cc; - cc=(y-c)+cc; - __doasin(c,cc,w); - res1=hp0.x-2.0*w[0]; - cor=((hp0.x-res1)-2.0*w[0])+(hp1.x-2.0*w[1]); - res = res1+cor; - return (m>0)?res:-res; - } + /* Max ULP is 0.5015. */ + return (m>0)?res:-res; } /* else if (k < 0x3ff00000) */ /*---------------------------- |x|>=1 -------------------------------*/ else if (k==0x3ff00000 && u.i[LOW_HALF]==0) return (m>0)?hp0.x:-hp0.x; @@ -315,11 +184,12 @@ libm_alias_finite (__ieee754_asin, __asin) /* */ /*******************************************************************/ +/* acos with max ULP of ~0.523 based on random sampling. */ double SECTION __ieee754_acos(double x) { - double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2],eps; + double x2,xx,res1,p,t,res,r,cor,cc,y,c,z; mynumber u,v; int4 k,m,n; u.x = x; @@ -336,32 +206,8 @@ __ieee754_acos(double x) r=hp0.x-x; cor=(((hp0.x-r)-x)+hp1.x)-t; res = r+cor; - cor = (r-res)+cor; - if (res == res+1.004*cor) return res; - else { - x1 = x+big; - xx = x*x; - x1 -= big; - x2 = x - x1; - p = x1*x1*x1; - s1 = a1.x*p; - s2 = ((((((c7*xx + c6)*xx + c5)*xx + c4)*xx + c3)*xx + c2)*xx*xx*x + - ((a1.x+a2.x)*x2*x2+ 0.5*x1*x)*x2) + a2.x*p; - res1 = x+s1; - s2 = ((x-res1)+s1)+s2; - r=hp0.x-res1; - cor=(((hp0.x-r)-res1)+hp1.x)-s2; - res = r+cor; - cor = (r-res)+cor; - if (res == res+1.00004*cor) return res; - else { - __doasin(x,0,w); - r=hp0.x-w[0]; - cor=((hp0.x-r)-w[0])+(hp1.x-w[1]); - res=r+cor; - return res; - } - } + /* Max ULP is 0.502. 
*/ + return res; } /* else if (k < 0x3fc00000) */ /*---------------------- 0.125 <= |x| < 0.5 --------------------*/ else @@ -377,35 +223,16 @@ __ieee754_acos(double x) y = (m>0)?(hp0.x-asncs.x[n+8]):(hp0.x+asncs.x[n+8]); t = (m>0)?(hp1.x-t):(hp1.x+t); res = y+t; - if (res == res+1.02*((y-res)+t)) return res; - else { - r=asncs.x[n+8]+xx*asncs.x[n+9]; - t=((asncs.x[n+8]-r)+xx*asncs.x[n+9])+(p+xx*asncs.x[n+10]); - if (m>0) - {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; } - else - {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); } - res = p+t; - cor = (p-res)+t; - if (res == (res+1.0002*cor)) return res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __docos(res,z,w); - z=(w[0]-x)+w[1]; - if (z>1.0e-27) return max(res,res1); - else if (z<-1.0e-27) return min(res,res1); - else return res; - } - } + /* Max ULP is 0.51. */ + return res; } /* else if (k < 0x3fe00000) */ /*--------------------------- 0.5 <= |x| < 0.75 ---------------------*/ else if (k < 0x3fe80000) { n = 1056+((k&0x000fe000)>>11)*3; - if (m>0) {xx = x - asncs.x[n]; eps=1.04; } - else {xx = -x - asncs.x[n]; eps=1.02; } + if (m>0) {xx = x - asncs.x[n]; } + else {xx = -x - asncs.x[n]; } t = asncs.x[n+1]*xx; p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+ xx*(asncs.x[n+5]+xx*(asncs.x[n+6]+ @@ -414,33 +241,16 @@ __ieee754_acos(double x) y = (m>0)?(hp0.x-asncs.x[n+9]):(hp0.x+asncs.x[n+9]); t = (m>0)?(hp1.x-t):(hp1.x+t); res = y+t; - if (res == res+eps*((y-res)+t)) return res; - else { - r=asncs.x[n+9]+xx*asncs.x[n+10]; - t=((asncs.x[n+9]-r)+xx*asncs.x[n+10])+(p+xx*asncs.x[n+11]); - if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0004; } - else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0002; } - res = p+t; - cor = (p-res)+t; - if (res == (res+eps*cor)) return res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __docos(res,z,w); - z=(w[0]-x)+w[1]; - if (z>1.0e-27) return max(res,res1); - else if (z<-1.0e-27) return min(res,res1); - else return res; - } - } + /* Max ULP is 0.523 based on 
random sampling. */ + return res; } /* else if (k < 0x3fe80000) */ /*------------------------- 0.75 <= |x| < 0.921875 -------------*/ else if (k < 0x3fed8000) { n = 992+((k&0x000fe000)>>13)*13; - if (m>0) {xx = x - asncs.x[n]; eps = 1.04; } - else {xx = -x - asncs.x[n]; eps = 1.01; } + if (m>0) {xx = x - asncs.x[n]; } + else {xx = -x - asncs.x[n]; } t = asncs.x[n+1]*xx; p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+ xx*(asncs.x[n+5]+xx*(asncs.x[n+6]+xx*(asncs.x[n+7]+ @@ -449,33 +259,16 @@ __ieee754_acos(double x) y = (m>0)?(hp0.x-asncs.x[n+10]):(hp0.x+asncs.x[n+10]); t = (m>0)?(hp1.x-t):(hp1.x+t); res = y+t; - if (res == res+eps*((y-res)+t)) return res; - else { - r=asncs.x[n+10]+xx*asncs.x[n+11]; - t=((asncs.x[n+10]-r)+xx*asncs.x[n+11])+(p+xx*asncs.x[n+12]); - if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0032; } - else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0008; } - res = p+t; - cor = (p-res)+t; - if (res == (res+eps*cor)) return res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __docos(res,z,w); - z=(w[0]-x)+w[1]; - if (z>1.0e-27) return max(res,res1); - else if (z<-1.0e-27) return min(res,res1); - else return res; - } - } + /* Max ULP is 0.523 based on random sampling. 
*/ + return res; } /* else if (k < 0x3fed8000) */ /*-------------------0.921875 <= |x| < 0.953125 ------------------*/ else if (k < 0x3fee8000) { n = 884+((k&0x000fe000)>>13)*14; - if (m>0) {xx = x - asncs.x[n]; eps=1.04; } - else {xx = -x - asncs.x[n]; eps =1.005; } + if (m>0) {xx = x - asncs.x[n]; } + else {xx = -x - asncs.x[n]; } t = asncs.x[n+1]*xx; p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+ xx*(asncs.x[n+5]+xx*(asncs.x[n+6] @@ -485,33 +278,16 @@ __ieee754_acos(double x) y = (m>0)?(hp0.x-asncs.x[n+11]):(hp0.x+asncs.x[n+11]); t = (m>0)?(hp1.x-t):(hp1.x+t); res = y+t; - if (res == res+eps*((y-res)+t)) return res; - else { - r=asncs.x[n+11]+xx*asncs.x[n+12]; - t=((asncs.x[n+11]-r)+xx*asncs.x[n+12])+(p+xx*asncs.x[n+13]); - if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0030; } - else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0005; } - res = p+t; - cor = (p-res)+t; - if (res == (res+eps*cor)) return res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __docos(res,z,w); - z=(w[0]-x)+w[1]; - if (z>1.0e-27) return max(res,res1); - else if (z<-1.0e-27) return min(res,res1); - else return res; - } - } + /* Max ULP is 0.523 based on random sampling. 
*/ + return res; } /* else if (k < 0x3fee8000) */ /*--------------------0.953125 <= |x| < 0.96875 ----------------*/ else if (k < 0x3fef0000) { n = 768+((k&0x000fe000)>>13)*15; - if (m>0) {xx = x - asncs.x[n]; eps=1.04; } - else {xx = -x - asncs.x[n]; eps=1.005;} + if (m>0) {xx = x - asncs.x[n]; } + else {xx = -x - asncs.x[n]; } t = asncs.x[n+1]*xx; p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+ xx*(asncs.x[n+5]+xx*(asncs.x[n+6] @@ -521,25 +297,8 @@ __ieee754_acos(double x) y = (m>0)?(hp0.x-asncs.x[n+12]):(hp0.x+asncs.x[n+12]); t = (m>0)?(hp1.x-t):(hp1.x+t); res = y+t; - if (res == res+eps*((y-res)+t)) return res; - else { - r=asncs.x[n+12]+xx*asncs.x[n+13]; - t=((asncs.x[n+12]-r)+xx*asncs.x[n+13])+(p+xx*asncs.x[n+14]); - if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0030; } - else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0005; } - res = p+t; - cor = (p-res)+t; - if (res == (res+eps*cor)) return res; - else { - res1=res+1.1*cor; - z=0.5*(res1-res); - __docos(res,z,w); - z=(w[0]-x)+w[1]; - if (z>1.0e-27) return max(res,res1); - else if (z<-1.0e-27) return min(res,res1); - else return res; - } - } + /* Max ULP is 0.523 based on random sampling. */ + return res; } /* else if (k < 0x3fef0000) */ /*-----------------0.96875 <= |x| < 1 ---------------------------*/ @@ -560,28 +319,14 @@ __ieee754_acos(double x) cor = (hp1.x - cc)-(y+cc)*p; res1 = hp0.x - y; res =res1 + cor; - if (res == res+1.002*((res1-res)+cor)) return (res+res); - else { - c=y+cc; - cc=(y-c)+cc; - __doasin(c,cc,w); - res1=hp0.x-w[0]; - cor=((hp0.x-res1)-w[0])+(hp1.x-w[1]); - res = res1+cor; - return (res+res); - } + /* Max ULP is 0.501. */ + return (res+res); } else { cor = cc+p*(y+cc); res = y + cor; - if (res == res+1.03*((y-res)+cor)) return (res+res); - else { - c=y+cc; - cc=(y-c)+cc; - __doasin(c,cc,w); - res = w[0]; - return (res+res); - } + /* Max ULP is 0.515. 
*/ + return (res+res); } } /* else if (k < 0x3ff00000) */ diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 7deec8daf5..bd1fa63702 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -68,7 +68,7 @@ float128: 1 ldouble: 1 Function: "asin_upward": -double: 1 +double: 2 float: 1 float128: 2 ldouble: 1 @@ -98,6 +98,7 @@ float128: 4 ldouble: 5 Function: "atan": +double: 1 float: 1 float128: 1 ldouble: 1 @@ -673,6 +674,7 @@ float128: 2 ldouble: 3 Function: Imaginary part of "clog": +double: 1 float: 1 float128: 1 ldouble: 1 -- cgit v1.2.3-70-g09d2