diff options
author | Ulrich Drepper <drepper@gmail.com> | 2011-10-23 20:55:26 -0400 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-10-23 20:55:26 -0400 |
commit | 0275fff8a105d5963df43b27df769487adac25ac (patch) | |
tree | 2896c1ef127f15491afc3f4c8ab87cf75558fea4 | |
parent | 10d005f77f0dfb5b7abaf4e78b875964ceaa7ed4 (diff) | |
download | glibc-0275fff8a105d5963df43b27df769487adac25ac.tar glibc-0275fff8a105d5963df43b27df769487adac25ac.tar.gz glibc-0275fff8a105d5963df43b27df769487adac25ac.tar.bz2 glibc-0275fff8a105d5963df43b27df769487adac25ac.zip |
Optimize f_mod
Branch prediction for the 32-bit implementation and a new optimized
64-bit implementation.
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/e_fmod.c | 11 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c | 104 |
3 files changed, 113 insertions, 6 deletions
@@ -1,5 +1,9 @@ 2011-10-23 Ulrich Drepper <drepper@gmail.com> + * sysdeps/ieee754/dbl-64/e_fmod.c (__ieee754_fmod): Add some branch + prediction. + * sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c: New file. + * string/strnlen.c: Don't define STRNLEN, reverse logic. Remove unused variable magic_bits. * sysdeps/i386/i686/multiarch/rtld-strnlen.c: New file. diff --git a/sysdeps/ieee754/dbl-64/e_fmod.c b/sysdeps/ieee754/dbl-64/e_fmod.c index a575f616bc..0328b011d2 100644 --- a/sysdeps/ieee754/dbl-64/e_fmod.c +++ b/sysdeps/ieee754/dbl-64/e_fmod.c @@ -1,4 +1,3 @@ -/* @(#)e_fmod.c 5.1 93/09/24 */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. @@ -44,7 +43,7 @@ __ieee754_fmod (double x, double y) } /* determine ix = ilogb(x) */ - if(hx<0x00100000) { /* subnormal x */ + if(__builtin_expect(hx<0x00100000, 0)) { /* subnormal x */ if(hx==0) { for (ix = -1043, i=lx; i>0; i<<=1) ix -=1; } else { @@ -53,7 +52,7 @@ __ieee754_fmod (double x, double y) } else ix = (hx>>20)-1023; /* determine iy = ilogb(y) */ - if(hy<0x00100000) { /* subnormal y */ + if(__builtin_expect(hy<0x00100000, 0)) { /* subnormal y */ if(hy==0) { for (iy = -1043, i=ly; i>0; i<<=1) iy -=1; } else { @@ -62,7 +61,7 @@ __ieee754_fmod (double x, double y) } else iy = (hy>>20)-1023; /* set up {hx,lx}, {hy,ly} and align y to x */ - if(ix >= -1022) + if(__builtin_expect(ix >= -1022, 1)) hx = 0x00100000|(0x000fffff&hx); else { /* subnormal x, shift x to normal */ n = -1022-ix; @@ -74,7 +73,7 @@ __ieee754_fmod (double x, double y) lx = 0; } } - if(iy >= -1022) + if(__builtin_expect(iy >= -1022, 1)) hy = 0x00100000|(0x000fffff&hy); else { /* subnormal y, shift y to normal */ n = -1022-iy; @@ -108,7 +107,7 @@ __ieee754_fmod (double x, double y) hx = hx+hx+(lx>>31); lx = lx+lx; iy -= 1; } - if(iy>= -1022) { /* normalize output */ + if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */ hx = ((hx-0x00100000)|((iy+1023)<<20)); INSERT_WORDS(x,hx|sx,lx); } else { /* subnormal output */ diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c new file mode 100644 index 0000000000..0e20571a74 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c @@ -0,0 +1,104 @@ +/* Rewritten for 64-bit machines by Ulrich Drepper <drepper@gmail.com>. */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +/* + * __ieee754_fmod(x,y) + * Return x mod y in exact arithmetic + * Method: shift and subtract + */ + +#include "math.h" +#include "math_private.h" + +static const double one = 1.0, Zero[] = {0.0, -0.0,}; + +double +__ieee754_fmod (double x, double y) +{ + int32_t n,i,ix,iy; + int64_t hx,hy,hz,sx; + + EXTRACT_WORDS64(hx,x); + EXTRACT_WORDS64(hy,y); + sx = hx&UINT64_C(0x8000000000000000); /* sign of x */ + hx ^=sx; /* |x| */ + hy &= UINT64_C(0x7fffffffffffffff); /* |y| */ + + /* purge off exception values */ + if(__builtin_expect(hy==0 + || hx >= UINT64_C(0x7ff0000000000000) + || hy > UINT64_C(0x7ff0000000000000), 0)) + /* y=0,or x not finite or y is NaN */ + return (x*y)/(x*y); + if(__builtin_expect(hx<=hy, 0)) { + if(hx<hy) return x; /* |x|<|y| return x */ + return Zero[(uint64_t)sx>>63]; /* |x|=|y| return x*0*/ + } + + /* determine ix = ilogb(x) */ + if(__builtin_expect(hx<UINT64_C(0x0010000000000000), 0)) { + /* subnormal x */ + for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1; + } else ix = (hx>>52)-1023; + + /* determine iy = ilogb(y) */ + if(__builtin_expect(hy<UINT64_C(0x0010000000000000), 0)) { /* subnormal y */ + for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1; + } else iy = (hy>>52)-1023; + + /* set up hx, hy and align y to x */ + if(__builtin_expect(ix >= -1022, 1)) + hx = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hx); + else { /* subnormal x, shift x to normal */ + n = -1022-ix; + hx<<=n; + } + if(__builtin_expect(iy >= -1022, 1)) + hy = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hy); + else { /* subnormal y, shift y to normal */ + n = -1022-iy; + hy<<=n; + } + + /* fix point fmod */ + n = ix - iy; + while(n--) { + hz=hx-hy; + if(hz<0){hx = hx+hx;} + else { + if(hz==0) /* return sign(x)*0 */ + return Zero[(uint64_t)sx>>63]; + hx = hz+hz; + } + } + hz=hx-hy; + if(hz>=0) {hx=hz;} + + /* convert back to floating value and restore the sign */ + if(hx==0) /* return sign(x)*0 */ + return Zero[(uint64_t)sx>>63]; + while(hx<UINT64_C(0x0010000000000000)) { /* normalize x */ + hx = hx+hx; + iy -= 1; + } + if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */ + hx = ((hx-UINT64_C(0x0010000000000000))|((uint64_t)(iy+1023)<<52)); + INSERT_WORDS64(x,hx|sx); + } else { /* subnormal output */ + n = -1022 - iy; + hx>>=n; + INSERT_WORDS64(x,hx|sx); + x *= one; /* create necessary signal */ + } + return x; /* exact output */ +} +strong_alias (__ieee754_fmod, __fmod_finite) |