aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-10-23 20:55:26 -0400
committerUlrich Drepper <drepper@gmail.com>2011-10-23 20:55:26 -0400
commit0275fff8a105d5963df43b27df769487adac25ac (patch)
tree2896c1ef127f15491afc3f4c8ab87cf75558fea4 /sysdeps
parent10d005f77f0dfb5b7abaf4e78b875964ceaa7ed4 (diff)
downloadglibc-0275fff8a105d5963df43b27df769487adac25ac.tar
glibc-0275fff8a105d5963df43b27df769487adac25ac.tar.gz
glibc-0275fff8a105d5963df43b27df769487adac25ac.tar.bz2
glibc-0275fff8a105d5963df43b27df769487adac25ac.zip
Optimize f_mod
Branch prediction for the 32-bit implementation and a new optimized 64-bit implementation.
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/ieee754/dbl-64/e_fmod.c11
-rw-r--r--sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c104
2 files changed, 109 insertions, 6 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_fmod.c b/sysdeps/ieee754/dbl-64/e_fmod.c
index a575f616bc..0328b011d2 100644
--- a/sysdeps/ieee754/dbl-64/e_fmod.c
+++ b/sysdeps/ieee754/dbl-64/e_fmod.c
@@ -1,4 +1,3 @@
-/* @(#)e_fmod.c 5.1 93/09/24 */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@@ -44,7 +43,7 @@ __ieee754_fmod (double x, double y)
}
/* determine ix = ilogb(x) */
- if(hx<0x00100000) { /* subnormal x */
+ if(__builtin_expect(hx<0x00100000, 0)) { /* subnormal x */
if(hx==0) {
for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
} else {
@@ -53,7 +52,7 @@ __ieee754_fmod (double x, double y)
} else ix = (hx>>20)-1023;
/* determine iy = ilogb(y) */
- if(hy<0x00100000) { /* subnormal y */
+ if(__builtin_expect(hy<0x00100000, 0)) { /* subnormal y */
if(hy==0) {
for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
} else {
@@ -62,7 +61,7 @@ __ieee754_fmod (double x, double y)
} else iy = (hy>>20)-1023;
/* set up {hx,lx}, {hy,ly} and align y to x */
- if(ix >= -1022)
+ if(__builtin_expect(ix >= -1022, 1))
hx = 0x00100000|(0x000fffff&hx);
else { /* subnormal x, shift x to normal */
n = -1022-ix;
@@ -74,7 +73,7 @@ __ieee754_fmod (double x, double y)
lx = 0;
}
}
- if(iy >= -1022)
+ if(__builtin_expect(iy >= -1022, 1))
hy = 0x00100000|(0x000fffff&hy);
else { /* subnormal y, shift y to normal */
n = -1022-iy;
@@ -108,7 +107,7 @@ __ieee754_fmod (double x, double y)
hx = hx+hx+(lx>>31); lx = lx+lx;
iy -= 1;
}
- if(iy>= -1022) { /* normalize output */
+ if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */
hx = ((hx-0x00100000)|((iy+1023)<<20));
INSERT_WORDS(x,hx|sx,lx);
} else { /* subnormal output */
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
new file mode 100644
index 0000000000..0e20571a74
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
@@ -0,0 +1,104 @@
+/* Rewritten for 64-bit machines by Ulrich Drepper <drepper@gmail.com>. */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/*
+ * __ieee754_fmod(x,y)
+ * Return x mod y in exact arithmetic
+ * Method: shift and subtract
+ */
+
+#include "math.h"
+#include "math_private.h"
+
+static const double one = 1.0, Zero[] = {0.0, -0.0,};
+
+double
+__ieee754_fmod (double x, double y)
+{
+ int32_t n,i,ix,iy;
+ int64_t hx,hy,hz,sx;
+
+ EXTRACT_WORDS64(hx,x);
+ EXTRACT_WORDS64(hy,y);
+ sx = hx&UINT64_C(0x8000000000000000); /* sign of x */
+ hx ^=sx; /* |x| */
+ hy &= UINT64_C(0x7fffffffffffffff); /* |y| */
+
+ /* purge off exception values */
+ if(__builtin_expect(hy==0
+ || hx >= UINT64_C(0x7ff0000000000000)
+ || hy > UINT64_C(0x7ff0000000000000), 0))
+ /* y=0,or x not finite or y is NaN */
+ return (x*y)/(x*y);
+ if(__builtin_expect(hx<=hy, 0)) {
+ if(hx<hy) return x; /* |x|<|y| return x */
+ return Zero[(uint64_t)sx>>63]; /* |x|=|y| return x*0*/
+ }
+
+ /* determine ix = ilogb(x) */
+ if(__builtin_expect(hx<UINT64_C(0x0010000000000000), 0)) {
+ /* subnormal x */
+ for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+ } else ix = (hx>>52)-1023;
+
+ /* determine iy = ilogb(y) */
+ if(__builtin_expect(hy<UINT64_C(0x0010000000000000), 0)) { /* subnormal y */
+ for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+ } else iy = (hy>>52)-1023;
+
+ /* set up hx, hy and align y to x */
+ if(__builtin_expect(ix >= -1022, 1))
+ hx = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hx);
+ else { /* subnormal x, shift x to normal */
+ n = -1022-ix;
+ hx<<=n;
+ }
+ if(__builtin_expect(iy >= -1022, 1))
+ hy = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hy);
+ else { /* subnormal y, shift y to normal */
+ n = -1022-iy;
+ hy<<=n;
+ }
+
+ /* fix point fmod */
+ n = ix - iy;
+ while(n--) {
+ hz=hx-hy;
+ if(hz<0){hx = hx+hx;}
+ else {
+ if(hz==0) /* return sign(x)*0 */
+ return Zero[(uint64_t)sx>>63];
+ hx = hz+hz;
+ }
+ }
+ hz=hx-hy;
+ if(hz>=0) {hx=hz;}
+
+ /* convert back to floating value and restore the sign */
+ if(hx==0) /* return sign(x)*0 */
+ return Zero[(uint64_t)sx>>63];
+ while(hx<UINT64_C(0x0010000000000000)) { /* normalize x */
+ hx = hx+hx;
+ iy -= 1;
+ }
+ if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */
+ hx = ((hx-UINT64_C(0x0010000000000000))|((uint64_t)(iy+1023)<<52));
+ INSERT_WORDS64(x,hx|sx);
+ } else { /* subnormal output */
+ n = -1022 - iy;
+ hx>>=n;
+ INSERT_WORDS64(x,hx|sx);
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
+}
+strong_alias (__ieee754_fmod, __fmod_finite)