diff options
author | Richard Henderson <rth@twiddle.net> | 2012-03-10 08:53:05 -0800 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2012-03-19 06:50:41 -0700 |
commit | 0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876 (patch) | |
tree | b7daee8ebf19cc6f493921a1862408ef166b6b14 /sysdeps | |
parent | eb92c487b35e26aa1e08815c4480d0bc5cc9f370 (diff) | |
download | glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.tar glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.tar.gz glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.tar.bz2 glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.zip |
Create and use libc_feupdateenv_test.
We can reduce the number of STMXCSR, and often we can avoid the
call to __feraiseexcept.
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/generic/math_private.h | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fma.c | 25 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fmaf.c | 12 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/math_private.h | 28 |
4 files changed, 64 insertions, 19 deletions
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index 0b945f9afc..813ad93611 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -457,6 +457,24 @@ default_libc_feupdateenv (fenv_t *e) # define libc_feupdateenv_53bit libc_feupdateenv #endif +static __always_inline int +default_libc_feupdateenv_test (fenv_t *e, int ex) +{ + int ret = fetestexcept (ex); + feupdateenv (e); + return ret; +} + +#ifndef libc_feupdateenv_test +# define libc_feupdateenv_test default_libc_feupdateenv_test +#endif +#ifndef libc_feupdateenv_testf +# define libc_feupdateenv_testf default_libc_feupdateenv_test +#endif +#ifndef libc_feupdateenv_testl +# define libc_feupdateenv_testl default_libc_feupdateenv_test +#endif + /* Save and set the rounding mode. The use of fenv_t to store the old mode allows a target-specific version of this function to avoid converting the rounding mode from the fpu format. By default we have no choice but to diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index a27e246a49..ab20a801a4 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -149,35 +149,36 @@ __fma (double x, double y, double z) fenv_t env; libc_feholdexcept_setround (&env, FE_TOWARDZERO); + /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; + if (__builtin_expect (adjust < 0, 0)) + { + if ((u.ieee.mantissa1 & 1) == 0) + u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0; + v.d = a1 + u.d; + } + + /* Reset rounding mode and test for inexact simultaneously. */ + int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0; + if (__builtin_expect (adjust == 0, 1)) { if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) - u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0; - libc_feupdateenv (&env); + u.ieee.mantissa1 |= j; /* Result is a1 + u.d. */ return a1 + u.d; } else if (__builtin_expect (adjust > 0, 1)) { if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) - u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0; - libc_feupdateenv (&env); + u.ieee.mantissa1 |= j; /* Result is a1 + u.d, scaled up. */ return (a1 + u.d) * 0x1p53; } else { - if ((u.ieee.mantissa1 & 1) == 0) - u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0; - v.d = a1 + u.d; - int j = libc_fetestexcept (FE_INEXACT) != 0; - libc_feupdateenv (&env); - /* Ensure the following computations are performed in default rounding - mode instead of just reusing the round to zero computation. */ - asm volatile ("" : "=m" (u) : "m" (u)); /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c index 00cd38270f..7a939aaed1 100644 --- a/sysdeps/ieee754/dbl-64/s_fmaf.c +++ b/sysdeps/ieee754/dbl-64/s_fmaf.c @@ -35,12 +35,18 @@ __fmaf (float x, float y, float z) /* Multiplication is always exact. */ double temp = (double) x * (double) y; union ieee754_double u; - libc_feholdexcept_setroundf (&env, FE_TOWARDZERO); + + libc_feholdexcept_setround (&env, FE_TOWARDZERO); + /* Perform addition with round to odd. */ u.d = temp + (double) z; + + /* Reset rounding mode and test for inexact simultaneously. */ + int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0; + if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) - u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0; - libc_feupdateenv (&env); + u.ieee.mantissa1 |= j; + /* And finally truncation with round to nearest. */ return (float) u.d; } diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 3289afc58b..aa208b2e18 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -108,13 +108,33 @@ libc_fesetenv (fenv_t *e) #define libc_fesetenv libc_fesetenv #define libc_fesetenvf libc_fesetenv +static __always_inline int +libc_feupdateenv_test (fenv_t *e, int ex) +{ + unsigned int mxcsr, old_mxcsr, cur_ex; + asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); + cur_ex = mxcsr & FE_ALL_EXCEPT; + + /* Merge current exceptions with the old environment. */ + old_mxcsr = e->__mxcsr; + mxcsr = old_mxcsr | cur_ex; + asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); + + /* Raise SIGFPE for any new exceptions since the hold. Expect that + the normal environment has all exceptions masked. */ + if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0)) + __feraiseexcept (cur_ex); + + /* Test for exceptions raised since the hold. */ + return cur_ex & ex; +} +#define libc_feupdateenv_test libc_feupdateenv_test +#define libc_feupdateenv_testf libc_feupdateenv_test + static __always_inline void libc_feupdateenv (fenv_t *e) { - unsigned int mxcsr; - asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); - asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); - __feraiseexcept (mxcsr & FE_ALL_EXCEPT); + libc_feupdateenv_test (e, 0); } #define libc_feupdateenv libc_feupdateenv #define libc_feupdateenvf libc_feupdateenv |