summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <rth@twiddle.net>2012-03-10 08:53:05 -0800
committerRichard Henderson <rth@twiddle.net>2012-03-19 06:50:41 -0700
commit0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876 (patch)
treeb7daee8ebf19cc6f493921a1862408ef166b6b14
parenteb92c487b35e26aa1e08815c4480d0bc5cc9f370 (diff)
downloadglibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.tar
glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.tar.gz
glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.tar.bz2
glibc-0fe0f1f86f82f9e08ca4d4b85111de03f4c2f876.zip
Create and use libc_feupdateenv_test.
We can reduce the number of STMXCSR, and often we can avoid the call to __feraiseexcept.
-rw-r--r--ChangeLog9
-rw-r--r--sysdeps/generic/math_private.h18
-rw-r--r--sysdeps/ieee754/dbl-64/s_fma.c25
-rw-r--r--sysdeps/ieee754/dbl-64/s_fmaf.c12
-rw-r--r--sysdeps/x86_64/fpu/math_private.h28
5 files changed, 73 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 277a201fa6..12a99d2bf7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2012-03-19 Richard Henderson <rth@twiddle.net>
+ * sysdeps/generic/math_private.h (default_libc_feupdateenv_test): New.
+ (libc_feupdateenv_test, libc_feupdateenv_testf): New.
+ (libc_feupdateenv_testl): New.
+ * sysdeps/x86_64/fpu/math_private.h (libc_feupdateenv_test): New.
+ (libc_feupdateenv_testf): New.
+ (libc_feupdateenv): Use libc_feupdateenv_test.
+ * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use libc_feupdateenv_test.
+ * sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Likewise.
+
* sysdeps/generic/math_private.h (libc_feholdsetround): New.
(libc_feholdsetroundf, libc_feholdsetroundl): New.
(libc_feresetround, libc_feresetroundf, libc_feresetroundl): New.
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index 0b945f9afc..813ad93611 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -457,6 +457,24 @@ default_libc_feupdateenv (fenv_t *e)
# define libc_feupdateenv_53bit libc_feupdateenv
#endif
+static __always_inline int
+default_libc_feupdateenv_test (fenv_t *e, int ex)
+{
+ int ret = fetestexcept (ex);
+ feupdateenv (e);
+ return ret;
+}
+
+#ifndef libc_feupdateenv_test
+# define libc_feupdateenv_test default_libc_feupdateenv_test
+#endif
+#ifndef libc_feupdateenv_testf
+# define libc_feupdateenv_testf default_libc_feupdateenv_test
+#endif
+#ifndef libc_feupdateenv_testl
+# define libc_feupdateenv_testl default_libc_feupdateenv_test
+#endif
+
/* Save and set the rounding mode. The use of fenv_t to store the old mode
allows a target-specific version of this function to avoid converting the
rounding mode from the fpu format. By default we have no choice but to
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index a27e246a49..ab20a801a4 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -149,35 +149,36 @@ __fma (double x, double y, double z)
fenv_t env;
libc_feholdexcept_setround (&env, FE_TOWARDZERO);
+
/* Perform m2 + a2 addition with round to odd. */
u.d = a2 + m2;
+ if (__builtin_expect (adjust < 0, 0))
+ {
+ if ((u.ieee.mantissa1 & 1) == 0)
+ u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
+ v.d = a1 + u.d;
+ }
+
+ /* Reset rounding mode and test for inexact simultaneously. */
+ int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
+
if (__builtin_expect (adjust == 0, 1))
{
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
- u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
- libc_feupdateenv (&env);
+ u.ieee.mantissa1 |= j;
/* Result is a1 + u.d. */
return a1 + u.d;
}
else if (__builtin_expect (adjust > 0, 1))
{
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
- u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
- libc_feupdateenv (&env);
+ u.ieee.mantissa1 |= j;
/* Result is a1 + u.d, scaled up. */
return (a1 + u.d) * 0x1p53;
}
else
{
- if ((u.ieee.mantissa1 & 1) == 0)
- u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
- v.d = a1 + u.d;
- int j = libc_fetestexcept (FE_INEXACT) != 0;
- libc_feupdateenv (&env);
- /* Ensure the following computations are performed in default rounding
- mode instead of just reusing the round to zero computation. */
- asm volatile ("" : "=m" (u) : "m" (u));
/* If a1 + u.d is exact, the only rounding happens during
scaling down. */
if (j == 0)
diff --git a/sysdeps/ieee754/dbl-64/s_fmaf.c b/sysdeps/ieee754/dbl-64/s_fmaf.c
index 00cd38270f..7a939aaed1 100644
--- a/sysdeps/ieee754/dbl-64/s_fmaf.c
+++ b/sysdeps/ieee754/dbl-64/s_fmaf.c
@@ -35,12 +35,18 @@ __fmaf (float x, float y, float z)
/* Multiplication is always exact. */
double temp = (double) x * (double) y;
union ieee754_double u;
- libc_feholdexcept_setroundf (&env, FE_TOWARDZERO);
+
+ libc_feholdexcept_setround (&env, FE_TOWARDZERO);
+
/* Perform addition with round to odd. */
u.d = temp + (double) z;
+
+ /* Reset rounding mode and test for inexact simultaneously. */
+ int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
+
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
- u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
- libc_feupdateenv (&env);
+ u.ieee.mantissa1 |= j;
+
/* And finally truncation with round to nearest. */
return (float) u.d;
}
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 3289afc58b..aa208b2e18 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -108,13 +108,33 @@ libc_fesetenv (fenv_t *e)
#define libc_fesetenv libc_fesetenv
#define libc_fesetenvf libc_fesetenv
+static __always_inline int
+libc_feupdateenv_test (fenv_t *e, int ex)
+{
+ unsigned int mxcsr, old_mxcsr, cur_ex;
+ asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
+ cur_ex = mxcsr & FE_ALL_EXCEPT;
+
+ /* Merge current exceptions with the old environment. */
+ old_mxcsr = e->__mxcsr;
+ mxcsr = old_mxcsr | cur_ex;
+ asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
+
+ /* Raise SIGFPE for any new exceptions since the hold. Expect that
+ the normal environment has all exceptions masked. */
+ if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0))
+ __feraiseexcept (cur_ex);
+
+ /* Test for exceptions raised since the hold. */
+ return cur_ex & ex;
+}
+#define libc_feupdateenv_test libc_feupdateenv_test
+#define libc_feupdateenv_testf libc_feupdateenv_test
+
static __always_inline void
libc_feupdateenv (fenv_t *e)
{
- unsigned int mxcsr;
- asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
- asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));
- __feraiseexcept (mxcsr & FE_ALL_EXCEPT);
+ libc_feupdateenv_test (e, 0);
}
#define libc_feupdateenv libc_feupdateenv
#define libc_feupdateenvf libc_feupdateenv