about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@gmail.com> 2011-10-18 09:00:46 -0400
committer: Ulrich Drepper <drepper@gmail.com> 2011-10-18 09:00:46 -0400
commit: d38f1dba009689d78af371cffa091b27e4ebe17d (patch)
tree: da7daa82001d244a4dbc1ce6dc72cf4e8eac45db
parent: 83c7615c2dc344562f6a1a499a269b114f74c7e5 (diff)
download: glibc-d38f1dba009689d78af371cffa091b27e4ebe17d.tar
glibc-d38f1dba009689d78af371cffa091b27e4ebe17d.tar.gz
glibc-d38f1dba009689d78af371cffa091b27e4ebe17d.tar.bz2
glibc-d38f1dba009689d78af371cffa091b27e4ebe17d.zip
Start optimizing the use of the fenv interfaces in libm itself
-rw-r--r-- ChangeLog 15
-rw-r--r-- math/math_private.h 22
-rw-r--r-- sysdeps/ieee754/dbl-64/e_exp2.c 19
-rw-r--r-- sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c 28
-rw-r--r-- sysdeps/x86_64/fpu/math_private.h 42
5 files changed, 95 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 1ca1cca3c4..c391f612ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2011-10-18 Ulrich Drepper <drepper@gmail.com>
+
+ * math/math_private.h: Define defaults for libc_fegetround,
+ libc_fegetroundf, libc_fegetroundl, libc_fesetround, libc_fesetroundf,
+ libc_fesetroundl, libc_feholdexcept, libc_feholdexceptf,
+ libc_feholdexceptl, libc_fesetenv, libc_fesetenvf, libc_fesetenvl.
+ * sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c: Use
+ libc_feholdexcept, libc_fesetround, libc_fesetenv instead of the
+ standard functions.
+ * sysdeps/ieee754/dbl-64/e_exp2.c: Likewise.
+ Remove comments and hacks for old compiler versions.
+ * sysdeps/x86_64/fpu/math_private.h: Define special versions of
+ libc_fegetround, libc_fesetround, libc_feholdexcept, and
+ libc_fesetenv.
+
2011-10-18 Andreas Schwab <schwab@redhat.com>
* sysdeps/x86_64/fpu/bits/fenv.h: Add C linkage markers.
diff --git a/math/math_private.h b/math/math_private.h
index c5fbf15f65..a1ce0142b1 100644
--- a/math/math_private.h
+++ b/math/math_private.h
@@ -358,4 +358,26 @@ extern void __docos (double __x, double __dx, double __v[]);
#define math_force_eval(x) __asm __volatile ("" : : "m" (x))
#endif
+
+/* The standards only specify one variant of the fenv.h interfaces.
+ But at least for some architectures we can be more efficient if we
+ know what operations are going to be performed. Therefore we
+ define additional interfaces. By default they refer to the normal
+ interfaces. */
+#define libc_fegetround() fegetround ()
+#define libc_fegetroundf() fegetround ()
+#define libc_fegetroundl() fegetround ()
+
+#define libc_fesetround(r) (void) fesetround (r)
+#define libc_fesetroundf(r) (void) fesetround (r)
+#define libc_fesetroundl(r) (void) fesetround (r)
+
+#define libc_feholdexcept(e) (void) feholdexcept (e)
+#define libc_feholdexceptf(e) (void) feholdexcept (e)
+#define libc_feholdexceptl(e) (void) feholdexcept (e)
+
+#define libc_fesetenv(e) (void) fesetenv (e)
+#define libc_fesetenvf(e) (void) fesetenv (e)
+#define libc_fesetenvl(e) (void) fesetenv (e)
+
#endif /* _MATH_PRIVATE_H_ */
diff --git a/sysdeps/ieee754/dbl-64/e_exp2.c b/sysdeps/ieee754/dbl-64/e_exp2.c
index c973f35673..734e476ce5 100644
--- a/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -25,9 +25,6 @@
17 (1), March 1991, pp. 26-45.
It has been slightly modified to compute 2^x instead of e^x.
*/
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
#include <stdlib.h>
#include <float.h>
#include <ieee754.h>
@@ -38,13 +35,8 @@
#include "t_exp2.h"
-/* XXX I know the assembler generates a warning about incorrect section
- attributes. But without the attribute here the compiler places the
- constants in the .data section. Ideally the constant is placed in
- .rodata.cst8 so that it can be merged, but gcc sucks, it ICEs when
- we try to force this section on it. --drepper */
-static const volatile double TWO1023 = 8.988465674311579539e+307;
-static const volatile double TWOM1000 = 9.3326361850321887899e-302;
+static const double TWO1023 = 8.988465674311579539e+307;
+static const double TWOM1000 = 9.3326361850321887899e-302;
double
__ieee754_exp2 (double x)
@@ -72,10 +64,10 @@ __ieee754_exp2 (double x)
union ieee754_double ex2_u, scale_u;
fenv_t oldenv;
- feholdexcept (&oldenv);
+ libc_feholdexcept (&oldenv);
#ifdef FE_TONEAREST
/* If we don't have this, it's too bad. */
- fesetround (FE_TONEAREST);
+ libc_fesetround (FE_TONEAREST);
#endif
/* 1. Argument reduction.
@@ -120,9 +112,10 @@ __ieee754_exp2 (double x)
* x + .055504110254308625)
* x + .240226506959100583)
* x + .69314718055994495) * ex2_u.d;
+ math_opt_barrier (x22);
/* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
- fesetenv (&oldenv);
+ libc_fesetenv (&oldenv);
result = x22 * x + ex2_u.d;
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c
index cb49019ddb..861da20b10 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c
@@ -24,22 +24,14 @@
#include "math.h"
#include "math_private.h"
-#ifdef __STDC__
static const double
-#else
-static double
-#endif
TWO52[2]={
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
-#ifdef __STDC__
- double __nearbyint(double x)
-#else
- double __nearbyint(x)
- double x;
-#endif
+double
+__nearbyint(double x)
{
fenv_t env;
int64_t i0,sx;
@@ -47,20 +39,19 @@ TWO52[2]={
EXTRACT_WORDS64(i0,x);
sx = (i0>>63)&1;
j0 = ((i0>>52)&0x7ff)-0x3ff;
- if(j0<52) {
+ if(__builtin_expect(j0<52, 1)) {
if(j0<0) {
if((i0&UINT64_C(0x7fffffffffffffff))==0) return x;
uint64_t i = i0 & UINT64_C(0xfffffffffffff);
i0 &= UINT64_C(0xfffe000000000000);
i0 |= (((i|-i) >> 12) & UINT64_C(0x8000000000000));
INSERT_WORDS64(x,i0);
- feholdexcept (&env);
+ libc_feholdexcept (&env);
double w = TWO52[sx]+x;
double t = w-TWO52[sx];
- fesetenv (&env);
- EXTRACT_WORDS64(i0,t);
- INSERT_WORDS64(t,(i0&UINT64_C(0x7fffffffffffffff))|(sx<<63));
- return t;
+ math_opt_barrier(t);
+ libc_fesetenv (&env);
+ return copysign(t, x);
} else {
uint64_t i = UINT64_C(0x000fffffffffffff)>>j0;
if((i0&i)==0) return x; /* x is integral */
@@ -73,10 +64,11 @@ TWO52[2]={
else return x; /* x is integral */
}
INSERT_WORDS64(x,i0);
- feholdexcept (&env);
+ libc_feholdexcept (&env);
double w = TWO52[sx]+x;
double t = w-TWO52[sx];
- fesetenv (&env);
+ math_opt_barrier (t);
+ libc_fesetenv (&env);
return t;
}
weak_alias (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 6cff8b3161..4886c64dc3 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -118,3 +118,45 @@ do { \
__res; })
# endif
#endif
+
+
+/* Specialized variants of the <fenv.h> interfaces which only handle
+ either the FPU or the SSE unit. */
+#undef libc_fegetround
+#define libc_fegetround() \
+ ({ \
+ unsigned int mxcsr; \
+ asm volatile ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ (mxcsr & 0x6000) >> 3; \
+ })
+// #define libc_fegetroundf() fegetround ()
+// #define libc_fegetroundl() fegetround ()
+
+#undef libc_fesetround
+#define libc_fesetround(r) \
+ do { \
+ unsigned int mxcsr; \
+ asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
+ asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
+ } while (0)
+// #define libc_fesetroundf(r) (void) fesetround (r)
+// #define libc_fesetroundl(r) (void) fesetround (r)
+
+#undef libc_feholdexcept
+#define libc_feholdexcept(e) \
+ do { \
+ unsigned int mxcsr; \
+ asm ("stmxcsr %0" : "=m" (*&mxcsr)); \
+ (e)->__mxcsr = mxcsr; \
+ mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
+ asm volatile ("ldmxcsr %0" : : "m" (*&mxcsr)); \
+ } while (0)
+// #define libc_feholdexceptf(e) (void) feholdexcept (e)
+// #define libc_feholdexceptl(e) (void) feholdexcept (e)
+
+#undef libc_fesetenv
+#define libc_fesetenv(e) \
+ asm volatile ("ldmxcsr %0" : : "m" ((e)->__mxcsr))
+// #define libc_fesetenvf(e) (void) fesetenv (e)
+// #define libc_fesetenvl(e) (void) fesetenv (e)