diff options
author | Ulrich Drepper <drepper@gmail.com> | 2011-10-17 11:23:40 -0400 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-10-17 11:23:40 -0400 |
commit | ed22dcf691326d148222eb9a215d6d98bb8073a4 (patch) | |
tree | 9a7f83e6295b8ac6610e6637defc66f693ea252c | |
parent | b171c137687dd4328f756d141d17f30bae750079 (diff) | |
download | glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.gz glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.bz2 glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.zip |
Provide internal optimizations on x86-64 with SSE4.1
Provide macros so that the internal users can, if possible, directly use
the new instructions.
Also fix up the mathinline.h header when compiling with SSE4.1 enabled.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/bits/mathinline.h | 31 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/math_private.h | 28 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_floor-c.c | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_floorf-c.c | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_rint-c.c | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_rintf-c.c | 1 |
7 files changed, 61 insertions, 12 deletions
@@ -1,5 +1,15 @@ 2011-10-17 Ulrich Drepper <drepper@gmail.com> + * sysdeps/x86_64/fpu/bits/mathinline.h: Don't define inlines if + __NO_MATH_INLINES is defined. Cleanups. + + * sysdeps/x86_64/fpu/math_private.h: Define __rint, __rintf, __floor, + and __floorf if target has SSE4.1. + * sysdeps/x86_64/fpu/multiarch/s_floor-c.c: Undef first. + * sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_rint-c.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: Likewise. + * sysdeps/x86_64/fpu/bits/mathinline.h (floor): Use correct function name. (floorf): Likewise. diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h index 210bef8102..721f6e4731 100644 --- a/sysdeps/x86_64/fpu/bits/mathinline.h +++ b/sysdeps/x86_64/fpu/bits/mathinline.h @@ -30,34 +30,35 @@ #endif -#if defined __GNUC__ && __GNUC__ >= 2 -# ifdef __USE_ISOC99 +/* The gcc, version 2.7 or below, has problems with all this inlining + code. So disable it for this version of the compiler. */ +#if __GNUC_PREREQ (2, 8) && defined __USE_ISOC99 __BEGIN_NAMESPACE_C99 /* Test for negative number. Used in the signbit() macro. 
*/ __MATH_INLINE int __NTH (__signbitf (float __x)) { -# if __WORDSIZE == 32 +# if __WORDSIZE == 32 __extension__ union { float __f; int __i; } __u = { __f: __x }; return __u.__i < 0; -# else +# else int __m; __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); return __m & 0x8; -# endif +# endif } __MATH_INLINE int __NTH (__signbit (double __x)) { -# if __WORDSIZE == 32 +# if __WORDSIZE == 32 __extension__ union { double __d; int __i[2]; } __u = { __d: __x }; return __u.__i[1] < 0; -# else +# else int __m; __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); return __m & 0x80; -# endif +# endif } __MATH_INLINE int __NTH (__signbitl (long double __x)) @@ -66,6 +67,16 @@ __NTH (__signbitl (long double __x)) return (__u.__i[2] & 0x8000) != 0; } +__END_NAMESPACE_C99 +#endif + + +#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \ + && defined __OPTIMIZE__) + +# ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 + /* Round to nearest integer. */ # if __WORDSIZE == 64 || defined __SSE_MATH__ __MATH_INLINE long int @@ -100,14 +111,10 @@ __NTH (llrint (double __x)) __asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); return __res; } - -__END_NAMESPACE_C99 # endif # if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ && (__WORDSIZE == 64 || defined __SSE2_MATH__) -__BEGIN_NAMESPACE_C99 - /* Determine maximum of two values. 
*/ __MATH_INLINE float __NTH (fmaxf (float __x, float __y)) diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 523ec549ac..71eb41664f 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -90,3 +90,31 @@ do { \ ({ long double __res; \ asm ("fsqrt" : "=t" (__res) : "0" ((long double) d)); \ __res; }) + +#ifdef __SSE4_1__ +# ifndef __rint +# define __rint(d) \ + ({ double __res; \ + asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) d)); \ + __res; }) +# endif +# ifndef __rintf +# define __rintf(d) \ + ({ float __res; \ + asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) d)); \ + __res; }) +# endif + +# ifndef __floor +# define __floor(d) \ + ({ double __res; \ + asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) d)); \ + __res; }) +# endif +# ifndef __floorf +# define __floorf(d) \ + ({ float __res; \ + asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) d)); \ + __res; }) +# endif +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c index 8b8c31d945..68733b69ef 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c @@ -1,2 +1,3 @@ +#undef __floor #define __floor __floor_c #include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c index 3f367863a7..2386362328 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c @@ -1,2 +1,3 @@ +#undef __floorf #define __floorf __floorf_c #include <sysdeps/ieee754/flt-32/s_floorf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c index f29f45b062..162a630ff9 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c @@ -1,2 +1,3 @@ +#undef __rint #define __rint __rint_c #include 
<sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c index 30ed42a656..8505249f34 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c @@ -1,2 +1,3 @@ +#undef __rintf #define __rintf __rintf_c #include <sysdeps/ieee754/flt-32/s_rintf.c> |