aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2011-10-17 11:23:40 -0400
committer Ulrich Drepper <drepper@gmail.com> 2011-10-17 11:23:40 -0400
commit ed22dcf691326d148222eb9a215d6d98bb8073a4 (patch)
tree 9a7f83e6295b8ac6610e6637defc66f693ea252c
parent b171c137687dd4328f756d141d17f30bae750079 (diff)
download glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.gz
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.tar.bz2
glibc-ed22dcf691326d148222eb9a215d6d98bb8073a4.zip
Provide internal optimizations on x86-64 with SSE4.1
Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled.
-rw-r--r-- ChangeLog 10
-rw-r--r-- sysdeps/x86_64/fpu/bits/mathinline.h 31
-rw-r--r-- sysdeps/x86_64/fpu/math_private.h 28
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_floor-c.c 1
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_floorf-c.c 1
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_rint-c.c 1
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_rintf-c.c 1
7 files changed, 61 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 289475c20c..af72e63d8d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2011-10-17 Ulrich Drepper <drepper@gmail.com>
+ * sysdeps/x86_64/fpu/bits/mathinline.h: Don't define inlines if
+ __NO_MATH_INLINES is defined. Cleanups.
+
+ * sysdeps/x86_64/fpu/math_private.h: Define __rint, __rintf, __floor,
+ and __floorf if target has SSE4.1.
+ * sysdeps/x86_64/fpu/multiarch/s_floor-c.c: Undef first.
+ * sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: Likewise.
+ * sysdeps/x86_64/fpu/multiarch/s_rint-c.c: Likewise.
+ * sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: Likewise.
+
* sysdeps/x86_64/fpu/bits/mathinline.h (floor): Use correct function
name.
(floorf): Likewise.
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 210bef8102..721f6e4731 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -30,34 +30,35 @@
#endif
-#if defined __GNUC__ && __GNUC__ >= 2
-# ifdef __USE_ISOC99
+/* The gcc, version 2.7 or below, has problems with all this inlining
+ code. So disable it for this version of the compiler. */
+#if __GNUC_PREREQ (2, 8) && defined __USE_ISOC99
__BEGIN_NAMESPACE_C99
/* Test for negative number. Used in the signbit() macro. */
__MATH_INLINE int
__NTH (__signbitf (float __x))
{
-# if __WORDSIZE == 32
+# if __WORDSIZE == 32
__extension__ union { float __f; int __i; } __u = { __f: __x };
return __u.__i < 0;
-# else
+# else
int __m;
__asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
return __m & 0x8;
-# endif
+# endif
}
__MATH_INLINE int
__NTH (__signbit (double __x))
{
-# if __WORDSIZE == 32
+# if __WORDSIZE == 32
__extension__ union { double __d; int __i[2]; } __u = { __d: __x };
return __u.__i[1] < 0;
-# else
+# else
int __m;
__asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
return __m & 0x80;
-# endif
+# endif
}
__MATH_INLINE int
__NTH (__signbitl (long double __x))
@@ -66,6 +67,16 @@ __NTH (__signbitl (long double __x))
return (__u.__i[2] & 0x8000) != 0;
}
+__END_NAMESPACE_C99
+#endif
+
+
+#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
+ && defined __OPTIMIZE__)
+
+# ifdef __USE_ISOC99
+__BEGIN_NAMESPACE_C99
+
/* Round to nearest integer. */
# if __WORDSIZE == 64 || defined __SSE_MATH__
__MATH_INLINE long int
@@ -100,14 +111,10 @@ __NTH (llrint (double __x))
__asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
return __res;
}
-
-__END_NAMESPACE_C99
# endif
# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
&& (__WORDSIZE == 64 || defined __SSE2_MATH__)
-__BEGIN_NAMESPACE_C99
-
/* Determine maximum of two values. */
__MATH_INLINE float
__NTH (fmaxf (float __x, float __y))
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 523ec549ac..71eb41664f 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -90,3 +90,31 @@ do { \
({ long double __res; \
asm ("fsqrt" : "=t" (__res) : "0" ((long double) d)); \
__res; })
+
+#ifdef __SSE4_1__
+# ifndef __rint
+# define __rint(d) \
+ ({ double __res; \
+ asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) d)); \
+ __res; })
+# endif
+# ifndef __rintf
+# define __rintf(d) \
+ ({ float __res; \
+ asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) d)); \
+ __res; })
+# endif
+
+# ifndef __floor
+# define __floor(d) \
+ ({ double __res; \
+ asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) d)); \
+ __res; })
+# endif
+# ifndef __floorf
+# define __floorf(d) \
+ ({ float __res; \
+ asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) d)); \
+ __res; })
+# endif
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
index 8b8c31d945..68733b69ef 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
@@ -1,2 +1,3 @@
+#undef __floor
#define __floor __floor_c
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
index 3f367863a7..2386362328 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
@@ -1,2 +1,3 @@
+#undef __floorf
#define __floorf __floorf_c
#include <sysdeps/ieee754/flt-32/s_floorf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
index f29f45b062..162a630ff9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
@@ -1,2 +1,3 @@
+#undef __rint
#define __rint __rint_c
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
index 30ed42a656..8505249f34 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
@@ -1,2 +1,3 @@
+#undef __rintf
#define __rintf __rintf_c
#include <sysdeps/ieee754/flt-32/s_rintf.c>