about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Henderson <rth@twiddle.net> 2012-03-06 15:41:14 -0800
committer Richard Henderson <rth@twiddle.net> 2012-03-09 11:15:19 -0800
commit 64e21edef13c6d2592f276d599d8eed01a1b1a9a (patch)
tree 5fe81b3c9db53f193b4eed171fa9277b4682057a
parent 38842f4553f5dfebd8c276e07dbbbadc60921fef (diff)
download glibc-64e21edef13c6d2592f276d599d8eed01a1b1a9a.tar
glibc-64e21edef13c6d2592f276d599d8eed01a1b1a9a.tar.gz
glibc-64e21edef13c6d2592f276d599d8eed01a1b1a9a.tar.bz2
glibc-64e21edef13c6d2592f276d599d8eed01a1b1a9a.zip
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines.
-rw-r--r-- ChangeLog 4
-rw-r--r-- sysdeps/x86_64/fpu/math_private.h 54
2 files changed, 36 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f89a951c9..d8d167aaa2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2012-03-08 Richard Henderson <rth@twiddle.net>
+ * sysdeps/x86_64/fpu/math_private.h (__ieee754_sqrt): Convert from
+ macro to inline function.
+ (__ieee754_sqrtf, __ieee754_sqrtl): Likewise.
+
* sysdeps/ieee754/ldbl-opt/math_ldbl_opt.h: Include <math_private.h>,
not <math/math_private.h>.
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 8e79718262..07bc7e30c4 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -1,4 +1,5 @@
-#ifndef _MATH_PRIVATE_H
+#ifndef X86_64_MATH_PRIVATE_H
+#define X86_64_MATH_PRIVATE_H 1
#define math_opt_barrier(x) \
({ __typeof(x) __x; \
@@ -67,7 +68,6 @@
f = f__; \
} while (0)
-#endif
#define __isnan(d) \
({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \
@@ -90,29 +90,37 @@
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
(__di & 0x7fffffff) < 0x7f800000; })
+extern __always_inline double
+__ieee754_sqrt (double d)
+{
+ double res;
#if defined __AVX__ || defined SSE2AVX
-# define __ieee754_sqrt(d) \
- ({ double __res; \
- asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
- __res; })
-# define __ieee754_sqrtf(d) \
- ({ float __res; \
- asm ("vsqrtss %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
- __res; })
+ asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));
#else
-# define __ieee754_sqrt(d) \
- ({ double __res; \
- asm ("sqrtsd %1, %0" : "=x" (__res) : "xm" ((double) (d))); \
- __res; })
-# define __ieee754_sqrtf(d) \
- ({ float __res; \
- asm ("sqrtss %1, %0" : "=x" (__res) : "xm" ((float) (d))); \
- __res; })
+ asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));
#endif
-#define __ieee754_sqrtl(d) \
- ({ long double __res; \
- asm ("fsqrt" : "=t" (__res) : "0" ((long double) (d))); \
- __res; })
+ return res;
+}
+
+extern __always_inline float
+__ieee754_sqrtf (float d)
+{
+ float res;
+#if defined __AVX__ || defined SSE2AVX
+ asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));
+#else
+ asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));
+#endif
+ return res;
+}
+
+extern __always_inline long double
+__ieee754_sqrtl (long double d)
+{
+ long double res;
+ asm ("fsqrt" : "=t" (res) : "0" (d));
+ return res;
+}
#ifdef __SSE4_1__
# ifndef __rint
@@ -226,3 +234,5 @@
#undef libc_feupdateenvf
#define libc_feupdateenvf(e) libc_feupdateenv (e)
// #define libc_feupdateenvl(e) (void) feupdateenv (e)
+
+#endif /* X86_64_MATH_PRIVATE_H */