diff options
Diffstat (limited to 'REORG.TODO/sysdeps/x86_64/fpu/math_private.h')
-rw-r--r-- | REORG.TODO/sysdeps/x86_64/fpu/math_private.h | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/x86_64/fpu/math_private.h b/REORG.TODO/sysdeps/x86_64/fpu/math_private.h new file mode 100644 index 0000000000..027a6a3a4d --- /dev/null +++ b/REORG.TODO/sysdeps/x86_64/fpu/math_private.h @@ -0,0 +1,133 @@ +#ifndef X86_64_MATH_PRIVATE_H +#define X86_64_MATH_PRIVATE_H 1 + +/* We can do a few things better on x86-64. */ + +#if defined __AVX__ || defined SSE2AVX +# define MOVD "vmovd" +# define MOVQ "vmovq" +#else +# define MOVD "movd" +# define MOVQ "movq" +#endif + +/* Direct movement of float into integer register. */ +#define EXTRACT_WORDS64(i, d) \ + do { \ + int64_t i_; \ + asm (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d))); \ + (i) = i_; \ + } while (0) + +/* And the reverse. */ +#define INSERT_WORDS64(d, i) \ + do { \ + int64_t i_ = i; \ + double d__; \ + asm (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_)); \ + d = d__; \ + } while (0) + +/* Direct movement of float into integer register. */ +#define GET_FLOAT_WORD(i, d) \ + do { \ + int i_; \ + asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d))); \ + (i) = i_; \ + } while (0) + +/* And the reverse. */ +#define SET_FLOAT_WORD(f, i) \ + do { \ + int i_ = i; \ + float f__; \ + asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_)); \ + f = f__; \ + } while (0) + +#include <sysdeps/i386/fpu/fenv_private.h> +#include_next <math_private.h> + +extern __always_inline double +__ieee754_sqrt (double d) +{ + double res; +#if defined __AVX__ || defined SSE2AVX + asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d)); +#else + asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d)); +#endif + return res; +} + +extern __always_inline float +__ieee754_sqrtf (float d) +{ + float res; +#if defined __AVX__ || defined SSE2AVX + asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d)); +#else + asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d)); +#endif + return res; +} + +extern __always_inline long double +__ieee754_sqrtl (long double d) +{ + long double res; + asm ("fsqrt" : "=t" (res) : "0" (d)); + return res; +} + +#ifdef __SSE4_1__ +extern __always_inline double +__rint (double d) +{ + double res; +# if defined __AVX__ || defined SSE2AVX + asm ("vroundsd $4, %1, %0, %0" : "=x" (res) : "xm" (d)); +# else + asm ("roundsd $4, %1, %0" : "=x" (res) : "xm" (d)); +# endif + return res; +} + +extern __always_inline float +__rintf (float d) +{ + float res; +# if defined __AVX__ || defined SSE2AVX + asm ("vroundss $4, %1, %0, %0" : "=x" (res) : "xm" (d)); +# else + asm ("roundss $4, %1, %0" : "=x" (res) : "xm" (d)); +# endif + return res; +} + +extern __always_inline double +__floor (double d) +{ + double res; +# if defined __AVX__ || defined SSE2AVX + asm ("vroundsd $1, %1, %0, %0" : "=x" (res) : "xm" (d)); +# else + asm ("roundsd $1, %1, %0" : "=x" (res) : "xm" (d)); +# endif + return res; +} + +extern __always_inline float +__floorf (float d) +{ + float res; +# if defined __AVX__ || defined SSE2AVX + asm ("vroundss $1, %1, %0, %0" : "=x" (res) : "xm" (d)); +# else + asm ("roundss $1, %1, %0" : "=x" (res) : "xm" (d)); +# endif + return res; +} +#endif /* __SSE4_1__ */ + +#endif /* X86_64_MATH_PRIVATE_H */ |