aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/i386/fpu/e_expf.S
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2017-10-22 07:54:38 -0700
committerH.J. Lu <hjl.tools@gmail.com>2017-10-22 07:54:50 -0700
commitb2f6137ea570933fb4be286574cc58b794eb5b5f (patch)
treeb95f459a9090aba7854e32c454001a610f400728 /sysdeps/i386/fpu/e_expf.S
parente1f59bebd885c442c14df3554da9fe08792ce7ce (diff)
downloadglibc-b2f6137ea570933fb4be286574cc58b794eb5b5f.tar
glibc-b2f6137ea570933fb4be286574cc58b794eb5b5f.tar.gz
glibc-b2f6137ea570933fb4be286574cc58b794eb5b5f.tar.bz2
glibc-b2f6137ea570933fb4be286574cc58b794eb5b5f.zip
i386: Replace assembly versions of e_expf with generic e_expf.c
This patch replaces i386 assembly versions of e_expf with generic e_expf.c. For workload-spec2017.wrf, on Nehalem, it improves performance by: Before After Improvement reciprocal-throughput 55.5724 40.2664 38% latency 80.0687 60.8517 31% On Skylake, it improves performance by: Before After Improvement reciprocal-throughput 62.4056 39.4188 58% latency 85.5496 59.6377 43% On IvyBridge with --disable-multi-arch, it improves performance by: Before After Improvement reciprocal-throughput 133.707 40.3778 231% latency 149.191 63.2515 135% * sysdeps/i386/fpu/e_exp2f_data.c: Removed. * sysdeps/i386/fpu/e_expf.S: Likewise. * sysdeps/i386/fpu/math_errf.c: Likewise. * sysdeps/i386/fpu/w_expf.c: Likewise. * sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S: Likewise. * sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S: Likewise. * sysdeps/i386/i686/fpu/multiarch/w_expf.c: Likewise. * sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_expf.c. * sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise. * sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines): Remove e_expf-ia32. (CFLAGS-e_expf-sse2.c): New. * sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.c: New file. * sysdeps/i386/i686/fpu/multiarch/e_expf.c: Rewritten.
Diffstat (limited to 'sysdeps/i386/fpu/e_expf.S')
-rw-r--r--sysdeps/i386/fpu/e_expf.S74
1 files changed, 0 insertions, 74 deletions
diff --git a/sysdeps/i386/fpu/e_expf.S b/sysdeps/i386/fpu/e_expf.S
deleted file mode 100644
index 65cb4ec204..0000000000
--- a/sysdeps/i386/fpu/e_expf.S
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>.
- */
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-
-DEFINE_FLT_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-#else
-# define MO(op) op
-#endif
-
- .text
-/* e^x = 2^(x * log2(e)) */
-ENTRY(__ieee754_expf)
-#ifdef PIC
- LOAD_PIC_REG (cx)
-#endif
- flds 4(%esp)
-/* I added the following ugly construct because exp(+-Inf) resulted
- in NaN. The ugliness results from the bright minds at Intel.
- For the i686 the code can be written better.
- -- drepper@cygnus.com. */
- fxam /* Is NaN or +-Inf? */
- fstsw %ax
- movb $0x45, %dh
- andb %ah, %dh
- cmpb $0x05, %dh
- je 1f /* Is +-Inf, jump. */
- fldl2e
- fmulp /* x * log2(e) */
- fld %st
- frndint /* int(x * log2(e)) */
- fsubr %st,%st(1) /* fract(x * log2(e)) */
- fxch
- f2xm1 /* 2^(fract(x * log2(e))) - 1 */
- fld1
- faddp /* 2^(fract(x * log2(e))) */
- fscale /* e^x */
- fstp %st(1)
- FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
- ret
-
-1: testl $0x200, %eax /* Test sign. */
- jz 2f /* If positive, jump. */
- fstp %st
- fldz /* Set result to 0. */
-2: ret
-END (__ieee754_expf)
-
-
-ENTRY(__expf_finite)
-#ifdef PIC
- LOAD_PIC_REG (cx)
-#endif
- fldl2e
- fmuls 4(%esp) /* x * log2(e) */
- fld %st
- frndint /* int(x * log2(e)) */
- fsubr %st,%st(1) /* fract(x * log2(e)) */
- fxch
- f2xm1 /* 2^(fract(x * log2(e))) - 1 */
- fld1
- faddp /* 2^(fract(x * log2(e))) */
- fscale /* e^x */
- fstp %st(1)
- FLT_NARROW_EVAL_UFLOW_NONNEG
- ret
-END(__expf_finite)