aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com>, 2017-08-16 08:43:35 -0700
committer: H.J. Lu <hjl.tools@gmail.com>, 2017-08-16 08:43:48 -0700
commit: 24a2e6588d2e0c91b4003878b0625d4a9360e8f3 (patch)
tree: 11b91098943a676d40fdec100ac8cd3f1ada8a11 /sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
parent: 403143e1df85dadd374f304bd891be0cd7573e3b (diff)
download: glibc-24a2e6588d2e0c91b4003878b0625d4a9360e8f3.tar
glibc-24a2e6588d2e0c91b4003878b0625d4a9360e8f3.tar.gz
glibc-24a2e6588d2e0c91b4003878b0625d4a9360e8f3.tar.bz2
glibc-24a2e6588d2e0c91b4003878b0625d4a9360e8f3.zip
x86-64: Optimize e_expf with FMA [BZ #21912]
FMA optimized e_expf improves performance by more than 50% on Skylake.

	[BZ #21912]
	* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_expf-fma.
	* sysdeps/x86_64/fpu/multiarch/e_expf-fma.S: New file.
	* sysdeps/x86_64/fpu/multiarch/e_expf.c: Likewise.
	* sysdeps/x86_64/fpu/multiarch/ifunc-fma.h: Likewise.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/ifunc-fma.h')
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/ifunc-fma.h | 34
1 files changed, 34 insertions, 0 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
new file mode 100644
index 0000000000..383c41ffb1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
@@ -0,0 +1,34 @@
+/* Common definition for ifunc selections optimized with AVX2/FMA.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+
+/* Candidate implementations; IFUNC_SELECTOR returns one of them.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
+
+/* Return the FMA variant if FMA and AVX2 are both usable, else SSE2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+
+  return (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+          && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+          ? OPTIMIZE (fma) : OPTIMIZE (sse2));
+}