aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/multiarch
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2014-07-14 07:58:27 -0700
committerH.J. Lu <hjl.tools@gmail.com>2014-07-14 07:58:27 -0700
commitf2fef657d8736c32fb600771949f59852558b11a (patch)
treea9e954223fa5cbd9e53f143e0f50c6f6276ebe0c /sysdeps/x86_64/multiarch
parentf6c44d475104e931bab2b4ffa499961088de673c (diff)
downloadglibc-f2fef657d8736c32fb600771949f59852558b11a.tar
glibc-f2fef657d8736c32fb600771949f59852558b11a.tar.gz
glibc-f2fef657d8736c32fb600771949f59852558b11a.tar.bz2
glibc-f2fef657d8736c32fb600771949f59852558b11a.zip
Enable AVX2 optimized memset only if -mavx2 works
* config.h.in (HAVE_AVX2_SUPPORT): New #undef. * sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and config-cflags-avx2. * sysdeps/x86_64/configure.ac: Likewise. * sysdeps/i386/configure: Regenerated. * sysdeps/x86_64/configure: Likewise. * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add memset-avx2 only if config-cflags-avx2 is yes. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Tests for memset_chk and memset only if HAVE_AVX2_SUPPORT is defined. * sysdeps/x86_64/multiarch/memset.S: Define multiple versions only if HAVE_AVX2_SUPPORT is defined. * sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r--sysdeps/x86_64/multiarch/Makefile7
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c2
-rw-r--r--sysdeps/x86_64/multiarch/memset.S24
-rw-r--r--sysdeps/x86_64/multiarch/memset_chk.S2
4 files changed, 21 insertions, 14 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 42df96f636..3bb9702b95 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -17,8 +17,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
- strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
- memset-avx2
+ strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
@@ -27,6 +26,10 @@ CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
endif
+
+ifeq (yes,$(config-cflags-avx2))
+sysdep_routines += memset-avx2
+endif
endif
ifeq ($(subdir),wcsmbs)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index f1593c5ea1..7e93e598db 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -61,6 +61,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#ifdef HAVE_AVX2_SUPPORT
/* Support sysdeps/x86_64/multiarch/memset_chk.S. */
IFUNC_IMPL (i, name, __memset_chk,
IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
@@ -71,6 +72,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, memset,
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2))
+#endif
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index 3113d1cbc0..00d46d12d0 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -17,12 +17,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifdef HAVE_AVX2_SUPPORT
#include <sysdep.h>
#include <shlib-compat.h>
#include <init-arch.h>
/* Define multiple versions only for the definition in lib. */
-#ifndef NOT_IN_libc
+# ifndef NOT_IN_libc
ENTRY(memset)
.type memset, @gnu_indirect_function
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
@@ -34,26 +35,27 @@ ENTRY(memset)
leaq __memset_avx2(%rip), %rax
2: ret
END(memset)
-#endif
+# endif
-#if !defined NOT_IN_libc
-# undef memset
-# define memset __memset_sse2
+# if !defined NOT_IN_libc
+# undef memset
+# define memset __memset_sse2
-# undef __memset_chk
-# define __memset_chk __memset_chk_sse2
+# undef __memset_chk
+# define __memset_chk __memset_chk_sse2
-# ifdef SHARED
+# ifdef SHARED
# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memset calls through a PLT.
The speedup we get from using GPR instruction is likely eaten away
by the indirect call in the PLT. */
# define libc_hidden_builtin_def(name) \
.globl __GI_memset; __GI_memset = __memset_sse2
-# endif
+# endif
-# undef strong_alias
-# define strong_alias(original, alias)
+# undef strong_alias
+# define strong_alias(original, alias)
+# endif
#endif
#include "../memset.S"
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
index 2182780822..8a607bd6b7 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -22,7 +22,7 @@
/* Define multiple versions only for the definition in lib. */
#ifndef NOT_IN_libc
-# ifdef SHARED
+# if defined SHARED && defined HAVE_AVX2_SUPPORT
ENTRY(__memset_chk)
.type __memset_chk, @gnu_indirect_function
cmpl $0, __cpu_features+KIND_OFFSET(%rip)