diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2014-07-14 07:58:27 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2014-07-14 07:58:27 -0700 |
commit | f2fef657d8736c32fb600771949f59852558b11a (patch) | |
tree | a9e954223fa5cbd9e53f143e0f50c6f6276ebe0c /sysdeps/x86_64/multiarch | |
parent | f6c44d475104e931bab2b4ffa499961088de673c (diff) | |
download | glibc-f2fef657d8736c32fb600771949f59852558b11a.tar glibc-f2fef657d8736c32fb600771949f59852558b11a.tar.gz glibc-f2fef657d8736c32fb600771949f59852558b11a.tar.bz2 glibc-f2fef657d8736c32fb600771949f59852558b11a.zip |
Enable AVX2 optimized memset only if -mavx2 works
* config.h.in (HAVE_AVX2_SUPPORT): New #undef.
* sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and
config-cflags-avx2.
* sysdeps/x86_64/configure.ac: Likewise.
* sysdeps/i386/configure: Regenerated.
* sysdeps/x86_64/configure: Likewise.
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
memset-avx2 only if config-cflags-avx2 is yes.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
Test memset_chk and memset only if HAVE_AVX2_SUPPORT is
defined.
* sysdeps/x86_64/multiarch/memset.S: Define multiple versions
only if HAVE_AVX2_SUPPORT is defined.
* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r-- | sysdeps/x86_64/multiarch/Makefile | 7 |
-rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-impl-list.c | 2 |
-rw-r--r-- | sysdeps/x86_64/multiarch/memset.S | 24 |
-rw-r--r-- | sysdeps/x86_64/multiarch/memset_chk.S | 2 |
4 files changed, 21 insertions, 14 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 42df96f636..3bb9702b95 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -17,8 +17,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ strcat-sse2-unaligned strncat-sse2-unaligned \ - strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \ - memset-avx2 + strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c varshift @@ -27,6 +26,10 @@ CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strspn-c.c += -msse4 endif + +ifeq (yes,$(config-cflags-avx2)) +sysdep_routines += memset-avx2 +endif endif ifeq ($(subdir),wcsmbs) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index f1593c5ea1..7e93e598db 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -61,6 +61,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) +#ifdef HAVE_AVX2_SUPPORT /* Support sysdeps/x86_64/multiarch/memset_chk.S. */ IFUNC_IMPL (i, name, __memset_chk, IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2) @@ -71,6 +72,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2) IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2)) +#endif /* Support sysdeps/x86_64/multiarch/stpncpy.S. 
*/ IFUNC_IMPL (i, name, stpncpy, diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S index 3113d1cbc0..00d46d12d0 100644 --- a/sysdeps/x86_64/multiarch/memset.S +++ b/sysdeps/x86_64/multiarch/memset.S @@ -17,12 +17,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifdef HAVE_AVX2_SUPPORT #include <sysdep.h> #include <shlib-compat.h> #include <init-arch.h> /* Define multiple versions only for the definition in lib. */ -#ifndef NOT_IN_libc +# ifndef NOT_IN_libc ENTRY(memset) .type memset, @gnu_indirect_function cmpl $0, __cpu_features+KIND_OFFSET(%rip) @@ -34,26 +35,27 @@ ENTRY(memset) leaq __memset_avx2(%rip), %rax 2: ret END(memset) -#endif +# endif -#if !defined NOT_IN_libc -# undef memset -# define memset __memset_sse2 +# if !defined NOT_IN_libc +# undef memset +# define memset __memset_sse2 -# undef __memset_chk -# define __memset_chk __memset_chk_sse2 +# undef __memset_chk +# define __memset_chk __memset_chk_sse2 -# ifdef SHARED +# ifdef SHARED # undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal memset calls through a PLT. The speedup we get from using GPR instruction is likely eaten away by the indirect call in the PLT. */ # define libc_hidden_builtin_def(name) \ .globl __GI_memset; __GI_memset = __memset_sse2 -# endif +# endif -# undef strong_alias -# define strong_alias(original, alias) +# undef strong_alias +# define strong_alias(original, alias) +# endif #endif #include "../memset.S" diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S index 2182780822..8a607bd6b7 100644 --- a/sysdeps/x86_64/multiarch/memset_chk.S +++ b/sysdeps/x86_64/multiarch/memset_chk.S @@ -22,7 +22,7 @@ /* Define multiple versions only for the definition in lib. 
*/ #ifndef NOT_IN_libc -# ifdef SHARED +# if defined SHARED && defined HAVE_AVX2_SUPPORT ENTRY(__memset_chk) .type __memset_chk, @gnu_indirect_function cmpl $0, __cpu_features+KIND_OFFSET(%rip) |