diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-03-13 00:26:57 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-03-31 09:00:47 -0700 |
commit | 0db56470f1bee39a252daf2728d818296b179a9e (patch) | |
tree | 5d376065cc6fce95724fefee7bd867abd7f05b0b | |
parent | 7df7c6a195d6bc6ffdd90db0786d5de9c67d037a (diff) | |
download | glibc-0db56470f1bee39a252daf2728d818296b179a9e.tar glibc-0db56470f1bee39a252daf2728d818296b179a9e.tar.gz glibc-0db56470f1bee39a252daf2728d818296b179a9e.tar.bz2 glibc-0db56470f1bee39a252daf2728d818296b179a9e.zip |
Add memmove/memset-avx512-unaligned-erms-no-vzeroupper.S (branch: hjl/erms/master)
4 files changed, 82 insertions, 1 deletion
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 8878efbc8f..0218ffae4d 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -24,9 +24,11 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ memmove-sse2-unaligned-erms \ memmove-avx-unaligned-erms \ memmove-avx512-unaligned-erms \ + memmove-avx512-unaligned-erms-no-vzeroupper \ memset-sse2-unaligned-erms \ memset-avx2-unaligned-erms \ - memset-avx512-unaligned-erms + memset-avx512-unaligned-erms \ + memset-avx512-unaligned-erms-no-vzeroupper CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 1e880f6edc..a621ae0ed3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -57,7 +57,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_chk_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -101,7 +107,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, 
memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) @@ -137,9 +149,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memset_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_no_vzeroupper) #endif ) @@ -167,9 +185,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memset_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_no_vzeroupper) #endif ) @@ -338,7 +362,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_chk_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -386,7 +416,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, memcpy, HAS_ARCH_FEATURE 
(AVX512F_Usable), + __memcpy_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, @@ -407,7 +443,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_chk_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -442,7 +484,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_avx512_unaligned_2) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper_unaligned_2) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S new file mode 100644 index 0000000000..3ba2851c3e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S @@ -0,0 +1,12 @@ +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VEC(i) zmm##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define 
SECTION(p) p##.avx512 +# define MEMMOVE_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S new file mode 100644 index 0000000000..db246306b5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S @@ -0,0 +1,19 @@ +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VEC(i) zmm##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER +# define VZEROUPPER_SHORT_RETURN rep + +# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + vmovd d, %xmm0; \ + movq r, %rax; \ + vpbroadcastb %xmm0, %xmm0; \ + vpbroadcastq %xmm0, %zmm0 + +# define SECTION(p) p##.avx512 +# define MEMSET_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s + +# include "memset-vec-unaligned-erms.S" +#endif |