From 0db56470f1bee39a252daf2728d818296b179a9e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 13 Mar 2016 00:26:57 -0800 Subject: Add memmove/memset-avx512-unaligned-erms-no-vzeroupper.S --- sysdeps/x86_64/multiarch/Makefile | 4 +- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 48 ++++++++++++++++++++++ .../memmove-avx512-unaligned-erms-no-vzeroupper.S | 12 ++++++ .../memset-avx512-unaligned-erms-no-vzeroupper.S | 19 +++++++++ 4 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S create mode 100644 sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 8878efbc8f..0218ffae4d 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -24,9 +24,11 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ memmove-sse2-unaligned-erms \ memmove-avx-unaligned-erms \ memmove-avx512-unaligned-erms \ + memmove-avx512-unaligned-erms-no-vzeroupper \ memset-sse2-unaligned-erms \ memset-avx2-unaligned-erms \ - memset-avx512-unaligned-erms + memset-avx512-unaligned-erms \ + memset-avx512-unaligned-erms-no-vzeroupper CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 1e880f6edc..a621ae0ed3 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -55,9 +55,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_chk_avx512_unaligned_2) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper_unaligned_2) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -99,9 +105,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_avx512_unaligned_2) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper_unaligned_2) IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) @@ -135,9 +147,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_chk_avx512_no_vzeroupper_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memset_chk_avx512_no_vzeroupper) @@ -165,9 +183,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memset_avx512_no_vzeroupper_unaligned) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), __memset_avx512_no_vzeroupper) @@ -336,9 +360,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_chk_avx512_unaligned_2) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper_unaligned_2) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -384,9 +414,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_avx512_unaligned_2) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper_unaligned_2) IFUNC_IMPL_ADD (array, i, memcpy, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, @@ -405,9 +441,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_chk_avx512_unaligned_2) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper_unaligned_2) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -440,9 +482,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_avx512_unaligned_2) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper_unaligned_2) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper_unaligned_erms) #endif IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S new file mode 100644 index 0000000000..3ba2851c3e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S @@ -0,0 +1,12 @@ +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VEC(i) zmm##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define SECTION(p) p##.avx512 +# define MEMMOVE_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S new file mode 100644 index 0000000000..db246306b5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S @@ -0,0 +1,19 @@ +#ifdef HAVE_AVX512_ASM_SUPPORT +# define VEC_SIZE 64 +# define VEC(i) zmm##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER +# define VZEROUPPER_SHORT_RETURN rep + +# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + vmovd d, %xmm0; \ + movq r, %rax; \ + vpbroadcastb %xmm0, %xmm0; \ + vpbroadcastq %xmm0, %zmm0 + +# define SECTION(p) p##.avx512 +# define MEMSET_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s + +# include "memset-vec-unaligned-erms.S" +#endif -- cgit v1.2.3