aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/x86_64/isa-default-impl.h10
-rw-r--r--sysdeps/x86_64/memchr.S357
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-evex.h29
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c72
-rw-r--r--sysdeps/x86_64/multiarch/memchr-avx2.S5
-rw-r--r--sysdeps/x86_64/multiarch/memchr-evex.S5
-rw-r--r--sysdeps/x86_64/multiarch/memchr-sse2.S363
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr-avx2.S7
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr-evex.S7
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr-sse2.S198
-rw-r--r--sysdeps/x86_64/multiarch/rtld-memchr.S18
-rw-r--r--sysdeps/x86_64/multiarch/rtld-rawmemchr.S18
-rw-r--r--sysdeps/x86_64/multiarch/wmemchr-avx2.S7
-rw-r--r--sysdeps/x86_64/multiarch/wmemchr-evex.S7
-rw-r--r--sysdeps/x86_64/multiarch/wmemchr-sse2.S9
-rw-r--r--sysdeps/x86_64/rawmemchr.S184
-rw-r--r--sysdeps/x86_64/wmemchr.S28
17 files changed, 720 insertions, 604 deletions
diff --git a/sysdeps/x86_64/isa-default-impl.h b/sysdeps/x86_64/isa-default-impl.h
index 34634668e5..7d7832b1f5 100644
--- a/sysdeps/x86_64/isa-default-impl.h
+++ b/sysdeps/x86_64/isa-default-impl.h
@@ -46,4 +46,14 @@
# error "Unsupported ISA Level!"
#endif
+#if IS_IN(rtld)
+# if !defined USE_MULTIARCH
+# error "RTLD version should only exist in multiarch build"
+# endif
+#else
+# if defined USE_MULTIARCH
+# error "Multiarch build should not use ISA_DEFAULT_IMPL without RTLD"
+# endif
+#endif
+
#include ISA_DEFAULT_IMPL
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index a160fd9b00..20b43508c4 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -15,358 +15,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#define MEMCHR __memchr
-#ifdef USE_AS_WMEMCHR
-# define MEMCHR wmemchr
-# define PCMPEQ pcmpeqd
-# define CHAR_PER_VEC 4
-#else
-# define MEMCHR memchr
-# define PCMPEQ pcmpeqb
-# define CHAR_PER_VEC 16
-#endif
+#define DEFAULT_IMPL_V1 "multiarch/memchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/memchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/memchr-evex.S"
-/* fast SSE2 version with using pmaxub and 64 byte loop */
+#include "isa-default-impl.h"
- .text
-ENTRY(MEMCHR)
- movd %esi, %xmm1
- mov %edi, %ecx
-
-#ifdef __ILP32__
- /* Clear the upper 32 bits. */
- movl %edx, %edx
-#endif
-#ifdef USE_AS_WMEMCHR
- test %RDX_LP, %RDX_LP
- jz L(return_null)
-#else
- punpcklbw %xmm1, %xmm1
- test %RDX_LP, %RDX_LP
- jz L(return_null)
- punpcklbw %xmm1, %xmm1
-#endif
-
- and $63, %ecx
- pshufd $0, %xmm1, %xmm1
-
- cmp $48, %ecx
- ja L(crosscache)
-
- movdqu (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
-
- jnz L(matches_1)
- sub $CHAR_PER_VEC, %rdx
- jbe L(return_null)
- add $16, %rdi
- and $15, %ecx
- and $-16, %rdi
-#ifdef USE_AS_WMEMCHR
- shr $2, %ecx
-#endif
- add %rcx, %rdx
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
- jmp L(loop_prolog)
-
- .p2align 4
-L(crosscache):
- and $15, %ecx
- and $-16, %rdi
- movdqa (%rdi), %xmm0
-
- PCMPEQ %xmm1, %xmm0
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
- /* Remove the leading bytes. */
- sar %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
- /* Check which byte is a match. */
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- add %rdi, %rax
- add %rcx, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
- "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
- possible addition overflow. */
- neg %rcx
- add $16, %rcx
-#ifdef USE_AS_WMEMCHR
- shr $2, %ecx
-#endif
- sub %rcx, %rdx
- jbe L(return_null)
- add $16, %rdi
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
-
- .p2align 4
-L(loop_prolog):
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- PCMPEQ %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm4
- PCMPEQ %xmm1, %xmm4
- add $64, %rdi
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- test $0x3f, %rdi
- jz L(align64_loop)
-
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
-
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- PCMPEQ %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
-
- add $64, %rdi
- test %eax, %eax
- jnz L(matches0)
-
- mov %rdi, %rcx
- and $-64, %rdi
- and $63, %ecx
-#ifdef USE_AS_WMEMCHR
- shr $2, %ecx
-#endif
- add %rcx, %rdx
-
- .p2align 4
-L(align64_loop):
- sub $(CHAR_PER_VEC * 4), %rdx
- jbe L(exit_loop)
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm2
- movdqa 32(%rdi), %xmm3
- movdqa 48(%rdi), %xmm4
-
- PCMPEQ %xmm1, %xmm0
- PCMPEQ %xmm1, %xmm2
- PCMPEQ %xmm1, %xmm3
- PCMPEQ %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
- pmovmskb %xmm4, %eax
-
- add $64, %rdi
-
- test %eax, %eax
- jz L(align64_loop)
-
- sub $64, %rdi
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
-
- PCMPEQ 48(%rdi), %xmm1
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- pmovmskb %xmm1, %eax
- bsf %eax, %eax
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(exit_loop):
- add $(CHAR_PER_VEC * 2), %edx
- jle L(exit_loop_32)
-
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- PCMPEQ %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- PCMPEQ %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32_1)
- sub $CHAR_PER_VEC, %edx
- jle L(return_null)
-
- PCMPEQ 48(%rdi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches48_1)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(exit_loop_32):
- add $(CHAR_PER_VEC * 2), %edx
- movdqa (%rdi), %xmm0
- PCMPEQ %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches_1)
- sub $CHAR_PER_VEC, %edx
- jbe L(return_null)
-
- PCMPEQ 16(%rdi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches16_1)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(matches0):
- bsf %eax, %eax
- lea -16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches):
- bsf %eax, %eax
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16):
- bsf %eax, %eax
- lea 16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches32):
- bsf %eax, %eax
- lea 32(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- lea 16(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches32_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- lea 32(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches48_1):
- bsf %eax, %eax
-#ifdef USE_AS_WMEMCHR
- mov %eax, %esi
- shr $2, %esi
- sub %rsi, %rdx
-#else
- sub %rax, %rdx
-#endif
- jbe L(return_null)
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(return_null):
- xor %eax, %eax
- ret
-END(MEMCHR)
-
-#ifndef USE_AS_WMEMCHR
-strong_alias (memchr, __memchr)
+weak_alias (__memchr, memchr)
libc_hidden_builtin_def(memchr)
-#endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index b8f7a12ea2..856c6261f8 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -19,24 +19,28 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_rtm) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
-
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+
+ /* NB: The X86_ISA_* feature check macros are evaluated at
+ compile time. */
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
{
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (evex_rtm);
@@ -47,9 +51,12 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ /* Pick the AVX2 version (which uses VZEROUPPER) unless the CPU
+ prefers to avoid VZEROUPPER. The replaced code tested the
+ negated flag and that negation must be kept. The ISA-level
+ comparison is a compile-time constant so the sse2 fallback
+ below stays unreachable when building for ISA level >= 3.
+ NOTE(review): assumes isa-level.h provides
+ Prefer_No_VZEROUPPER_X86_ISA_LEVEL and MINIMUM_X86_ISA_LEVEL,
+ as the X86_ISA_* check macros appear to require - confirm. */
+ if ((Prefer_No_VZEROUPPER_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL)
+ || !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
return OPTIMIZE (avx2);
}
+ /* This is unreachable (compile time checked) if ISA level >= 3
+ so no need for a robust fallback here. */
return OPTIMIZE (sse2);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 883362f63d..bf52cf96d0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -25,7 +25,8 @@
/* Fill ARRAY of MAX elements with IFUNC implementations for function
NAME supported on target machine and return the number of valid
- entries. */
+ entries. Each set of implementations for a given function is sorted in
+ descending order by ISA level. */
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -53,24 +54,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/memchr.c. */
IFUNC_IMPL (i, name, memchr,
- IFUNC_IMPL_ADD (array, i, memchr,
- CPU_FEATURE_USABLE (AVX2),
- __memchr_avx2)
- IFUNC_IMPL_ADD (array, i, memchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, memchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex)
- IFUNC_IMPL_ADD (array, i, memchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex_rtm)
- IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __memchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memchr_avx2_rtm)
+ /* Can be lowered to V1 if a V2 implementation is added. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memchr,
+ 1,
+ __memchr_sse2))
/* Support sysdeps/x86_64/multiarch/memcmp.c. */
IFUNC_IMPL (i, name, memcmp,
@@ -288,24 +292,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/rawmemchr.c. */
IFUNC_IMPL (i, name, rawmemchr,
- IFUNC_IMPL_ADD (array, i, rawmemchr,
- CPU_FEATURE_USABLE (AVX2),
- __rawmemchr_avx2)
- IFUNC_IMPL_ADD (array, i, rawmemchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __rawmemchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, rawmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex)
- IFUNC_IMPL_ADD (array, i, rawmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex_rtm)
- IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
+ X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __rawmemchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __rawmemchr_avx2_rtm)
+ /* Can be lowered to V1 if a V2 implementation is added. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, rawmemchr,
+ 1,
+ __rawmemchr_sse2))
/* Support sysdeps/x86_64/multiarch/strlen.c. */
IFUNC_IMPL (i, name, strlen,
@@ -748,24 +755,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
- IFUNC_IMPL_ADD (array, i, wmemchr,
- CPU_FEATURE_USABLE (AVX2),
- __wmemchr_avx2)
- IFUNC_IMPL_ADD (array, i, wmemchr,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wmemchr_avx2_rtm)
- IFUNC_IMPL_ADD (array, i, wmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex)
- IFUNC_IMPL_ADD (array, i, wmemchr,
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex_rtm)
- IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemchr_avx2)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemchr_avx2_rtm)
+ /* Can be lowered to V1 if a V2 implementation is added. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wmemchr,
+ 1,
+ __wmemchr_sse2))
/* Support sysdeps/x86_64/multiarch/wmemcmp.c. */
IFUNC_IMPL (i, name, wmemcmp,
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
index c5a256eb37..39be5f7083 100644
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -16,9 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+#include <sysdep.h>
-# include <sysdep.h>
+#if ISA_SHOULD_BUILD (3)
# ifndef MEMCHR
# define MEMCHR __memchr_avx2
diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
index 0fd11b7632..0dd4f1dcce 100644
--- a/sysdeps/x86_64/multiarch/memchr-evex.S
+++ b/sysdeps/x86_64/multiarch/memchr-evex.S
@@ -16,9 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#include <isa-level.h>
+#include <sysdep.h>
-# include <sysdep.h>
+#if ISA_SHOULD_BUILD (4)
# ifndef MEMCHR
# define MEMCHR __memchr_evex
diff --git a/sysdeps/x86_64/multiarch/memchr-sse2.S b/sysdeps/x86_64/multiarch/memchr-sse2.S
index 2c6fdd41d6..8c561cd687 100644
--- a/sysdeps/x86_64/multiarch/memchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/memchr-sse2.S
@@ -16,13 +16,360 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-# define memchr __memchr_sse2
+#include <isa-level.h>
+#include <sysdep.h>
-# undef strong_alias
-# define strong_alias(memchr, __memchr)
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(memchr)
-#endif
+/* Build this when MINIMUM_X86_ISA_LEVEL <= 2: there is no dedicated V2
+ implementation, so the SSE2 version also has to cover ISA V2 builds. */
+#if ISA_SHOULD_BUILD (2)
+
+# ifndef MEMCHR
+# define MEMCHR __memchr_sse2
+# endif
+# ifdef USE_AS_WMEMCHR
+# define PCMPEQ pcmpeqd
+# define CHAR_PER_VEC 4
+# else
+# define PCMPEQ pcmpeqb
+# define CHAR_PER_VEC 16
+# endif
+
+/* Fast SSE2 version using pmaxub and a 64-byte main loop.
+ Scans from the pointer in rdi for the value in esi, bounded by the
+ count in rdx, and returns the match address in rax or 0 when there
+ is no match inside the count. With USE_AS_WMEMCHR the elements are
+ 4 bytes wide and the count is in elements (CHAR_PER_VEC is 4
+ instead of 16). xmm1 holds the broadcast search value throughout. */
+
+ .text
+ENTRY(MEMCHR)
+ movd %esi, %xmm1 /* low dword of xmm1 = search value */
+ mov %edi, %ecx /* low pointer bits, used for alignment math */
+
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
+# ifdef USE_AS_WMEMCHR
+ test %RDX_LP, %RDX_LP
+ jz L(return_null) /* zero count: nothing to search */
+# else
+ punpcklbw %xmm1, %xmm1 /* replicate the byte into a word */
+ test %RDX_LP, %RDX_LP
+ jz L(return_null) /* zero count: nothing to search */
+ punpcklbw %xmm1, %xmm1 /* and the word into a dword */
+# endif
+
+ and $63, %ecx /* ecx = offset inside the 64-byte block */
+ pshufd $0, %xmm1, %xmm1 /* broadcast the dword to all four lanes */
+
+ cmp $48, %ecx
+ ja L(crosscache) /* offset > 48: a 16-byte load would leave this 64-byte block */
+
+ movdqu (%rdi), %xmm0 /* unaligned load of the first 16 bytes */
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+
+ jnz L(matches_1) /* candidate found, still has to be checked against the count */
+ sub $CHAR_PER_VEC, %rdx
+ jbe L(return_null) /* count fits in the vector already searched */
+ add $16, %rdi
+ and $15, %ecx /* ecx = misalignment of the original pointer */
+ and $-16, %rdi /* rdi = next 16-byte aligned address */
+# ifdef USE_AS_WMEMCHR
+ shr $2, %ecx /* bytes to elements */
+# endif
+ add %rcx, %rdx /* count is now relative to the aligned rdi (overlaps the first load) */
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop) /* at most four vectors left: use the bounded tail */
+ jmp L(loop_prolog)
+
+ .p2align 4
+L(crosscache):
+ and $15, %ecx /* ecx = offset inside the 16-byte block */
+ and $-16, %rdi /* round the pointer down so an aligned load can be used */
+ movdqa (%rdi), %xmm0
+
+ PCMPEQ %xmm1, %xmm0
+ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ /* Remove the leading bytes, which lie before the start pointer. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+ /* Check which byte is a match. */
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi /* byte index to element index */
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null) /* match index is not below the count */
+ add %rdi, %rax
+ add %rcx, %rax /* aligned base + shifted index + start offset */
+ ret
+
+ .p2align 4
+L(unaligned_no_match):
+ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
+ possible addition overflow. */
+ neg %rcx
+ add $16, %rcx /* rcx = 16 - offset = bytes already searched */
+# ifdef USE_AS_WMEMCHR
+ shr $2, %ecx /* bytes to elements */
+# endif
+ sub %rcx, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop) /* at most four vectors left: use the bounded tail */
+
+ .p2align 4
+L(loop_prolog): /* rdi is 16-byte aligned and more than four vectors remain,
+ so the next four vectors are checked without bounds tests */
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm4
+ PCMPEQ %xmm1, %xmm4
+ add $64, %rdi /* advance first, so a hit here is at -16 from rdi */
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ test $0x3f, %rdi
+ jz L(align64_loop) /* already 64-byte aligned: enter the main loop */
+
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+
+ add $64, %rdi
+ test %eax, %eax
+ jnz L(matches0)
+
+ mov %rdi, %rcx
+ and $-64, %rdi /* round rdi down to a 64-byte boundary */
+ and $63, %ecx /* ecx = bytes that will be searched a second time */
+# ifdef USE_AS_WMEMCHR
+ shr $2, %ecx /* bytes to elements */
+# endif
+ add %rcx, %rdx /* give the overlap back to the remaining count */
+
+ .p2align 4
+L(align64_loop): /* main loop: 64 aligned bytes per iteration */
+ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ movdqa (%rdi), %xmm0
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ movdqa 48(%rdi), %xmm4
+
+ PCMPEQ %xmm1, %xmm0
+ PCMPEQ %xmm1, %xmm2
+ PCMPEQ %xmm1, %xmm3
+ PCMPEQ %xmm1, %xmm4
-#include "../memchr.S"
+ pmaxub %xmm0, %xmm3 /* merge the four compare results into xmm4 */
+ pmaxub %xmm2, %xmm4
+ pmaxub %xmm3, %xmm4
+ pmovmskb %xmm4, %eax
+
+ add $64, %rdi
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+ sub $64, %rdi /* undo the advance: the hit is in the block just read */
+
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3 /* xmm3 was overwritten by pmaxub, so compare again */
+ PCMPEQ %xmm1, %xmm3
+
+ PCMPEQ 48(%rdi), %xmm1 /* overwrites the search pattern, which is not used again on this path */
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ pmovmskb %xmm1, %eax
+ bsf %eax, %eax
+ lea 48(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(exit_loop): /* tail: rdx is the remaining count minus four vectors, and is not positive */
+ add $(CHAR_PER_VEC * 2), %edx
+ jle L(exit_loop_32) /* at most two vectors remain */
+
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ PCMPEQ %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32_1) /* third vector may extend past the count: bounded check */
+ sub $CHAR_PER_VEC, %edx
+ jle L(return_null)
+
+ PCMPEQ 48(%rdi), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches48_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(exit_loop_32):
+ add $(CHAR_PER_VEC * 2), %edx /* edx = remaining count, at most two vectors */
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches_1)
+ sub $CHAR_PER_VEC, %edx
+ jbe L(return_null)
+
+ PCMPEQ 16(%rdi), %xmm1
+ pmovmskb %xmm1, %eax
+ test %eax, %eax
+ jnz L(matches16_1)
+ xor %eax, %eax
+ ret
+
+ .p2align 4
+L(matches0): /* hit in the fourth vector after rdi was advanced by 64 */
+ bsf %eax, %eax
+ lea -16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches): /* unbounded hit at offset 0 from rdi */
+ bsf %eax, %eax
+ add %rdi, %rax
+ ret
+
+ .p2align 4
+L(matches16): /* unbounded hit at offset 16 from rdi */
+ bsf %eax, %eax
+ lea 16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches32): /* unbounded hit at offset 32 from rdi */
+ bsf %eax, %eax
+ lea 32(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches_1): /* bounded hit at offset 0: return 0 unless the index is below rdx */
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi /* byte index to element index */
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ add %rdi, %rax
+ ret
+
+ .p2align 4
+L(matches16_1): /* bounded hit at offset 16 */
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi /* byte index to element index */
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ lea 16(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(matches32_1): /* bounded hit at offset 32 */
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi /* byte index to element index */
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ lea 32(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(matches48_1): /* bounded hit at offset 48 */
+ bsf %eax, %eax
+# ifdef USE_AS_WMEMCHR
+ mov %eax, %esi
+ shr $2, %esi /* byte index to element index */
+ sub %rsi, %rdx
+# else
+ sub %rax, %rdx
+# endif
+ jbe L(return_null)
+ lea 48(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(return_null): /* no match inside the count: return 0 */
+ xor %eax, %eax
+ ret
+END(MEMCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
index 128f9ea637..d6bff28757 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
@@ -1,4 +1,7 @@
-#define MEMCHR __rawmemchr_avx2
-#define USE_AS_RAWMEMCHR 1
+#ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_avx2
+#endif
+#define USE_AS_RAWMEMCHR 1
+#define MEMCHR RAWMEMCHR
#include "memchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
index ec942b77ba..dc1c450699 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr-evex.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
@@ -1,4 +1,7 @@
-#define MEMCHR __rawmemchr_evex
-#define USE_AS_RAWMEMCHR 1
+#ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_evex
+#endif
+#define USE_AS_RAWMEMCHR 1
+#define MEMCHR RAWMEMCHR
#include "memchr-evex.S"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-sse2.S b/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
index 3841c14c34..e2c2e20d85 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
@@ -16,14 +16,192 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
-# define __rawmemchr __rawmemchr_sse2
-
-# undef weak_alias
-# define weak_alias(__rawmemchr, rawmemchr)
-# undef libc_hidden_def
-# define libc_hidden_def(__rawmemchr)
-#endif
+#include <isa-level.h>
+#include <sysdep.h>
+
+/* Build this when MINIMUM_X86_ISA_LEVEL <= 2: there is no dedicated V2
+ implementation, so the SSE2 version also has to cover ISA V2 builds. */
+#if ISA_SHOULD_BUILD (2)
+
+# ifndef RAWMEMCHR
+# define RAWMEMCHR __rawmemchr_sse2
+# endif
+
+ .text
+ENTRY (RAWMEMCHR) /* SSE2 byte search with no length bound: scans from the
+ pointer in rdi for the low byte of rsi and returns the address of
+ the first match in rax. There is no not-found path, so the code
+ keeps scanning until a match is seen. */
+ movd %rsi, %xmm1 /* xmm1 = search value */
+ mov %rdi, %rcx /* copy of the pointer, used for alignment math */
+
+ punpcklbw %xmm1, %xmm1 /* replicate the byte into a word */
+ punpcklbw %xmm1, %xmm1 /* and the word into a dword */
+
+ and $63, %rcx /* rcx = offset inside the 64-byte block */
+ pshufd $0, %xmm1, %xmm1 /* broadcast the dword to all four lanes */
+
+ cmp $48, %rcx
+ ja L(crosscache) /* offset > 48: a 16-byte load would leave this 64-byte block */
+
+ movdqu (%rdi), %xmm0 /* unaligned load of the first 16 bytes */
+ pcmpeqb %xmm1, %xmm0
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+
+ jnz L(matches)
+ add $16, %rdi
+ and $-16, %rdi /* rdi = next 16-byte aligned address (may overlap the first load) */
+ jmp L(loop_prolog)
+
+ .p2align 4
+L(crosscache):
+ and $15, %rcx /* rcx = offset inside the 16-byte block */
+ and $-16, %rdi /* round the pointer down so an aligned load can be used */
+ movdqa (%rdi), %xmm0
+
+ pcmpeqb %xmm1, %xmm0
+/* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+/* Remove the leading bytes, which lie before the start pointer. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+/* Check which byte is a match. */
+ bsf %eax, %eax
+
+ add %rdi, %rax
+ add %rcx, %rax /* aligned base + shifted index + start offset */
+ ret
+
+ .p2align 4
+L(unaligned_no_match):
+ add $16, %rdi /* step to the next aligned 16-byte block */
+
+ .p2align 4
+L(loop_prolog): /* rdi is 16-byte aligned here: check vectors one at a time
+ until rdi reaches a 64-byte boundary */
+ movdqa (%rdi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm4
+ pcmpeqb %xmm1, %xmm4
+ add $64, %rdi /* advance first, so a hit here is at -16 from rdi */
+ pmovmskb %xmm4, %eax
+ test %eax, %eax
+ jnz L(matches0)
+
+ test $0x3f, %rdi
+ jz L(align64_loop) /* already 64-byte aligned: enter the main loop */
+
+ movdqa (%rdi), %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ movdqa 16(%rdi), %xmm2
+ pcmpeqb %xmm1, %xmm2
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
-#include "../rawmemchr.S"
+ movdqa 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ movdqa 48(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm3
+ pmovmskb %xmm3, %eax
+
+ add $64, %rdi
+ test %eax, %eax
+ jnz L(matches0)
+
+ and $-64, %rdi /* round down to a 64-byte boundary (bytes above it are searched again) */
+
+ .p2align 4
+L(align64_loop): /* main loop: 64 aligned bytes per iteration */
+ movdqa (%rdi), %xmm0
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ movdqa 48(%rdi), %xmm4
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm1, %xmm2
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm1, %xmm4
+
+ pmaxub %xmm0, %xmm3 /* merge the four compare results into xmm4 */
+ pmaxub %xmm2, %xmm4
+ pmaxub %xmm3, %xmm4
+ pmovmskb %xmm4, %eax
+
+ add $64, %rdi
+
+ test %eax, %eax
+ jz L(align64_loop)
+
+ sub $64, %rdi /* undo the advance: the hit is in the block just read */
+
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches)
+
+ pmovmskb %xmm2, %eax
+ test %eax, %eax
+ jnz L(matches16)
+
+ movdqa 32(%rdi), %xmm3 /* xmm3 was overwritten by pmaxub, so compare again */
+ pcmpeqb %xmm1, %xmm3
+
+ pcmpeqb 48(%rdi), %xmm1 /* overwrites the search pattern, which is not used again on this path */
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32)
+
+ pmovmskb %xmm1, %eax
+ bsf %eax, %eax
+ lea 48(%rdi, %rax), %rax
+ ret
+
+ .p2align 4
+L(matches0): /* hit in the fourth vector after rdi was advanced by 64 */
+ bsf %eax, %eax
+ lea -16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches): /* hit at offset 0 from rdi */
+ bsf %eax, %eax
+ add %rdi, %rax
+ ret
+
+ .p2align 4
+L(matches16): /* hit at offset 16 from rdi */
+ bsf %eax, %eax
+ lea 16(%rax, %rdi), %rax
+ ret
+
+ .p2align 4
+L(matches32): /* hit at offset 32 from rdi */
+ bsf %eax, %eax
+ lea 32(%rax, %rdi), %rax
+ ret
+
+END (RAWMEMCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/rtld-memchr.S b/sysdeps/x86_64/multiarch/rtld-memchr.S
new file mode 100644
index 0000000000..a14b192bed
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-memchr.S
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "../memchr.S"
diff --git a/sysdeps/x86_64/multiarch/rtld-rawmemchr.S b/sysdeps/x86_64/multiarch/rtld-rawmemchr.S
new file mode 100644
index 0000000000..5d4110a052
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-rawmemchr.S
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "../rawmemchr.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2.S b/sysdeps/x86_64/multiarch/wmemchr-avx2.S
index 282854f1a1..2bf93fd84b 100644
--- a/sysdeps/x86_64/multiarch/wmemchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/wmemchr-avx2.S
@@ -1,4 +1,7 @@
-#define MEMCHR __wmemchr_avx2
-#define USE_AS_WMEMCHR 1
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_avx2
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
#include "memchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr-evex.S b/sysdeps/x86_64/multiarch/wmemchr-evex.S
index 06cd0f9f5a..5512d5cdc3 100644
--- a/sysdeps/x86_64/multiarch/wmemchr-evex.S
+++ b/sysdeps/x86_64/multiarch/wmemchr-evex.S
@@ -1,4 +1,7 @@
-#define MEMCHR __wmemchr_evex
-#define USE_AS_WMEMCHR 1
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_evex
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
#include "memchr-evex.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr-sse2.S b/sysdeps/x86_64/multiarch/wmemchr-sse2.S
index 70a965d552..b675a070d4 100644
--- a/sysdeps/x86_64/multiarch/wmemchr-sse2.S
+++ b/sysdeps/x86_64/multiarch/wmemchr-sse2.S
@@ -1,4 +1,7 @@
-#define USE_AS_WMEMCHR 1
-#define wmemchr __wmemchr_sse2
+#ifndef WMEMCHR
+# define WMEMCHR __wmemchr_sse2
+#endif
+#define USE_AS_WMEMCHR 1
+#define MEMCHR WMEMCHR
-#include "../memchr.S"
+#include "memchr-sse2.S"
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index 4c1a3383b9..ba7e5202e6 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -17,185 +17,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#define RAWMEMCHR __rawmemchr
- .text
-ENTRY (__rawmemchr)
- movd %rsi, %xmm1
- mov %rdi, %rcx
+#define DEFAULT_IMPL_V1 "multiarch/rawmemchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/rawmemchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/rawmemchr-evex.S"
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
-
- and $63, %rcx
- pshufd $0, %xmm1, %xmm1
-
- cmp $48, %rcx
- ja L(crosscache)
-
- movdqu (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
- test %eax, %eax
-
- jnz L(matches)
- add $16, %rdi
- and $-16, %rdi
- jmp L(loop_prolog)
-
- .p2align 4
-L(crosscache):
- and $15, %rcx
- and $-16, %rdi
- movdqa (%rdi), %xmm0
-
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
-/* Remove the leading bytes. */
- sar %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
-/* Check which byte is a match. */
- bsf %eax, %eax
-
- add %rdi, %rax
- add %rcx, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- add $16, %rdi
-
- .p2align 4
-L(loop_prolog):
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm4
- pcmpeqb %xmm1, %xmm4
- add $64, %rdi
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- test $0x3f, %rdi
- jz L(align64_loop)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
-
- add $64, %rdi
- test %eax, %eax
- jnz L(matches0)
-
- and $-64, %rdi
-
- .p2align 4
-L(align64_loop):
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm2
- movdqa 32(%rdi), %xmm3
- movdqa 48(%rdi), %xmm4
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
- pmovmskb %xmm4, %eax
-
- add $64, %rdi
-
- test %eax, %eax
- jz L(align64_loop)
-
- sub $64, %rdi
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
-
- pcmpeqb 48(%rdi), %xmm1
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- pmovmskb %xmm1, %eax
- bsf %eax, %eax
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches0):
- bsf %eax, %eax
- lea -16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches):
- bsf %eax, %eax
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16):
- bsf %eax, %eax
- lea 16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches32):
- bsf %eax, %eax
- lea 32(%rax, %rdi), %rax
- ret
-
-END (__rawmemchr)
+#include "isa-default-impl.h"
weak_alias (__rawmemchr, rawmemchr)
-libc_hidden_builtin_def (__rawmemchr)
+libc_hidden_def (__rawmemchr)
diff --git a/sysdeps/x86_64/wmemchr.S b/sysdeps/x86_64/wmemchr.S
new file mode 100644
index 0000000000..30565b2067
--- /dev/null
+++ b/sysdeps/x86_64/wmemchr.S
@@ -0,0 +1,28 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WMEMCHR __wmemchr
+
+#define DEFAULT_IMPL_V1 "multiarch/wmemchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/wmemchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/wmemchr-evex.S"
+
+#include "isa-default-impl.h"
+
+libc_hidden_def (__wmemchr)
+weak_alias (__wmemchr, wmemchr)
+libc_hidden_weak (wmemchr)