aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2017-06-09 05:42:16 -0700
committerH.J. Lu <hjl.tools@gmail.com>2017-06-09 05:42:29 -0700
commit8fe57365bfb5a417d911ab715a5671b3b1d7b155 (patch)
tree9b6a729305aaa4f9b30e8a016469fbbd5a7bc664 /sysdeps/x86_64
parentdc485ceb2ac596d27294cc1942adf3181f15e8bf (diff)
downloadglibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.tar
glibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.tar.gz
glibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.tar.bz2
glibc-8fe57365bfb5a417d911ab715a5671b3b1d7b155.zip
x86-64: Optimize strchr/strchrnul/wcschr with AVX2
Optimize strchr/strchrnul/wcschr with AVX2 to search 32 bytes with vector instructions. It is as fast as SSE2 versions for size <= 16 bytes and up to 1X faster for or size > 16 bytes on Haswell. Select AVX2 version on AVX2 machines where vzeroupper is preferred and AVX unaligned load is fast. NB: It uses TZCNT instead of BSF since TZCNT produces the same result as BSF for non-zero input. TZCNT is faster than BSF and is executed as BSF if machine doesn't support TZCNT. * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add strchr-sse2, strchrnul-sse2, strchr-avx2, strchrnul-avx2, wcschr-sse2 and wcschr-avx2. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add tests for __strchr_avx2, __strchrnul_avx2, __strchrnul_sse2, __wcschr_avx2 and __wcschr_sse2. * sysdeps/x86_64/multiarch/strchr-avx2.S: New file. * sysdeps/x86_64/multiarch/strchr-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strchr.c: Likewise. * sysdeps/x86_64/multiarch/strchrnul-avx2.S: Likewise. * sysdeps/x86_64/multiarch/strchrnul-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strchrnul.c: Likewise. * sysdeps/x86_64/multiarch/wcschr-avx2.S: Likewise. * sysdeps/x86_64/multiarch/wcschr-sse2.S: Likewise. * sysdeps/x86_64/multiarch/wcschr.c: Likewise. * sysdeps/x86_64/multiarch/strchr.S: Removed.
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--sysdeps/x86_64/multiarch/Makefile2
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c19
-rw-r--r--sysdeps/x86_64/multiarch/strchr-avx2.S254
-rw-r--r--sysdeps/x86_64/multiarch/strchr-sse2.S28
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S57
-rw-r--r--sysdeps/x86_64/multiarch/strchr.c55
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul-sse2.S26
-rw-r--r--sysdeps/x86_64/multiarch/strchrnul.c34
-rw-r--r--sysdeps/x86_64/multiarch/wcschr-avx2.S3
-rw-r--r--sysdeps/x86_64/multiarch/wcschr-sse2.S30
-rw-r--r--sysdeps/x86_64/multiarch/wcschr.c39
12 files changed, 492 insertions, 58 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 1846ae1acb..4523f51095 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -13,6 +13,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
memcpy-ssse3-back \
memmove-ssse3-back \
memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
+ strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
@@ -37,6 +38,7 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
wmemcmp-avx2-movbe \
wmemchr-sse2 wmemchr-avx2 \
wcscpy-ssse3 wcscpy-c \
+ wcschr-sse2 wcschr-avx2 \
wcsnlen-sse4_1 wcsnlen-c \
wcslen-sse2 wcslen-avx2 wcsnlen-avx2
endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 9d499ffe0b..8dda1b040a 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -229,11 +229,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
- /* Support sysdeps/x86_64/multiarch/strchr.S. */
+ /* Support sysdeps/x86_64/multiarch/strchr.c. */
IFUNC_IMPL (i, name, strchr,
+ IFUNC_IMPL_ADD (array, i, strchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strchr_avx2)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
+ /* Support sysdeps/x86_64/multiarch/strchrnul.c. */
+ IFUNC_IMPL (i, name, strchrnul,
+ IFUNC_IMPL_ADD (array, i, strchrnul,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strchrnul_avx2)
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
/* Support sysdeps/x86_64/multiarch/strcmp.S. */
IFUNC_IMPL (i, name, strcmp,
IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
@@ -318,6 +328,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
+ /* Support sysdeps/x86_64/multiarch/wcschr.c. */
+ IFUNC_IMPL (i, name, wcschr,
+ IFUNC_IMPL_ADD (array, i, wcschr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcschr_avx2)
+ IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
/* Support sysdeps/x86_64/multiarch/wcscpy.S. */
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
new file mode 100644
index 0000000000..e4292d33ab
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
@@ -0,0 +1,254 @@
+/* strchr/strchrnul optimized with AVX2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCHR
+# define STRCHR __strchr_avx2
+# endif
+
+# ifdef USE_AS_WCSCHR
+# define VPBROADCAST vpbroadcastd
+# define VPCMPEQ vpcmpeqd
+# define CHAR_REG esi
+# else
+# define VPBROADCAST vpbroadcastb
+# define VPCMPEQ vpcmpeqb
+# define CHAR_REG sil
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+ .section .text.avx,"ax",@progbits
+ENTRY (STRCHR)
+ movl %edi, %ecx
+ /* Broadcast CHAR to YMM0. */
+ vmovd %esi, %xmm0
+ vpxor %xmm9, %xmm9, %xmm9
+ VPBROADCAST %xmm0, %ymm0
+ /* Check if we may cross page boundary with one vector load. */
+ andl $(2 * VEC_SIZE - 1), %ecx
+ cmpl $VEC_SIZE, %ecx
+ ja L(cros_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. Search for both CHAR and the
+ null byte. */
+ vmovdqu (%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ /* Align data for aligned loads in the loop. */
+ addq $VEC_SIZE, %rdi
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+
+ jmp L(more_4x_vec)
+
+ .p2align 4
+L(cros_page_boundary):
+ andl $(VEC_SIZE - 1), %ecx
+ andq $-VEC_SIZE, %rdi
+ vmovdqu (%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Remove the leading bytes. */
+ sarl %cl, %eax
+ testl %eax, %eax
+ jz L(aligned_more)
+ /* Found CHAR or the null byte. */
+ tzcntl %eax, %eax
+ addq %rcx, %rax
+# ifdef USE_AS_STRCHRNUL
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(aligned_more):
+ addq $VEC_SIZE, %rdi
+
+L(more_4x_vec):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+ vmovdqa (%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+
+ vmovdqa VEC_SIZE(%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ vmovdqa (VEC_SIZE * 2)(%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+ vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8
+ VPCMPEQ %ymm8, %ymm0, %ymm1
+ VPCMPEQ %ymm8, %ymm9, %ymm2
+ vpor %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+ /* Align data to 4 * VEC_SIZE. */
+ movq %rdi, %rcx
+ andl $(4 * VEC_SIZE - 1), %ecx
+ andq $-(4 * VEC_SIZE), %rdi
+
+ .p2align 4
+L(loop_4x_vec):
+ /* Compare 4 * VEC at a time forward. */
+ vmovdqa (%rdi), %ymm5
+ vmovdqa VEC_SIZE(%rdi), %ymm6
+ vmovdqa (VEC_SIZE * 2)(%rdi), %ymm7
+ vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8
+
+ VPCMPEQ %ymm5, %ymm0, %ymm1
+ VPCMPEQ %ymm6, %ymm0, %ymm2
+ VPCMPEQ %ymm7, %ymm0, %ymm3
+ VPCMPEQ %ymm8, %ymm0, %ymm4
+
+ VPCMPEQ %ymm5, %ymm9, %ymm5
+ VPCMPEQ %ymm6, %ymm9, %ymm6
+ VPCMPEQ %ymm7, %ymm9, %ymm7
+ VPCMPEQ %ymm8, %ymm9, %ymm8
+
+ vpor %ymm1, %ymm5, %ymm1
+ vpor %ymm2, %ymm6, %ymm2
+ vpor %ymm3, %ymm7, %ymm3
+ vpor %ymm4, %ymm8, %ymm4
+
+ vpor %ymm1, %ymm2, %ymm5
+ vpor %ymm3, %ymm4, %ymm6
+
+ vpor %ymm5, %ymm6, %ymm5
+
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jnz L(4x_vec_end)
+
+ addq $(VEC_SIZE * 4), %rdi
+
+ jmp L(loop_4x_vec)
+
+ .p2align 4
+L(first_vec_x0):
+ /* Found CHAR or the null byte. */
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x1):
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq $VEC_SIZE, %rax
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq VEC_SIZE(%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq $(VEC_SIZE * 2), %rax
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(4x_vec_end):
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x0)
+ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+ vpmovmskb %ymm3, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+ vpmovmskb %ymm4, %eax
+ testl %eax, %eax
+L(first_vec_x3):
+ tzcntl %eax, %eax
+# ifdef USE_AS_STRCHRNUL
+ addq $(VEC_SIZE * 3), %rax
+ addq %rdi, %rax
+# else
+ xorl %edx, %edx
+ leaq (VEC_SIZE * 3)(%rdi, %rax), %rax
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+# endif
+ VZEROUPPER
+ ret
+
+END (STRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S
new file mode 100644
index 0000000000..c4d1c0c773
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-sse2.S
@@ -0,0 +1,28 @@
+/* strchr optimized with SSE2.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define strchr __strchr_sse2
+
+# undef weak_alias
+# define weak_alias(strchr, index)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strchr)
+#endif
+
+#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
deleted file mode 100644
index c9f54ca2e2..0000000000
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
- Copyright (C) 2009-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(strchr)
- .type strchr, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- leaq __strchr_sse2(%rip), %rax
-2: HAS_ARCH_FEATURE (Slow_BSF)
- jz 3f
- leaq __strchr_sse2_no_bsf(%rip), %rax
-3: ret
-END(strchr)
-
-
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strchr_sse2, @function; \
- .align 16; \
- .globl __strchr_sse2; \
- .hidden __strchr_sse2; \
- __strchr_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strchr calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strchr; __GI_strchr = __strchr_sse2
-#endif
-
-#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
new file mode 100644
index 0000000000..22af16e5c9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr.c
@@ -0,0 +1,55 @@
+/* Multiple versions of strchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx2);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
+ return OPTIMIZE (sse2_no_bsf);
+
+ return OPTIMIZE (sse2);
+}
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias (strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
new file mode 100644
index 0000000000..fa0cc09760
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __strchrnul_avx2
+#define USE_AS_STRCHRNUL 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
new file mode 100644
index 0000000000..4d199b3de8
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
@@ -0,0 +1,26 @@
+/* strchrnul optimized with SSE2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __strchrnul __strchrnul_sse2
+
+# undef weak_alias
+# define weak_alias(__strchrnul, strchrnul)
+#endif
+
+#include "../strchrnul.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul.c b/sysdeps/x86_64/multiarch/strchrnul.c
new file mode 100644
index 0000000000..4a4c55a937
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strchrnul.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+ IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S
new file mode 100644
index 0000000000..67726b6837
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __wcschr_avx2
+#define USE_AS_WCSCHR 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S
new file mode 100644
index 0000000000..9a1b45aaac
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S
@@ -0,0 +1,30 @@
+/* wcschr optimized with SSE2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define __wcschr __wcschr_sse2
+
+# undef weak_alias
+# define weak_alias(__wcschr, wcschr)
+# undef libc_hidden_def
+# define libc_hidden_def(__wcschr)
+# undef libc_hidden_weak
+# define libc_hidden_weak(wcschr)
+#endif
+
+#include "../wcschr.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr.c b/sysdeps/x86_64/multiarch/wcschr.c
new file mode 100644
index 0000000000..70d36926e1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wcschr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define wcschr __redirect_wcschr
+# define __wcschr __redirect___wcschr
+# include <wchar.h>
+# undef wcschr
+# undef __wcschr
+
+# define SYMBOL_NAME wcschr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_wcschr, __wcschr, IFUNC_SELECTOR ());
+weak_alias (__wcschr, wcschr);
+# ifdef SHARED
+__hidden_ver1 (__wcschr, __GI___wcschr, __redirect___wcschr)
+ __attribute__((visibility ("hidden")));
+__hidden_ver1 (wcschr, __GI_wcschr, __redirect_wcschr)
+ __attribute__((weak, visibility ("hidden")));
+# endif
+#endif