aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 14
-rw-r--r-- sysdeps/x86_64/multiarch/Makefile 2
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-impl-list.c 14
-rw-r--r-- sysdeps/x86_64/multiarch/strrchr-avx2.S 235
-rw-r--r-- sysdeps/x86_64/multiarch/strrchr-sse2.S 28
-rw-r--r-- sysdeps/x86_64/multiarch/strrchr.c 34
-rw-r--r-- sysdeps/x86_64/multiarch/wcsrchr-avx2.S 3
-rw-r--r-- sysdeps/x86_64/multiarch/wcsrchr-sse2.S 23
-rw-r--r-- sysdeps/x86_64/multiarch/wcsrchr.c 29
9 files changed, 382 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 8fea821f91..d45f71d5f8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,20 @@
2017-06-09 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+ strrchr-sse2, strrchr-avx2, wcsrchr-sse2 and wcsrchr-avx2.
+ * sysdeps/x86_64/multiarch/ifunc-impl-list.c
+ (__libc_ifunc_impl_list): Add tests for __strrchr_avx2,
+ __strrchr_sse2, __wcsrchr_avx2 and __wcsrchr_sse2.
+ * sysdeps/x86_64/multiarch/strrchr-avx2.S: New file.
+ * sysdeps/x86_64/multiarch/strrchr-sse2.S: Likewise.
+ * sysdeps/x86_64/multiarch/strrchr.c: Likewise.
+ * sysdeps/x86_64/multiarch/wcsrchr-avx2.S: Likewise.
+ * sysdeps/x86_64/multiarch/wcsrchr-sse2.S: Likewise.
+ * sysdeps/x86_64/multiarch/wcsrchr.c: Likewise.
+
+2017-06-09 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
memrchr-sse2 and memrchr-avx2.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add tests for __memrchr_avx2 and
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 2fa390b3dd..c901704b11 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -15,6 +15,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
memmove-ssse3-back \
memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
+ strrchr-sse2 strrchr-avx2 \
strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
@@ -40,6 +41,7 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
wmemchr-sse2 wmemchr-avx2 \
wcscpy-ssse3 wcscpy-c \
wcschr-sse2 wcschr-avx2 \
+ wcsrchr-sse2 wcsrchr-avx2 \
wcsnlen-sse4_1 wcsnlen-c \
wcslen-sse2 wcslen-avx2 wcsnlen-avx2
endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 5670eb7e9e..81f4d9b7af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -251,6 +251,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strchrnul_avx2)
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+ /* Support sysdeps/x86_64/multiarch/strrchr.c. */
+ IFUNC_IMPL (i, name, strrchr,
+ IFUNC_IMPL_ADD (array, i, strrchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strrchr_avx2)
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
+
/* Support sysdeps/x86_64/multiarch/strcmp.S. */
IFUNC_IMPL (i, name, strcmp,
IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
@@ -342,6 +349,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__wcschr_avx2)
IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+ /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
+ IFUNC_IMPL (i, name, wcsrchr,
+ IFUNC_IMPL_ADD (array, i, wcsrchr,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wcsrchr_avx2)
+ IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
+
/* Support sysdeps/x86_64/multiarch/wcscpy.S. */
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
new file mode 100644
index 0000000000..36ef660b2f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
@@ -0,0 +1,235 @@
+/* strrchr/wcsrchr optimized with AVX2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRRCHR
+# define STRRCHR __strrchr_avx2 /* Default entry name; wcsrchr-avx2.S predefines STRRCHR instead. */
+# endif
+
+# ifdef USE_AS_WCSRCHR
+# define VPBROADCAST vpbroadcastd /* Wide variant: CHARs are 4-byte elements. */
+# define VPCMPEQ vpcmpeqd
+# else
+# define VPBROADCAST vpbroadcastb /* Byte variant: CHARs are 1-byte elements. */
+# define VPCMPEQ vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+# define VZEROUPPER vzeroupper
+# endif
+
+# define VEC_SIZE 32 /* Bytes handled per YMM load. */
+
+ .section .text.avx,"ax",@progbits
+ENTRY (STRRCHR) /* Return in RAX the last CHAR (ESI) in the nul-terminated string at RDI, or 0 if there is none. */
+ movd %esi, %xmm4 /* CHAR into the low element of XMM4. */
+ movl %edi, %ecx /* Low address bits, used for the offset test below. */
+ /* Broadcast CHAR to YMM4. */
+ VPBROADCAST %xmm4, %ymm4
+ vpxor %ymm0, %ymm0, %ymm0 /* YMM0 = 0, the pattern for nul CHARs. */
+
+ /* Check if we may cross page boundary with one vector load. An offset of at most VEC_SIZE inside a 2 * VEC_SIZE block keeps the load inside that block. */
+ andl $(2 * VEC_SIZE - 1), %ecx
+ cmpl $VEC_SIZE, %ecx
+ ja L(cros_page_boundary)
+
+ vmovdqu (%rdi), %ymm1 /* Unaligned load of the first VEC_SIZE bytes. */
+ VPCMPEQ %ymm1, %ymm0, %ymm2 /* YMM2: nul CHAR positions. */
+ VPCMPEQ %ymm1, %ymm4, %ymm3 /* YMM3: positions equal to CHAR. */
+ vpmovmskb %ymm2, %ecx /* ECX = nul mask, one bit per byte. */
+ vpmovmskb %ymm3, %eax /* EAX = match mask, one bit per byte. */
+ addq $VEC_SIZE, %rdi /* RDI = end of the vector just examined. */
+
+ testl %eax, %eax
+ jnz L(first_vec)
+
+ testl %ecx, %ecx
+ jnz L(return_null) /* A nul but no match: CHAR is not in the string. */
+
+ andq $-VEC_SIZE, %rdi /* Align RDI down for the aligned loads in the loop. */
+ xorl %edx, %edx /* EDX = 0: no match remembered yet. */
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(first_vec):
+ /* Check if there is a nul CHAR. */
+ testl %ecx, %ecx
+ jnz L(char_and_nul_in_first_vec)
+
+ /* Remember the match and keep searching. EDX holds the match mask and RSI the end of the vector it belongs to. */
+ movl %eax, %edx
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rdi /* Align RDI down for the aligned loads in the loop. */
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(cros_page_boundary):
+ andl $(VEC_SIZE - 1), %ecx /* ECX = offset of the string inside its aligned vector. */
+ andq $-VEC_SIZE, %rdi /* Load from the aligned vector that contains the string start. */
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %edx /* EDX = nul mask; CL is needed as the shift count. */
+ vpmovmskb %ymm3, %eax /* EAX = match mask. */
+ shrl %cl, %edx /* Discard the bytes that precede the string. */
+ shrl %cl, %eax
+ addq $VEC_SIZE, %rdi /* RDI = end of the aligned vector just examined. */
+
+ /* Check if there is a CHAR. */
+ testl %eax, %eax
+ jnz L(found_char)
+
+ testl %edx, %edx
+ jnz L(return_null)
+
+ jmp L(aligned_loop) /* EDX is zero here, which also means no remembered match. */
+
+ .p2align 4
+L(found_char):
+ testl %edx, %edx
+ jnz L(char_and_nul)
+
+ /* Remember the match and keep searching. */
+ movl %eax, %edx
+ leaq (%rdi, %rcx), %rsi /* Bias RSI by the shift count so that mask bit i maps to RSI - VEC_SIZE + i. */
+
+ .p2align 4
+L(aligned_loop): /* Unrolled four times; each step examines one aligned vector. */
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ addq $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx /* Nonzero if this vector holds a CHAR or a nul CHAR. */
+ jnz L(char_nor_null)
+
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ add $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jnz L(char_nor_null)
+
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ addq $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jnz L(char_nor_null)
+
+ vmovdqa (%rdi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm2
+ addq $VEC_SIZE, %rdi
+ VPCMPEQ %ymm1, %ymm4, %ymm3
+ vpmovmskb %ymm2, %ecx
+ vpmovmskb %ymm3, %eax
+ orl %eax, %ecx
+ jz L(aligned_loop)
+
+ .p2align 4
+L(char_nor_null):
+ /* Found a CHAR or a nul CHAR in the loop. EAX is the match mask of that vector. */
+ testl %eax, %eax
+ jnz L(match)
+L(return_value): /* No usable match in this vector: return the remembered one, if any. */
+ testl %edx, %edx
+ jz L(return_null)
+ movl %edx, %eax /* Remembered match mask. */
+ movq %rsi, %rdi /* End of the vector that mask belongs to. */
+
+# ifdef USE_AS_WCSRCHR
+ /* Keep the first bit for each matching CHAR for bsr. vpmovmskb yields four mask bits per 4-byte CHAR. */
+ andl $0x11111111, %eax
+# endif
+ bsrl %eax, %eax /* Byte index of the last match in the vector. */
+ leaq -VEC_SIZE(%rdi, %rax), %rax /* RDI is the vector end, hence the -VEC_SIZE. */
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(match):
+ /* Find a CHAR. Check if there is a nul CHAR. ECX was merged with EAX in the loop, so the nul mask is extracted again. */
+ vpmovmskb %ymm2, %ecx
+ testl %ecx, %ecx
+ jnz L(find_nul)
+
+ /* Remember the match and keep searching. */
+ movl %eax, %edx
+ movq %rdi, %rsi
+ jmp L(aligned_loop)
+
+ .p2align 4
+L(find_nul):
+# ifdef USE_AS_WCSRCHR
+ /* Keep the first bit for each matching CHAR for bsr. */
+ andl $0x11111111, %ecx
+ andl $0x11111111, %eax
+# endif
+ /* Mask out any matching bits after the nul CHAR. R8D = ECX ^ (ECX - 1) covers the bits up to and including the first nul. */
+ movl %ecx, %r8d
+ subl $1, %r8d
+ xorl %ecx, %r8d
+ andl %r8d, %eax
+ testl %eax, %eax
+ /* If there is no CHAR here, return the remembered one. */
+ jz L(return_value)
+ bsrl %eax, %eax /* Byte index of the last match before the end of the string. */
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(char_and_nul):
+ /* Find both a CHAR and a nul CHAR. Reached from L(found_char), where the masks were shifted right by CL. */
+ addq %rcx, %rdi /* Bias RDI by that shift count. */
+ movl %edx, %ecx /* Nul mask into ECX, as the shared code below expects. */
+L(char_and_nul_in_first_vec):
+# ifdef USE_AS_WCSRCHR
+ /* Keep the first bit for each matching CHAR for bsr. */
+ andl $0x11111111, %ecx
+ andl $0x11111111, %eax
+# endif
+ /* Mask out any matching bits after the nul CHAR. */
+ movl %ecx, %r8d
+ subl $1, %r8d
+ xorl %ecx, %r8d
+ andl %r8d, %eax
+ testl %eax, %eax
+ /* Return null pointer if the nul CHAR comes first. */
+ jz L(return_null)
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+ VZEROUPPER
+ ret
+
+ .p2align 4
+L(return_null):
+ xorl %eax, %eax /* RAX = 0; the 32-bit write clears the upper half as well. */
+ VZEROUPPER
+ ret
+
+END (STRRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S
new file mode 100644
index 0000000000..6479d17915
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S
@@ -0,0 +1,28 @@
+/* strrchr optimized with SSE2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define strrchr __strrchr_sse2 /* The name strrchr in the file included below now expands to __strrchr_sse2. */
+
+# undef weak_alias /* Redefined as an empty macro on the next line. NOTE(review): presumably this drops the rindex alias emitted by ../strrchr.S; confirm there. */
+# define weak_alias(strrchr, rindex)
+# undef libc_hidden_builtin_def /* Likewise redefined as an empty macro on the next line. */
+# define libc_hidden_builtin_def(strrchr)
+#endif
+
+#include "../strrchr.S"
diff --git a/sysdeps/x86_64/multiarch/strrchr.c b/sysdeps/x86_64/multiarch/strrchr.c
new file mode 100644
index 0000000000..0375ae6dae
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strrchr.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. The candidates listed in ifunc-impl-list.c are __strrchr_avx2 and __strrchr_sse2. */
+#if IS_IN (libc)
+# define strrchr __redirect_strrchr /* Any strrchr declaration in <string.h> is seen under this name. */
+# include <string.h>
+# undef strrchr
+
+# define SYMBOL_NAME strrchr
+# include "ifunc-avx2.h" /* NOTE(review): presumably supplies IFUNC_SELECTOR keyed on SYMBOL_NAME; confirm in that header. */
+
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
+weak_alias (strrchr, rindex);
+# ifdef SHARED
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S
new file mode 100644
index 0000000000..cf8a239ab2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S
@@ -0,0 +1,3 @@
+#define STRRCHR __wcsrchr_avx2 /* Entry symbol passed to ENTRY/END in strrchr-avx2.S. */
+#define USE_AS_WCSRCHR 1 /* Selects vpbroadcastd/vpcmpeqd and the 0x11111111 mask filtering in strrchr-avx2.S. */
+#include "strrchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
new file mode 100644
index 0000000000..0ac1b1356c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
@@ -0,0 +1,23 @@
+/* wcsrchr optimized with SSE2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define wcsrchr __wcsrchr_sse2 /* The name wcsrchr in the file included below now expands to __wcsrchr_sse2. */
+#endif
+
+#include "../wcsrchr.S"
diff --git a/sysdeps/x86_64/multiarch/wcsrchr.c b/sysdeps/x86_64/multiarch/wcsrchr.c
new file mode 100644
index 0000000000..b39218f6bd
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr.c
@@ -0,0 +1,29 @@
+/* Multiple versions of wcsrchr.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. The candidates listed in ifunc-impl-list.c are __wcsrchr_avx2 and __wcsrchr_sse2. */
+#if IS_IN (libc)
+# define wcsrchr __redirect_wcsrchr /* Any wcsrchr declaration in <wchar.h> is seen under this name. */
+# include <wchar.h>
+# undef wcsrchr
+
+# define SYMBOL_NAME wcsrchr
+# include "ifunc-avx2.h" /* NOTE(review): presumably supplies IFUNC_SELECTOR keyed on SYMBOL_NAME; confirm in that header. */
+
+libc_ifunc_redirected (__redirect_wcsrchr, wcsrchr, IFUNC_SELECTOR ());
+#endif