aboutsummaryrefslogtreecommitdiff
path: root/REORG.TODO/sysdeps/x86_64/multiarch/strcmp.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/x86_64/multiarch/strcmp.S')
-rw-r--r--REORG.TODO/sysdeps/x86_64/multiarch/strcmp.S209
1 files changed, 209 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/x86_64/multiarch/strcmp.S b/REORG.TODO/sysdeps/x86_64/multiarch/strcmp.S
new file mode 100644
index 0000000000..54f8f7dd44
--- /dev/null
+++ b/REORG.TODO/sysdeps/x86_64/multiarch/strcmp.S
@@ -0,0 +1,209 @@
+/* Multiple versions of strcmp
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifdef USE_AS_STRNCMP
+/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
+ if the new counter > the old one or is 0. */
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate left number to compare: %r9 = %rcx + %r11 - 16 */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; /* unsigned test: old counter below the new one? */ \
+ jb LABEL(strcmp_exitz); \
+ test %r9, %r9; /* new counter is 0? */ \
+ je LABEL(strcmp_exitz); \
+ mov %r9, %r11 /* otherwise the new value becomes the counter */
+/* Names for the strncmp variants.  STRCMP is not defined in this branch; presumably the includer sets it -- confirm. */
+# define STRCMP_SSE42 __strncmp_sse42
+# define STRCMP_SSSE3 __strncmp_ssse3
+# define STRCMP_SSE2 __strncmp_sse2
+# define __GI_STRCMP __GI_strncmp
+#elif defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+/* The counter update expands to nothing in this variant. */
+# define UPDATE_STRNCMP_COUNTER
+/* Unlike the strncmp and strcmp branches, this one also names an AVX version. */
+# define STRCMP_AVX __strcasecmp_l_avx
+# define STRCMP_SSE42 __strcasecmp_l_sse42
+# define STRCMP_SSSE3 __strcasecmp_l_ssse3
+# define STRCMP_SSE2 __strcasecmp_l_sse2
+# define __GI_STRCMP __GI___strcasecmp_l
+#elif defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+
+/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
+ if the new counter > the old one or is 0. */
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate left number to compare: %r9 = %rcx + %r11 - 16 */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; /* unsigned test: old counter below the new one? */ \
+ jb LABEL(strcmp_exitz); \
+ test %r9, %r9; /* new counter is 0? */ \
+ je LABEL(strcmp_exitz); \
+ mov %r9, %r11 /* otherwise the new value becomes the counter */
+/* Names for the strncasecmp_l variants, including an AVX version. */
+# define STRCMP_AVX __strncasecmp_l_avx
+# define STRCMP_SSE42 __strncasecmp_l_sse42
+# define STRCMP_SSSE3 __strncasecmp_l_ssse3
+# define STRCMP_SSE2 __strncasecmp_l_sse2
+# define __GI_STRCMP __GI___strncasecmp_l
+#else /* none of the variant macros is set: build strcmp */
+# define USE_AS_STRCMP
+# define UPDATE_STRNCMP_COUNTER /* expands to nothing */
+# ifndef STRCMP /* the default names below apply only when STRCMP is not already defined */
+# define STRCMP strcmp
+# define STRCMP_SSE42 __strcmp_sse42
+# define STRCMP_SSSE3 __strcmp_ssse3
+# define STRCMP_SSE2 __strcmp_sse2
+# define __GI_STRCMP __GI_strcmp
+# endif
+#endif
+
+/* Define multiple versions only for the definition in libc. Don't
+ define multiple versions for strncmp in the static library, since
+ strncmp is needed before the initialization has happened. */
+#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
+ .text
+ENTRY(STRCMP) /* IFUNC resolver: returns in %rax the address of the implementation to use. */
+ .type STRCMP, @gnu_indirect_function /* retype the symbol as an indirect function */
+ LOAD_RTLD_GLOBAL_RO_RDX /* NOTE(review): presumably sets up %rdx for the HAS_* tests below -- confirm */
+#ifdef USE_AS_STRCMP
+ leaq __strcmp_sse2_unaligned(%rip), %rax /* strcmp only: first candidate */
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load) /* assumed to leave ZF clear when the feature is set -- confirm */
+ jnz 3f /* feature test nonzero: return the unaligned SSE2 version */
+#else
+ HAS_ARCH_FEATURE (Slow_SSE4_2)
+ jnz 2f /* Slow_SSE4_2 test nonzero: do not consider the SSE4.2 version */
+ leaq STRCMP_SSE42(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_2)
+ jnz 3f /* SSE4_2 test nonzero: return the SSE4.2 version */
+#endif
+2: leaq STRCMP_SSSE3(%rip), %rax /* next candidate: SSSE3 version */
+ HAS_CPU_FEATURE (SSSE3)
+ jnz 3f
+ leaq STRCMP_SSE2(%rip), %rax /* fallback when no test above succeeded */
+3: ret /* %rax holds the selected address */
+END(STRCMP) /* note: STRCMP_AVX is never selected by this resolver */
+
+# ifdef USE_AS_STRCASECMP_L
+ENTRY(__strcasecmp) /* IFUNC resolver: returns in %rax the address of the implementation to use. */
+ .type __strcasecmp, @gnu_indirect_function /* retype the symbol as an indirect function */
+ LOAD_RTLD_GLOBAL_RO_RDX /* NOTE(review): presumably sets up %rdx for the HAS_* tests below -- confirm */
+ leaq __strcasecmp_avx(%rip), %rax /* first candidate: AVX version */
+ HAS_ARCH_FEATURE (AVX_Usable) /* assumed to leave ZF clear when the feature is set -- confirm */
+ jnz 3f
+ HAS_ARCH_FEATURE (Slow_SSE4_2)
+ jnz 2f /* Slow_SSE4_2 test nonzero: do not consider the SSE4.2 version */
+ leaq __strcasecmp_sse42(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_2)
+ jnz 3f
+2: leaq __strcasecmp_ssse3(%rip), %rax /* next candidate: SSSE3 version */
+ HAS_CPU_FEATURE (SSSE3)
+ jnz 3f
+ leaq __strcasecmp_sse2(%rip), %rax /* fallback when no test above succeeded */
+3: ret /* %rax holds the selected address */
+END(__strcasecmp)
+weak_alias (__strcasecmp, strcasecmp) /* weak_alias is defined outside this file */
+# endif
+# ifdef USE_AS_STRNCASECMP_L
+ENTRY(__strncasecmp) /* IFUNC resolver: returns in %rax the address of the implementation to use. */
+ .type __strncasecmp, @gnu_indirect_function /* retype the symbol as an indirect function */
+ LOAD_RTLD_GLOBAL_RO_RDX /* NOTE(review): presumably sets up %rdx for the HAS_* tests below -- confirm */
+ leaq __strncasecmp_avx(%rip), %rax /* first candidate: AVX version */
+ HAS_ARCH_FEATURE (AVX_Usable) /* assumed to leave ZF clear when the feature is set -- confirm */
+ jnz 3f
+ HAS_ARCH_FEATURE (Slow_SSE4_2)
+ jnz 2f /* Slow_SSE4_2 test nonzero: do not consider the SSE4.2 version */
+ leaq __strncasecmp_sse42(%rip), %rax
+ HAS_CPU_FEATURE (SSE4_2)
+ jnz 3f
+2: leaq __strncasecmp_ssse3(%rip), %rax /* next candidate: SSSE3 version */
+ HAS_CPU_FEATURE (SSSE3)
+ jnz 3f
+ leaq __strncasecmp_sse2(%rip), %rax /* fallback when no test above succeeded */
+3: ret /* %rax holds the selected address */
+END(__strncasecmp)
+weak_alias (__strncasecmp, strncasecmp) /* weak_alias is defined outside this file */
+# endif
+
+# undef LABEL
+# define LABEL(l) .L##l##_sse42 /* first pass: labels get an _sse42 suffix */
+# define GLABEL(l) l##_sse42
+# define SECTION sse4.2
+# include "strcmp-sse42.S"
+/* Second pass, case-insensitive variants only: the same file is included again with _avx names. */
+/* NOTE(review): LABEL/GLABEL/SECTION are redefined below without #undef; presumably strcmp-sse42.S undefines them -- confirm. */
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# define LABEL(l) .L##l##_avx
+# define GLABEL(l) l##_avx
+# define USE_AVX 1
+# undef STRCMP_SSE42
+# define STRCMP_SSE42 STRCMP_AVX /* from here on STRCMP_SSE42 expands to the AVX symbol name */
+# define SECTION avx
+# include "strcmp-sse42.S"
+# endif
+
+
+# undef ENTRY /* from here on ENTRY ignores its argument and always opens STRCMP_SSE2 */
+# define ENTRY(name) \
+ .type STRCMP_SSE2, @function; \
+ .align 16; \
+ .globl STRCMP_SSE2; \
+ .hidden STRCMP_SSE2; /* global binding, hidden visibility */ \
+ STRCMP_SSE2: cfi_startproc; \
+ CALL_MCOUNT
+# undef END /* END likewise ignores its argument and closes STRCMP_SSE2 */
+# define END(name) \
+ cfi_endproc; .size STRCMP_SSE2, .-STRCMP_SSE2
+
+# ifdef USE_AS_STRCASECMP_L /* ENTRY2/END2 are not used in this file; presumably ../strcmp.S uses them -- confirm */
+# define ENTRY2(name) /* ignores its argument; always opens __strcasecmp_sse2 */ \
+ .type __strcasecmp_sse2, @function; \
+ .align 16; \
+ .globl __strcasecmp_sse2; \
+ .hidden __strcasecmp_sse2; /* global binding, hidden visibility */ \
+ __strcasecmp_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# define END2(name) /* ignores its argument; closes __strcasecmp_sse2 */ \
+ cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2
+# endif
+
+# ifdef USE_AS_STRNCASECMP_L /* ENTRY2/END2 are not used in this file; presumably ../strcmp.S uses them -- confirm */
+# define ENTRY2(name) /* ignores its argument; always opens __strncasecmp_sse2 */ \
+ .type __strncasecmp_sse2, @function; \
+ .align 16; \
+ .globl __strncasecmp_sse2; \
+ .hidden __strncasecmp_sse2; /* global binding, hidden visibility */ \
+ __strncasecmp_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# define END2(name) /* ignores its argument; closes __strncasecmp_sse2 */ \
+ cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
+# endif
+
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strcmp calls through a PLT.
+ The speedup we get from using SSE4.2 instructions is likely eaten away by the
+ indirect call in the PLT.  So bind __GI_STRCMP to STRCMP_SSE2; `name' is unused. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCMP; __GI_STRCMP = STRCMP_SSE2
+#endif /* (SHARED || !USE_AS_STRNCMP) && IS_IN (libc) */
+/* NOTE(review): presumably ../strcmp.S uses ENTRY/END and libc_hidden_builtin_def, picking up the redefinitions above -- confirm. */
+#include "../strcmp.S"