aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2010-08-14 22:04:01 -0700
committer: Ulrich Drepper <drepper@redhat.com> 2010-08-14 22:04:01 -0700
commit: e9f82e0d1d70f361a40f1853c928df04918a38f5 (patch)
tree: cdbf94a494dc32833a600e6c86b776b59d646bd7
parent: ca6bb004ebd1cc7da72f1a761ffea377245d1ee9 (diff)
download: glibc-e9f82e0d1d70f361a40f1853c928df04918a38f5.tar
glibc-e9f82e0d1d70f361a40f1853c928df04918a38f5.tar.gz
glibc-e9f82e0d1d70f361a40f1853c928df04918a38f5.tar.bz2
glibc-e9f82e0d1d70f361a40f1853c928df04918a38f5.zip
Add optimized strncasecmp versions for x86-64.
-rw-r--r-- ChangeLog 17
-rw-r--r-- NEWS 5
-rw-r--r-- string/Makefile 2
-rw-r--r-- string/test-strncasecmp.c 318
-rw-r--r-- sysdeps/x86_64/Makefile 2
-rw-r--r-- sysdeps/x86_64/multiarch/Makefile 3
-rw-r--r-- sysdeps/x86_64/multiarch/strcmp.S 514
-rw-r--r-- sysdeps/x86_64/multiarch/strncase_l-ssse3.S 6
-rw-r--r-- sysdeps/x86_64/multiarch/strncase_l.S 6
-rw-r--r-- sysdeps/x86_64/strcasecmp_l-nonascii.c 3
-rw-r--r-- sysdeps/x86_64/strcmp.S 169
-rw-r--r-- sysdeps/x86_64/strncase.S 1
-rw-r--r-- sysdeps/x86_64/strncase_l-nonascii.c 8
-rw-r--r-- sysdeps/x86_64/strncase_l.S 6
14 files changed, 774 insertions, 286 deletions
diff --git a/ChangeLog b/ChangeLog
index 21f9ed7049..dd78abe472 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2010-08-14 Ulrich Drepper <drepper@redhat.com>
+ * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
+ strncase_l-nonascii.
+ * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines):
+ Add strncase_l-ssse3.
+ * sysdeps/x86_64/multiarch/strcmp.S: Prepare for use as strncasecmp.
+ * sysdeps/x86_64/strcmp.S: Likewise.
+ * sysdeps/x86_64/multiarch/strncase_l-ssse3.S: New file.
+ * sysdeps/x86_64/multiarch/strncase_l.S: New file.
+ * sysdeps/x86_64/strncase.S: New file.
+ * sysdeps/x86_64/strncase_l-nonascii.c: New file.
+ * sysdeps/x86_64/strncase_l.S: New file.
+ * string/Makefile (strop-tests): Add strncasecmp.
+ * string/test-strncasecmp.c: New file.
+
+ * sysdeps/x86_64/strcasecmp_l-nonascii.c: Add prototype to avoid
+ warning.
+
* sysdeps/x86_64/strcmp.S: Move definition of NO_NOLOCALE_ALIAS to...
* sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S: ... here.
diff --git a/NEWS b/NEWS
index eba00f4e52..f3094d18aa 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes. 2010-8-12
+GNU C Library NEWS -- history of user-visible changes. 2010-8-14
Copyright (C) 1992-2009, 2010 Free Software Foundation, Inc.
See the end for copying conditions.
@@ -15,7 +15,8 @@ Version 2.13
* POWER7 optimizations: memset, memcmp, strncmp
-* New optimized string functions for x86-64: strnlen, strcasecmp
+* New optimized string functions for x86-64: strnlen (SSE2),
+ strcasecmp (SSE2, SSSE3, SSE4.2), strncasecmp (SSE2, SSSE3, SSE4.2)
Implemented by Ulrich Drepper.
Version 2.12
diff --git a/string/Makefile b/string/Makefile
index 4c160e9d2d..cc2da10a4e 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -49,7 +49,7 @@ o-objects.ob := memcpy.o memset.o memchr.o
strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
strlen strncmp strncpy strpbrk strrchr strspn memmem \
- strstr strcasestr strnlen strcasecmp
+ strstr strcasestr strnlen strcasecmp strncasecmp
tests := tester inl-tester noinl-tester testcopy test-ffs \
tst-strlen stratcliff tst-svc tst-inlcall \
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
diff --git a/string/test-strncasecmp.c b/string/test-strncasecmp.c
new file mode 100644
index 0000000000..80e4d6315e
--- /dev/null
+++ b/string/test-strncasecmp.c
@@ -0,0 +1,318 @@
+/* Test and measure strncasecmp functions.
+ Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Jakub Jelinek <jakub@redhat.com>, 1999.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <ctype.h>
+#define TEST_MAIN
+#include "test-string.h"
+
+typedef int (*proto_t) (const char *, const char *, size_t);
+static int simple_strncasecmp (const char *, const char *, size_t);
+static int stupid_strncasecmp (const char *, const char *, size_t);
+
+IMPL (stupid_strncasecmp, 0)
+IMPL (simple_strncasecmp, 0)
+IMPL (strncasecmp, 1)
+
+/* Reference implementation: compare at most N bytes of S1 and S2 after
+   folding each byte with tolower.  Returns the difference of the first
+   pair of folded bytes that differ, or 0 when the strings agree up to
+   the terminating NUL of S1 or for all N bytes.  */
+static int
+simple_strncasecmp (const char *s1, const char *s2, size_t n)
+{
+  size_t idx;
+
+  for (idx = 0; idx < n; ++idx)
+    {
+      int diff = ((unsigned char) tolower (s1[idx])
+                  - (unsigned char) tolower (s2[idx]));
+
+      if (diff != 0)
+        return diff;
+
+      /* Both folded bytes are equal; stop once the terminator is hit.  */
+      if (s1[idx] == '\0')
+        return 0;
+    }
+
+  return 0;
+}
+
+/* Second reference implementation: first determine how many bytes have
+   to be looked at (the shorter string including its NUL, capped at MAX),
+   then compare exactly that many tolower-folded bytes.  Returns the
+   first non-zero difference, or 0 if there is none.  */
+static int
+stupid_strncasecmp (const char *s1, const char *s2, size_t max)
+{
+  size_t limit = strlen (s1) + 1;
+  size_t other = strlen (s2) + 1;
+  size_t k;
+
+  if (other < limit)
+    limit = other;
+  if (max < limit)
+    limit = max;
+
+  for (k = 0; k < limit; ++k)
+    {
+      int diff = ((unsigned char) tolower (s1[k])
+                  - (unsigned char) tolower (s2[k]));
+
+      if (diff != 0)
+        return diff;
+    }
+
+  return 0;
+}
+
+/* Run IMPL once on (S1, S2, N) and check that the sign of its result
+   matches the sign of EXP_RESULT.  On a mismatch an error is reported
+   and the global RET is set to 1.  Otherwise, when HP_TIMING_AVAIL is
+   set, the best time of 32 further calls is printed.  */
+static void
+do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n,
+             int exp_result)
+{
+  int result = CALL (impl, s1, s2, n);
+  int mismatch;
+
+  /* Only the sign of the result is specified, so compare signs.  */
+  if (exp_result == 0)
+    mismatch = result != 0;
+  else if (exp_result < 0)
+    mismatch = result >= 0;
+  else
+    mismatch = result <= 0;
+
+  if (mismatch)
+    {
+      error (0, 0, "Wrong result in function %s %d %d", impl->name,
+             result, exp_result);
+      ret = 1;
+      return;
+    }
+
+  if (HP_TIMING_AVAIL)
+    {
+      hp_timing_t t_begin __attribute ((unused));
+      hp_timing_t t_end __attribute ((unused));
+      hp_timing_t best_time = ~ (hp_timing_t) 0;
+      size_t rounds = 32;
+
+      while (rounds-- > 0)
+        {
+          HP_TIMING_NOW (t_begin);
+          CALL (impl, s1, s2, n);
+          HP_TIMING_NOW (t_end);
+          HP_TIMING_BEST (best_time, t_begin, t_end);
+        }
+
+      printf ("\t%zd", (size_t) best_time);
+    }
+}
+
+/* Build two LEN-byte strings at offsets ALIGN1/ALIGN2 (reduced modulo 8)
+   into buf1/buf2: the first is filled through toupper, the second with
+   the tolower of the same bytes.  The last character of one of them is
+   then adjusted by EXP_RESULT, and every implementation is run with
+   limit N, expecting a result with the sign of EXP_RESULT.  Nothing is
+   done if LEN is 0 or the strings would not fit below page_size.  */
+static void
+do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
+         int exp_result)
+{
+  char *str1;
+  char *str2;
+  size_t idx;
+  size_t last;
+
+  align1 &= 7;
+  align2 &= 7;
+
+  if (len == 0
+      || align1 + len + 1 >= page_size
+      || align2 + len + 1 >= page_size)
+    return;
+
+  str1 = (char *) (buf1 + align1);
+  str2 = (char *) (buf2 + align2);
+  last = len - 1;
+
+  for (idx = 0; idx < len; idx++)
+    {
+      str1[idx] = toupper (1 + 23 * idx % max_char);
+      str2[idx] = tolower (str1[idx]);
+    }
+
+  /* Terminate both strings and place differing bytes behind the NULs.  */
+  str1[len] = 0;
+  str2[len] = 0;
+  str1[len + 1] = 23;
+  str2[len + 1] = 24 + exp_result;
+
+  /* Shift the final character so the strings compare as requested; at
+     the 'a'/'z' boundary the first string is changed instead of the
+     second one.  */
+  if ((str2[last] == 'z' && exp_result == -1)
+      || (str2[last] == 'a' && exp_result == 1))
+    str1[last] += exp_result;
+  else
+    str2[last] -= exp_result;
+
+  if (HP_TIMING_AVAIL)
+    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+  FOR_EACH_IMPL (impl, 0)
+    do_one_test (impl, str1, str2, n, exp_result);
+
+  if (HP_TIMING_AVAIL)
+    putchar ('\n');
+}
+
+/* Randomized test: for ITERATIONS rounds, build a lower-case string in
+   the 512 bytes below buf1 + page_size and an upper-case one in the 512
+   bytes below buf2 + page_size, make their first POS bytes identical,
+   force a difference at POS when it lies inside the first string, and
+   check that every implementation returns a result of the expected
+   sign.  Failures are reported with error and recorded in RET.  */
+static void
+do_random_tests (void)
+{
+  size_t i, j, n, align1, align2, pos, len1, len2;
+  int result;
+  long r;
+  unsigned char *p1 = buf1 + page_size - 512;
+  unsigned char *p2 = buf2 + page_size - 512;
+
+  for (n = 0; n < ITERATIONS; n++)
+    {
+      /* Choose alignments, the position of the first difference and the
+         two lengths, clamped so that everything stays within 512 bytes.  */
+      align1 = random () & 31;
+      if (random () & 1)
+        align2 = random () & 31;
+      else
+        align2 = align1 + (random () & 24);
+      pos = random () & 511;
+      j = align1 > align2 ? align1 : align2;
+      if (pos + j >= 511)
+        pos = 510 - j - (random () & 7);
+      len1 = random () & 511;
+      if (pos >= len1 && (random () & 1))
+        len1 = pos + (random () & 7);
+      if (len1 + j >= 512)
+        len1 = 511 - j - (random () & 7);
+      if (pos >= len1)
+        len2 = len1;
+      else
+        len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
+      j = (pos > len2 ? pos : len2) + align1 + 64;
+      if (j > 512)
+        j = 512;
+
+      /* Fill the first buffer with lower-case bytes, avoiding NUL inside
+         the string proper.  */
+      for (i = 0; i < j; ++i)
+        {
+          p1[i] = tolower (random () & 255);
+          if (i < len1 + align1 && !p1[i])
+            {
+              p1[i] = tolower (random () & 255);
+              if (!p1[i])
+                p1[i] = tolower (1 + (random () & 127));
+            }
+        }
+
+      /* Likewise for the second buffer, but with upper-case bytes.  */
+      for (i = 0; i < j; ++i)
+        {
+          p2[i] = toupper (random () & 255);
+          if (i < len2 + align2 && !p2[i])
+            {
+              p2[i] = toupper (random () & 255);
+              if (!p2[i])
+                /* Store the upper-cased value; the result of toupper
+                   must not be thrown away.  */
+                p2[i] = toupper (1 + (random () & 127));
+            }
+        }
+
+      result = 0;
+      /* Make the first POS bytes of both strings identical.  */
+      memcpy (p2 + align2, p1 + align1, pos);
+      if (pos < len1)
+        {
+          /* Ensure the bytes at POS differ even after case folding.  */
+          if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+            {
+              p2[align2 + pos] = toupper (random () & 255);
+              if (tolower (p2[align2 + pos]) == p1[align1 + pos])
+                p2[align2 + pos] = toupper (p1[align1 + pos]
+                                            + 3 + (random () & 127));
+            }
+
+          if (p1[align1 + pos] < tolower (p2[align2 + pos]))
+            result = -1;
+          else
+            result = 1;
+        }
+      p1[len1 + align1] = 0;
+      p2[len2 + align2] = 0;
+
+      FOR_EACH_IMPL (impl, 1)
+        {
+          r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2),
+                    pos + 1 + (random () & 255));
+          /* On 64-bit architectures whose ABI requires the callee to
+             promote the return value, test whether that promotion has
+             actually been done.  */
+          asm ("" : "=g" (r) : "0" (r));
+          if ((r == 0 && result)
+              || (r < 0 && result >= 0)
+              || (r > 0 && result <= 0))
+            {
+              error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
+                     n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
+              ret = 1;
+            }
+        }
+    }
+}
+
+/* Test driver: print the header row, run the fixed cases for a range of
+   lengths, alignments, limits and expected signs, then the randomized
+   cases.  Returns the global failure flag RET.  */
+int
+test_main (void)
+{
+  static const int signs[] = { 0, 1, -1 };
+  size_t i, k;
+
+  test_init ();
+
+  printf ("%23s", "");
+  FOR_EACH_IMPL (impl, 0)
+    printf ("\t%s", impl->name);
+  putchar ('\n');
+
+  /* Short strings with identical alignment; the limit is one below,
+     equal to, and one above the string length.  */
+  for (i = 1; i < 16; ++i)
+    {
+      do_test (i, i, i - 1, i, 127, 0);
+
+      for (k = 0; k < 3; ++k)
+        do_test (i, i, i, i, 127, signs[k]);
+
+      for (k = 0; k < 3; ++k)
+        do_test (i, i, i + 1, i, 127, signs[k]);
+    }
+
+  /* Unshifted strings whose lengths are powers of two.  */
+  for (i = 1; i < 10; ++i)
+    {
+      const size_t len = 2 << i;
+
+      do_test (0, 0, len - 1, len, 127, 0);
+      do_test (0, 0, len, len, 254, 0);
+      do_test (0, 0, len + 1, len, 127, 0);
+
+      do_test (0, 0, len + 1, len, 254, 0);
+
+      /* signs[1] and signs[2] are the two non-zero expectations.  */
+      for (k = 1; k < 3; ++k)
+        {
+          do_test (0, 0, len, len, 127, signs[k]);
+          do_test (0, 0, len + 10, len, 127, signs[k]);
+
+          do_test (0, 0, len, len, 254, signs[k]);
+          do_test (0, 0, len + 10, len, 254, signs[k]);
+        }
+    }
+
+  /* Longer strings with different alignments for the two arguments.  */
+  for (i = 1; i < 8; ++i)
+    {
+      const size_t len = 8 << i;
+
+      do_test (i, 2 * i, len - 1, len, 127, 0);
+      do_test (i, 2 * i, len, len, 127, 0);
+      do_test (i, 2 * i, len + 100, len, 127, 0);
+
+      do_test (2 * i, i, len - 1, len, 254, 0);
+      do_test (2 * i, i, len, len, 254, 0);
+      do_test (2 * i, i, len + 100, len, 254, 0);
+
+      for (k = 1; k < 3; ++k)
+        {
+          do_test (i, 2 * i, len, len, 127, signs[k]);
+          do_test (i, 2 * i, len + 100, len, 127, signs[k]);
+
+          do_test (2 * i, i, len, len, 254, signs[k]);
+          do_test (2 * i, i, len + 100, len, 254, signs[k]);
+        }
+    }
+
+  do_random_tests ();
+  return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index f7eeb155ed..b989f6a976 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -12,7 +12,7 @@ sysdep_routines += _mcount
endif
ifeq ($(subdir),string)
-sysdep_routines += cacheinfo strcasecmp_l-nonascii
+sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
gen-as-const-headers += locale-defines.sym
endif
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 5113dc1ce2..b124524b2e 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -7,7 +7,8 @@ ifeq ($(subdir),string)
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
- memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3
+ memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
+ strncase_l-ssse3
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index 3726dbe4d0..764eb09320 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -46,6 +46,24 @@
# define STRCMP_SSSE3 __strcasecmp_l_ssse3
# define STRCMP_SSE2 __strcasecmp_l_sse2
# define __GI_STRCMP __GI___strcasecmp_l
+#elif defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+
+/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
+   if the new counter > the old one or is 0. */
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate the number of bytes left to compare: %r11 + %rcx - 16 */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; \
+ jb LABEL(strcmp_exitz_sse4_2); \
+ test %r9, %r9; \
+ je LABEL(strcmp_exitz_sse4_2); \
+ mov %r9, %r11
+
+/* Names substituted for the STRCMP_* and __GI_STRCMP placeholders when
+   this file is assembled with USE_AS_STRNCASECMP_L defined. */
+# define STRCMP_SSE42 __strncasecmp_l_sse42
+# define STRCMP_SSSE3 __strncasecmp_l_ssse3
+# define STRCMP_SSE2 __strncasecmp_l_sse2
+# define __GI_STRCMP __GI___strncasecmp_l
#else
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
@@ -100,6 +118,24 @@ ENTRY(__strcasecmp)
END(__strcasecmp)
weak_alias (__strcasecmp, strcasecmp)
# endif
+# ifdef USE_AS_STRNCASECMP_L
+/* Indirect-function resolver for strncasecmp: returns in %rax the address
+   of the SSE4.2, SSSE3 or SSE2 variant, chosen from __cpu_features. */
+ENTRY(__strncasecmp)
+ .type __strncasecmp, @gnu_indirect_function
+ /* Call __init_cpu_features first if the KIND field is still zero. */
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ jne 1f
+ call __init_cpu_features
+1:
+ /* Try the variants in order SSE4.2, SSSE3; SSE2 is the fallback. */
+ leaq __strncasecmp_sse42(%rip), %rax
+ testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
+ jnz 2f
+ leaq __strncasecmp_ssse3(%rip), %rax
+ testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ jnz 2f
+ leaq __strncasecmp_sse2(%rip), %rax
+2: ret
+END(__strncasecmp)
+weak_alias (__strncasecmp, strncasecmp)
+# endif
/* We use 0x1a:
_SIDD_SBYTE_OPS
@@ -131,15 +167,28 @@ weak_alias (__strcasecmp, strcasecmp)
.section .text.sse4.2,"ax",@progbits
.align 16
.type STRCMP_SSE42, @function
-#ifdef USE_AS_STRCASECMP_L
- /* 5-byte NOP. */
- .byte 0x0f,0x1f,0x44,0x00,0x00
+# ifdef USE_AS_STRCASECMP_L
ENTRY (__strcasecmp_sse42)
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
movq %fs:(%rax),%rdx
+
+ // XXX 5 byte should be before the function
+ /* 5-byte NOP. */
+ .byte 0x0f,0x1f,0x44,0x00,0x00
END (__strcasecmp_sse42)
/* FALLTHROUGH to strcasecmp_l. */
-#endif
+# endif
+# ifdef USE_AS_STRNCASECMP_L
+ENTRY (__strncasecmp_sse42)
+ movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
+ movq %fs:(%rax),%rcx
+
+ // XXX 5 byte should be before the function
+ /* 5-byte NOP. */
+ .byte 0x0f,0x1f,0x44,0x00,0x00
+END (__strncasecmp_sse42)
+ /* FALLTHROUGH to strncasecmp_l, locale in %rcx (4th argument). */
+# endif
STRCMP_SSE42:
cfi_startproc
@@ -148,31 +197,42 @@ STRCMP_SSE42:
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
-#ifdef USE_AS_STRCASECMP_L
+# ifdef USE_AS_STRCASECMP_L
/* We have to fall back on the C implementation for locales
with encodings not matching ASCII for single bytes. */
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
-# else
+# else
movq (%rdx), %rax
-# endif
+# endif
testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strcasecmp_l_nonascii
-#endif
+# endif
+# ifdef USE_AS_STRNCASECMP_L
+ /* Fall back on the C implementation for locales with encodings not
+ matching ASCII for single bytes; the locale is in %rcx (4th arg). */
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax
+# else
+ movq (%rcx), %rax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ jne __strncasecmp_l_nonascii
+# endif
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
test %rdx, %rdx
je LABEL(strcmp_exitz_sse4_2)
cmp $1, %rdx
je LABEL(Byte0_sse4_2)
mov %rdx, %r11
-#endif
+# endif
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
-#ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
.section .rodata.cst16,"aM",@progbits,16
.align 16
.Lbelowupper_sse4:
@@ -186,19 +246,19 @@ STRCMP_SSE42:
.quad 0x2020202020202020
.previous
movdqa .Lbelowupper_sse4(%rip), %xmm4
-# define UCLOW_reg %xmm4
+# define UCLOW_reg %xmm4
movdqa .Ltopupper_sse4(%rip), %xmm5
-# define UCHIGH_reg %xmm5
+# define UCHIGH_reg %xmm5
movdqa .Ltouppermask_sse4(%rip), %xmm6
-# define LCQWORD_reg %xmm6
-#endif
+# define LCQWORD_reg %xmm6
+# endif
cmp $0x30, %ecx
ja LABEL(crosscache_sse4_2)/* rsi: 16-byte load will cross cache line */
cmp $0x30, %eax
ja LABEL(crosscache_sse4_2)/* rdi: 16-byte load will cross cache line */
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# define TOLOWER(reg1, reg2) \
movdqa reg1, %xmm7; \
movdqa UCHIGH_reg, %xmm8; \
@@ -225,10 +285,10 @@ STRCMP_SSE42:
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
jnz LABEL(less16bytes_sse4_2)/* If not, find different value or null char */
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)/* finish comparision */
-#endif
+# endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
@@ -270,13 +330,13 @@ LABEL(ashr_0_sse4_2):
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
-#else
+# else
movdqa (%rdi), %xmm2
TOLOWER (%xmm1, %xmm2)
pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
-#endif
+# endif
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
@@ -300,48 +360,48 @@ LABEL(ashr_0_sse4_2):
.p2align 4
LABEL(ashr_0_use_sse4_2):
movdqa (%rdi,%rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
lea 16(%rdx), %rdx
jbe LABEL(ashr_0_use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
movdqa (%rdi,%rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
lea 16(%rdx), %rdx
jbe LABEL(ashr_0_use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
jmp LABEL(ashr_0_use_sse4_2)
.p2align 4
LABEL(ashr_0_use_sse4_2_exit):
jnc LABEL(strcmp_exitz_sse4_2)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub %rcx, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
lea -16(%rdx, %rcx), %rcx
movzbl (%rdi, %rcx), %eax
movzbl (%rsi, %rcx), %edx
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
movl (%rcx,%rax,4), %eax
movl (%rcx,%rdx,4), %edx
@@ -394,18 +454,18 @@ LABEL(loop_ashr_1_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $1, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -413,18 +473,18 @@ LABEL(loop_ashr_1_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $1, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_1_use_sse4_2)
@@ -434,10 +494,10 @@ LABEL(nibble_ashr_1_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $1, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $14, %ecx
ja LABEL(loop_ashr_1_use_sse4_2)
@@ -486,18 +546,18 @@ LABEL(loop_ashr_2_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $2, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -505,18 +565,18 @@ LABEL(loop_ashr_2_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $2, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_2_use_sse4_2)
@@ -526,10 +586,10 @@ LABEL(nibble_ashr_2_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $2, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $13, %ecx
ja LABEL(loop_ashr_2_use_sse4_2)
@@ -578,18 +638,18 @@ LABEL(loop_ashr_3_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $3, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -597,18 +657,18 @@ LABEL(loop_ashr_3_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $3, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_3_use_sse4_2)
@@ -618,10 +678,10 @@ LABEL(nibble_ashr_3_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $3, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $12, %ecx
ja LABEL(loop_ashr_3_use_sse4_2)
@@ -671,18 +731,18 @@ LABEL(loop_ashr_4_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $4, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -690,18 +750,18 @@ LABEL(loop_ashr_4_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $4, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_4_use_sse4_2)
@@ -711,10 +771,10 @@ LABEL(nibble_ashr_4_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $4, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $11, %ecx
ja LABEL(loop_ashr_4_use_sse4_2)
@@ -764,18 +824,18 @@ LABEL(loop_ashr_5_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $5, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -784,18 +844,18 @@ LABEL(loop_ashr_5_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $5, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_5_use_sse4_2)
@@ -805,10 +865,10 @@ LABEL(nibble_ashr_5_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $5, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $10, %ecx
ja LABEL(loop_ashr_5_use_sse4_2)
@@ -858,18 +918,18 @@ LABEL(loop_ashr_6_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $6, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -877,18 +937,18 @@ LABEL(loop_ashr_6_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $6, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_6_use_sse4_2)
@@ -898,10 +958,10 @@ LABEL(nibble_ashr_6_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $6, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $9, %ecx
ja LABEL(loop_ashr_6_use_sse4_2)
@@ -951,18 +1011,18 @@ LABEL(loop_ashr_7_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $7, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -970,18 +1030,18 @@ LABEL(loop_ashr_7_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $7, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_7_use_sse4_2)
@@ -991,10 +1051,10 @@ LABEL(nibble_ashr_7_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $7, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $8, %ecx
ja LABEL(loop_ashr_7_use_sse4_2)
@@ -1044,18 +1104,18 @@ LABEL(loop_ashr_8_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $8, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1063,18 +1123,18 @@ LABEL(loop_ashr_8_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $8, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_8_use_sse4_2)
@@ -1084,10 +1144,10 @@ LABEL(nibble_ashr_8_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $8, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $7, %ecx
ja LABEL(loop_ashr_8_use_sse4_2)
@@ -1138,18 +1198,18 @@ LABEL(loop_ashr_9_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $9, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1157,18 +1217,18 @@ LABEL(loop_ashr_9_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $9, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_9_use_sse4_2)
@@ -1178,10 +1238,10 @@ LABEL(nibble_ashr_9_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $9, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $6, %ecx
ja LABEL(loop_ashr_9_use_sse4_2)
@@ -1231,18 +1291,18 @@ LABEL(loop_ashr_10_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $10, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1250,18 +1310,18 @@ LABEL(loop_ashr_10_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $10, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_10_use_sse4_2)
@@ -1271,10 +1331,10 @@ LABEL(nibble_ashr_10_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $10, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $5, %ecx
ja LABEL(loop_ashr_10_use_sse4_2)
@@ -1324,18 +1384,18 @@ LABEL(loop_ashr_11_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $11, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1343,18 +1403,18 @@ LABEL(loop_ashr_11_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $11, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_11_use_sse4_2)
@@ -1364,10 +1424,10 @@ LABEL(nibble_ashr_11_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $11, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $4, %ecx
ja LABEL(loop_ashr_11_use_sse4_2)
@@ -1417,18 +1477,18 @@ LABEL(loop_ashr_12_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $12, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1436,18 +1496,18 @@ LABEL(loop_ashr_12_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $12, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_12_use_sse4_2)
@@ -1457,10 +1517,10 @@ LABEL(nibble_ashr_12_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $12, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $3, %ecx
ja LABEL(loop_ashr_12_use_sse4_2)
@@ -1511,18 +1571,18 @@ LABEL(loop_ashr_13_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $13, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1530,18 +1590,18 @@ LABEL(loop_ashr_13_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $13, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_13_use_sse4_2)
@@ -1551,10 +1611,10 @@ LABEL(nibble_ashr_13_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $13, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $2, %ecx
ja LABEL(loop_ashr_13_use_sse4_2)
@@ -1605,18 +1665,18 @@ LABEL(loop_ashr_14_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $14, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1624,18 +1684,18 @@ LABEL(loop_ashr_14_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $14, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_14_use_sse4_2)
@@ -1645,10 +1705,10 @@ LABEL(nibble_ashr_14_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $14, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $1, %ecx
ja LABEL(loop_ashr_14_use_sse4_2)
@@ -1701,18 +1761,18 @@ LABEL(loop_ashr_15_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $15, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
add $16, %r10
@@ -1720,18 +1780,18 @@ LABEL(loop_ashr_15_use_sse4_2):
movdqa (%rdi, %rdx), %xmm0
palignr $15, -16(%rdi, %rdx), %xmm0
-#ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
-#else
+# else
movdqa (%rsi,%rdx), %xmm1
TOLOWER (%xmm0, %xmm1)
pcmpistri $0x1a, %xmm1, %xmm0
-#endif
+# endif
jbe LABEL(use_sse4_2_exit)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add $16, %rdx
jmp LABEL(loop_ashr_15_use_sse4_2)
@@ -1741,10 +1801,10 @@ LABEL(nibble_ashr_15_use_sse4_2):
movdqa -16(%rdi, %rdx), %xmm0
psrldq $15, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
jae LABEL(nibble_ashr_use_sse4_2_exit)
-#endif
+# endif
cmp $0, %ecx
ja LABEL(loop_ashr_15_use_sse4_2)
@@ -1753,10 +1813,10 @@ LABEL(nibble_ashr_use_sse4_2_exit):
.p2align 4
LABEL(use_sse4_2_exit):
jnc LABEL(strcmp_exitz_sse4_2)
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub %rcx, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
add %rcx, %rdx
lea -16(%rdi, %r9), %rdi
movzbl (%rdi, %rdx), %eax
@@ -1765,7 +1825,7 @@ LABEL(use_sse4_2_exit):
jz LABEL(use_sse4_2_ret_sse4_2)
xchg %eax, %edx
LABEL(use_sse4_2_ret_sse4_2):
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
movl (%rcx,%rdx,4), %edx
movl (%rcx,%rax,4), %eax
@@ -1786,14 +1846,14 @@ LABEL(ret_sse4_2):
LABEL(less16bytes_sse4_2):
bsf %rdx, %rdx /* find and store bit index in %rdx */
-#ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub %rdx, %r11
jbe LABEL(strcmp_exitz_sse4_2)
-#endif
+# endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
movl (%rdx,%rcx,4), %ecx
movl (%rdx,%rax,4), %eax
@@ -1812,7 +1872,7 @@ LABEL(Byte0_sse4_2):
movzx (%rsi), %ecx
movzx (%rdi), %eax
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
movl (%rdx,%rcx,4), %ecx
movl (%rdx,%rax,4), %eax
@@ -1870,6 +1930,16 @@ LABEL(unaligned_table_sse4_2):
cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2
# endif
+# ifdef USE_AS_STRNCASECMP_L
+# define ENTRY2(name) \
+ .type __strncasecmp_sse2, @function; \
+ .align 16; \
+ __strncasecmp_sse2: cfi_startproc; \
+ CALL_MCOUNT
+# define END2(name) \
+ cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
+# endif
+
# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal strcmp calls through a PLT.
The speedup we get from using SSE4.2 instruction is likely eaten away
diff --git a/sysdeps/x86_64/multiarch/strncase_l-ssse3.S b/sysdeps/x86_64/multiarch/strncase_l-ssse3.S
new file mode 100644
index 0000000000..6728678688
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-ssse3.S
@@ -0,0 +1,6 @@
+#define USE_SSSE3 1
+#define USE_AS_STRNCASECMP_L
+#define NO_NOLOCALE_ALIAS
+#define STRCMP __strncasecmp_l_ssse3
+#define __strncasecmp __strncasecmp_ssse3
+#include "../strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strncase_l.S b/sysdeps/x86_64/multiarch/strncase_l.S
new file mode 100644
index 0000000000..c725cd85b3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l.S
@@ -0,0 +1,6 @@
+#define STRCMP __strncasecmp_l
+#define USE_AS_STRNCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strncasecmp_l, strncasecmp_l)
+libc_hidden_def (strncasecmp_l)
diff --git a/sysdeps/x86_64/strcasecmp_l-nonascii.c b/sysdeps/x86_64/strcasecmp_l-nonascii.c
index 7a0a04f345..13b35de82d 100644
--- a/sysdeps/x86_64/strcasecmp_l-nonascii.c
+++ b/sysdeps/x86_64/strcasecmp_l-nonascii.c
@@ -1,5 +1,8 @@
#include <string.h>
+extern int __strcasecmp_l_nonascii (__const char *__s1, __const char *__s2,
+ __locale_t __loc);
+
#define __strcasecmp_l __strcasecmp_l_nonascii
#define USE_IN_EXTENDED_LOCALE_MODEL 1
#include <string/strcasecmp.c>
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 1b48f04172..5a4346be05 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -60,10 +60,26 @@
# endif
# define UPDATE_STRNCMP_COUNTER
+#elif defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+
+/* No support for strncasecmp outside libc so far since it is not needed. */
+# ifdef NOT_IN_libc
+# error "strncasecmp_l not implemented so far"
+# endif
+
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate left number to compare */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; \
+ jb LABEL(strcmp_exitz); \
+ test %r9, %r9; \
+ je LABEL(strcmp_exitz); \
+ mov %r9, %r11
#else
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
-# define STRCMP strcmp
+# define STRCMP strcmp
# endif
#endif
@@ -79,7 +95,7 @@
# define END2(name) END (name)
# endif
- ENTRY2 (__strcasecmp)
+ENTRY2 (__strcasecmp)
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
movq %fs:(%rax),%rdx
@@ -92,6 +108,25 @@ weak_alias (__strcasecmp, strcasecmp)
libc_hidden_def (__strcasecmp)
# endif
/* FALLTHROUGH to strcasecmp_l. */
+#elif defined USE_AS_STRNCASECMP_L
+# ifndef ENTRY2
+# define ENTRY2(name) ENTRY (name)
+# define END2(name) END (name)
+# endif
+
+ENTRY2 (__strncasecmp)
+ movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
+ movq %fs:(%rax),%r10
+
+ // XXX 5 byte should be before the function
+ /* 5-byte NOP. */
+ .byte 0x0f,0x1f,0x44,0x00,0x00
+END2 (__strncasecmp)
+# ifndef NO_NOLOCALE_ALIAS
+weak_alias (__strncasecmp, strncasecmp)
+libc_hidden_def (__strncasecmp)
+# endif
+ /* FALLTHROUGH to strncasecmp_l. */
#endif
ENTRY (BP_SYM (STRCMP))
@@ -124,12 +159,22 @@ END (BP_SYM (STRCMP))
# endif
testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
jne __strcasecmp_l_nonascii
+# elif defined USE_AS_STRNCASECMP_L
+ /* We have to fall back on the C implementation for locales
+ with encodings not matching ASCII for single bytes. */
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ movq LOCALE_T___LOCALES+LC_CTYPE*8(%r10), %rax
+# else
+ movq (%r10), %rax
+# endif
+ testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
+ jne __strncasecmp_l_nonascii
# endif
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
test %rdx, %rdx
je LABEL(strcmp_exitz)
cmp $1, %rdx
@@ -141,7 +186,7 @@ END (BP_SYM (STRCMP))
/* Use 64bit AND here to avoid long NOP padding. */
and $0x3f, %rcx /* rsi alignment in cache line */
and $0x3f, %rax /* rdi alignment in cache line */
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
.section .rodata.cst16,"aM",@progbits,16
.align 16
.Lbelowupper:
@@ -155,11 +200,11 @@ END (BP_SYM (STRCMP))
.quad 0x2020202020202020
.previous
movdqa .Lbelowupper(%rip), %xmm5
-# define UCLOW_reg %xmm5
+# define UCLOW_reg %xmm5
movdqa .Ltopupper(%rip), %xmm6
-# define UCHIGH_reg %xmm6
+# define UCHIGH_reg %xmm6
movdqa .Ltouppermask(%rip), %xmm7
-# define LCQWORD_reg %xmm7
+# define LCQWORD_reg %xmm7
# endif
cmp $0x30, %ecx
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
@@ -169,7 +214,7 @@ END (BP_SYM (STRCMP))
movlpd (%rsi), %xmm2
movhpd 8(%rdi), %xmm1
movhpd 8(%rsi), %xmm2
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# define TOLOWER(reg1, reg2) \
movdqa reg1, %xmm8; \
movdqa UCHIGH_reg, %xmm9; \
@@ -196,7 +241,7 @@ END (BP_SYM (STRCMP))
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
jnz LABEL(less16bytes) /* If not, find different value or null char */
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz) /* finish comparision */
# endif
@@ -241,7 +286,7 @@ LABEL(ashr_0):
movdqa (%rsi), %xmm1
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
-# ifndef USE_AS_STRCASECMP_L
+# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
# else
movdqa (%rdi), %xmm2
@@ -280,7 +325,7 @@ LABEL(loop_ashr_0):
sub $0xffff, %edx
jnz LABEL(exit) /* mismatch or null char seen */
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -295,7 +340,7 @@ LABEL(loop_ashr_0):
pmovmskb %xmm1, %edx
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -363,7 +408,7 @@ LABEL(gobble_ashr_1):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -393,7 +438,7 @@ LABEL(gobble_ashr_1):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -412,7 +457,7 @@ LABEL(nibble_ashr_1):
test $0xfffe, %edx
jnz LABEL(ashr_1_exittail) /* find null char*/
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $14, %r11
jbe LABEL(ashr_1_exittail)
# endif
@@ -493,7 +538,7 @@ LABEL(gobble_ashr_2):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -524,7 +569,7 @@ LABEL(gobble_ashr_2):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -540,7 +585,7 @@ LABEL(nibble_ashr_2):
test $0xfffc, %edx
jnz LABEL(ashr_2_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $13, %r11
jbe LABEL(ashr_2_exittail)
# endif
@@ -618,7 +663,7 @@ LABEL(gobble_ashr_3):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -649,7 +694,7 @@ LABEL(gobble_ashr_3):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -665,7 +710,7 @@ LABEL(nibble_ashr_3):
test $0xfff8, %edx
jnz LABEL(ashr_3_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $12, %r11
jbe LABEL(ashr_3_exittail)
# endif
@@ -743,7 +788,7 @@ LABEL(gobble_ashr_4):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -774,7 +819,7 @@ LABEL(gobble_ashr_4):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -790,7 +835,7 @@ LABEL(nibble_ashr_4):
test $0xfff0, %edx
jnz LABEL(ashr_4_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $11, %r11
jbe LABEL(ashr_4_exittail)
# endif
@@ -868,7 +913,7 @@ LABEL(gobble_ashr_5):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -899,7 +944,7 @@ LABEL(gobble_ashr_5):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -915,7 +960,7 @@ LABEL(nibble_ashr_5):
test $0xffe0, %edx
jnz LABEL(ashr_5_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $10, %r11
jbe LABEL(ashr_5_exittail)
# endif
@@ -993,7 +1038,7 @@ LABEL(gobble_ashr_6):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1024,7 +1069,7 @@ LABEL(gobble_ashr_6):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1040,7 +1085,7 @@ LABEL(nibble_ashr_6):
test $0xffc0, %edx
jnz LABEL(ashr_6_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $9, %r11
jbe LABEL(ashr_6_exittail)
# endif
@@ -1118,7 +1163,7 @@ LABEL(gobble_ashr_7):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1149,7 +1194,7 @@ LABEL(gobble_ashr_7):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1165,7 +1210,7 @@ LABEL(nibble_ashr_7):
test $0xff80, %edx
jnz LABEL(ashr_7_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $8, %r11
jbe LABEL(ashr_7_exittail)
# endif
@@ -1243,7 +1288,7 @@ LABEL(gobble_ashr_8):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1274,7 +1319,7 @@ LABEL(gobble_ashr_8):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1290,7 +1335,7 @@ LABEL(nibble_ashr_8):
test $0xff00, %edx
jnz LABEL(ashr_8_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $7, %r11
jbe LABEL(ashr_8_exittail)
# endif
@@ -1368,7 +1413,7 @@ LABEL(gobble_ashr_9):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1399,7 +1444,7 @@ LABEL(gobble_ashr_9):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1415,7 +1460,7 @@ LABEL(nibble_ashr_9):
test $0xfe00, %edx
jnz LABEL(ashr_9_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $6, %r11
jbe LABEL(ashr_9_exittail)
# endif
@@ -1493,7 +1538,7 @@ LABEL(gobble_ashr_10):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1524,7 +1569,7 @@ LABEL(gobble_ashr_10):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1540,7 +1585,7 @@ LABEL(nibble_ashr_10):
test $0xfc00, %edx
jnz LABEL(ashr_10_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $5, %r11
jbe LABEL(ashr_10_exittail)
# endif
@@ -1618,7 +1663,7 @@ LABEL(gobble_ashr_11):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1649,7 +1694,7 @@ LABEL(gobble_ashr_11):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1665,7 +1710,7 @@ LABEL(nibble_ashr_11):
test $0xf800, %edx
jnz LABEL(ashr_11_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $4, %r11
jbe LABEL(ashr_11_exittail)
# endif
@@ -1743,7 +1788,7 @@ LABEL(gobble_ashr_12):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1774,7 +1819,7 @@ LABEL(gobble_ashr_12):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1790,7 +1835,7 @@ LABEL(nibble_ashr_12):
test $0xf000, %edx
jnz LABEL(ashr_12_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $3, %r11
jbe LABEL(ashr_12_exittail)
# endif
@@ -1868,7 +1913,7 @@ LABEL(gobble_ashr_13):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1899,7 +1944,7 @@ LABEL(gobble_ashr_13):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -1915,7 +1960,7 @@ LABEL(nibble_ashr_13):
test $0xe000, %edx
jnz LABEL(ashr_13_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $2, %r11
jbe LABEL(ashr_13_exittail)
# endif
@@ -1993,7 +2038,7 @@ LABEL(gobble_ashr_14):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -2024,7 +2069,7 @@ LABEL(gobble_ashr_14):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -2040,7 +2085,7 @@ LABEL(nibble_ashr_14):
test $0xc000, %edx
jnz LABEL(ashr_14_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $1, %r11
jbe LABEL(ashr_14_exittail)
# endif
@@ -2120,7 +2165,7 @@ LABEL(gobble_ashr_15):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -2151,7 +2196,7 @@ LABEL(gobble_ashr_15):
sub $0xffff, %edx
jnz LABEL(exit)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub $16, %r11
jbe LABEL(strcmp_exitz)
# endif
@@ -2167,7 +2212,7 @@ LABEL(nibble_ashr_15):
test $0x8000, %edx
jnz LABEL(ashr_15_exittail)
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
test %r11, %r11
je LABEL(ashr_15_exittail)
# endif
@@ -2205,14 +2250,14 @@ LABEL(ret):
LABEL(less16bytes):
bsf %rdx, %rdx /* find and store bit index in %rdx */
-# ifdef USE_AS_STRNCMP
+# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
# endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
-# ifdef USE_AS_STRCASECMP_L
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
movl (%rdx,%rcx,4), %ecx
movl (%rdx,%rax,4), %eax
@@ -2230,6 +2275,12 @@ LABEL(Byte0):
movzx (%rsi), %ecx
movzx (%rdi), %eax
+# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+ leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
+ movl (%rdx,%rcx,4), %ecx
+ movl (%rdx,%rax,4), %eax
+# endif
+
sub %ecx, %eax
ret
END (BP_SYM (STRCMP))
diff --git a/sysdeps/x86_64/strncase.S b/sysdeps/x86_64/strncase.S
new file mode 100644
index 0000000000..2de2ce4b96
--- /dev/null
+++ b/sysdeps/x86_64/strncase.S
@@ -0,0 +1 @@
+/* In strncase_l.S. */
diff --git a/sysdeps/x86_64/strncase_l-nonascii.c b/sysdeps/x86_64/strncase_l-nonascii.c
new file mode 100644
index 0000000000..baabd318bf
--- /dev/null
+++ b/sysdeps/x86_64/strncase_l-nonascii.c
@@ -0,0 +1,8 @@
+#include <string.h>
+
+extern int __strncasecmp_l_nonascii (__const char *__s1, __const char *__s2,
+ size_t __n, __locale_t __loc);
+
+#define __strncasecmp_l __strncasecmp_l_nonascii
+#define USE_IN_EXTENDED_LOCALE_MODEL 1
+#include <string/strncase.c>
diff --git a/sysdeps/x86_64/strncase_l.S b/sysdeps/x86_64/strncase_l.S
new file mode 100644
index 0000000000..c725cd85b3
--- /dev/null
+++ b/sysdeps/x86_64/strncase_l.S
@@ -0,0 +1,6 @@
+#define STRCMP __strncasecmp_l
+#define USE_AS_STRNCASECMP_L
+#include "strcmp.S"
+
+weak_alias (__strncasecmp_l, strncasecmp_l)
+libc_hidden_def (strncasecmp_l)