aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp-sse2.S158
-rw-r--r--sysdeps/x86_64/wcscmp.S109
2 files changed, 151 insertions, 116 deletions
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
index 404a9a4d4c..cca0d8340b 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
@@ -21,7 +21,6 @@
#ifndef NOT_IN_libc
# include <sysdep.h>
-# include "asm-syntax.h"
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
@@ -34,18 +33,16 @@
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
-# ifndef STRCMP
-# define STRCMP __wcscmp_sse2
-# endif
-
# define ENTRANCE PUSH(%esi); PUSH(%edi)
# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
# define PARMS 4
# define STR1 PARMS
# define STR2 STR1+4
+/* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
+
.text
-ENTRY (STRCMP)
+ENTRY (__wcscmp_sse2)
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
@@ -131,7 +128,7 @@ L(continue_48_48):
jne L(nequal)
test %ecx, %ecx
jz L(equal)
-
+
movdqu 16(%edi), %xmm1
movdqu 16(%esi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
@@ -264,21 +261,21 @@ L(continue_00_48):
test %ecx, %ecx
jnz L(less4_double_words1)
- sub (%esi), %eax
- jnz L(return)
-
+ cmp (%esi), %eax
+ jne L(nequal)
+
mov 4(%edi), %eax
- sub 4(%esi), %eax
- jnz L(return)
+ cmp 4(%esi), %eax
+ jne L(nequal)
mov 8(%edi), %eax
- sub 8(%esi), %eax
- jnz L(return)
+ cmp 8(%esi), %eax
+ jne L(nequal)
mov 12(%edi), %eax
- sub 12(%esi), %eax
- jnz L(return)
-
+ cmp 12(%esi), %eax
+ jne L(nequal)
+
movdqu 16(%esi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
@@ -381,7 +378,7 @@ L(continue_32_48):
movdqu 48(%esi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ psubb %xmm0, %xmm1 /* packed sub of comparison results */
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
@@ -585,21 +582,21 @@ L(continue_48_00):
test %ecx, %ecx
jnz L(less4_double_words1)
- sub (%esi), %eax
- jnz L(return)
-
+ cmp (%esi), %eax
+ jne L(nequal)
+
mov 4(%edi), %eax
- sub 4(%esi), %eax
- jnz L(return)
+ cmp 4(%esi), %eax
+ jne L(nequal)
mov 8(%edi), %eax
- sub 8(%esi), %eax
- jnz L(return)
+ cmp 8(%esi), %eax
+ jne L(nequal)
mov 12(%edi), %eax
- sub 12(%esi), %eax
- jnz L(return)
-
+ cmp 12(%esi), %eax
+ jne L(nequal)
+
movdqu 16(%edi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
@@ -839,142 +836,161 @@ L(less4_double_words1):
test %ecx, %ecx
jz L(equal)
- mov 12(%esi), %edx
- mov 12(%edi), %eax
- sub %edx, %eax
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
+ xor %eax, %eax
RETURN
.p2align 4
L(less4_double_words):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words)
and $15, %dl
jz L(second_double_word)
- mov (%edi), %eax
- sub (%esi), %eax
+ mov (%esi), %ecx
+ cmp %ecx, (%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(second_double_word):
- mov 4(%edi), %eax
- sub 4(%esi), %eax
+ mov 4(%esi), %ecx
+ cmp %ecx, 4(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(next_two_double_words):
and $15, %dh
jz L(fourth_double_word)
- mov 8(%edi), %eax
- sub 8(%esi), %eax
+ mov 8(%esi), %ecx
+ cmp %ecx, 8(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(fourth_double_word):
- mov 12(%edi), %eax
- sub 12(%esi), %eax
+ mov 12(%esi), %ecx
+ cmp %ecx, 12(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(less4_double_words_16):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words_16)
and $15, %dl
jz L(second_double_word_16)
- mov 16(%edi), %eax
- sub 16(%esi), %eax
+ mov 16(%esi), %ecx
+ cmp %ecx, 16(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(second_double_word_16):
- mov 20(%edi), %eax
- sub 20(%esi), %eax
+ mov 20(%esi), %ecx
+ cmp %ecx, 20(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(next_two_double_words_16):
and $15, %dh
jz L(fourth_double_word_16)
- mov 24(%edi), %eax
- sub 24(%esi), %eax
+ mov 24(%esi), %ecx
+ cmp %ecx, 24(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(fourth_double_word_16):
- mov 28(%edi), %eax
- sub 28(%esi), %eax
+ mov 28(%esi), %ecx
+ cmp %ecx, 28(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(less4_double_words_32):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words_32)
and $15, %dl
jz L(second_double_word_32)
- mov 32(%edi), %eax
- sub 32(%esi), %eax
+ mov 32(%esi), %ecx
+ cmp %ecx, 32(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(second_double_word_32):
- mov 36(%edi), %eax
- sub 36(%esi), %eax
+ mov 36(%esi), %ecx
+ cmp %ecx, 36(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(next_two_double_words_32):
and $15, %dh
jz L(fourth_double_word_32)
- mov 40(%edi), %eax
- sub 40(%esi), %eax
+ mov 40(%esi), %ecx
+ cmp %ecx, 40(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(fourth_double_word_32):
- mov 44(%edi), %eax
- sub 44(%esi), %eax
+ mov 44(%esi), %ecx
+ cmp %ecx, 44(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(less4_double_words_48):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words_48)
and $15, %dl
jz L(second_double_word_48)
- mov 48(%edi), %eax
- sub 48(%esi), %eax
+ mov 48(%esi), %ecx
+ cmp %ecx, 48(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(second_double_word_48):
- mov 52(%edi), %eax
- sub 52(%esi), %eax
+ mov 52(%esi), %ecx
+ cmp %ecx, 52(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(next_two_double_words_48):
and $15, %dh
jz L(fourth_double_word_48)
- mov 56(%edi), %eax
- sub 56(%esi), %eax
+ mov 56(%esi), %ecx
+ cmp %ecx, 56(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(fourth_double_word_48):
- mov 60(%edi), %eax
- sub 60(%esi), %eax
- RETURN
-
- .p2align 4
-L(return):
+ mov 60(%esi), %ecx
+ cmp %ecx, 60(%edi)
+ jne L(nequal)
RETURN
.p2align 4
L(nequal):
mov $1, %eax
- ja L(nequal_bigger)
+ jg L(return)
neg %eax
+ RETURN
-L(nequal_bigger):
+ .p2align 4
+L(return):
RETURN
.p2align 4
@@ -988,7 +1004,7 @@ L(equal):
.p2align 4
L(neq):
mov $1, %eax
- ja L(neq_bigger)
+ jg L(neq_bigger)
neg %eax
L(neq_bigger):
@@ -999,5 +1015,5 @@ L(eq):
xorl %eax, %eax
ret
-END (STRCMP)
+END (__wcscmp_sse2)
#endif
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index 991ecb2cab..12bfdafd41 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -20,6 +20,8 @@
#include <sysdep.h>
+/* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */
+
.text
ENTRY (wcscmp)
/*
@@ -76,7 +78,7 @@ L(continue_48_48):
jne L(nequal)
test %ecx, %ecx
jz L(equal)
-
+
movdqu 16(%rdi), %xmm1
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
@@ -209,21 +211,21 @@ L(continue_00_48):
test %ecx, %ecx
jnz L(less4_double_words1)
- sub (%rsi), %eax
- jnz L(return)
-
+ cmp (%rsi), %eax
+ jne L(nequal)
+
mov 4(%rdi), %eax
- sub 4(%rsi), %eax
- jnz L(return)
+ cmp 4(%rsi), %eax
+ jne L(nequal)
mov 8(%rdi), %eax
- sub 8(%rsi), %eax
- jnz L(return)
+ cmp 8(%rsi), %eax
+ jne L(nequal)
mov 12(%rdi), %eax
- sub 12(%rsi), %eax
- jnz L(return)
-
+ cmp 12(%rsi), %eax
+ jne L(nequal)
+
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
@@ -530,21 +532,21 @@ L(continue_48_00):
test %ecx, %ecx
jnz L(less4_double_words1)
- sub (%rsi), %eax
- jnz L(return)
-
+ cmp (%rsi), %eax
+ jne L(nequal)
+
mov 4(%rdi), %eax
- sub 4(%rsi), %eax
- jnz L(return)
+ cmp 4(%rsi), %eax
+ jne L(nequal)
mov 8(%rdi), %eax
- sub 8(%rsi), %eax
- jnz L(return)
+ cmp 8(%rsi), %eax
+ jne L(nequal)
mov 12(%rdi), %eax
- sub 12(%rsi), %eax
- jnz L(return)
-
+ cmp 12(%rsi), %eax
+ jne L(nequal)
+
movdqu 16(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
@@ -784,25 +786,29 @@ L(less4_double_words1):
test %ecx, %ecx
jz L(equal)
- mov 12(%rsi), %edx
- mov 12(%rdi), %eax
- sub %edx, %eax
+ mov 12(%rsi), %ecx
+ cmp %ecx, 12(%rdi)
+ jne L(nequal)
+ xor %eax, %eax
ret
.p2align 4
L(less4_double_words):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words)
and $15, %dl
jz L(second_double_word)
mov (%rdi), %eax
- sub (%rsi), %eax
+ cmp (%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(second_double_word):
mov 4(%rdi), %eax
- sub 4(%rsi), %eax
+ cmp 4(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
@@ -810,29 +816,34 @@ L(next_two_double_words):
and $15, %dh
jz L(fourth_double_word)
mov 8(%rdi), %eax
- sub 8(%rsi), %eax
+ cmp 8(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(fourth_double_word):
mov 12(%rdi), %eax
- sub 12(%rsi), %eax
+ cmp 12(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(less4_double_words_16):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words_16)
and $15, %dl
jz L(second_double_word_16)
mov 16(%rdi), %eax
- sub 16(%rsi), %eax
+ cmp 16(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(second_double_word_16):
mov 20(%rdi), %eax
- sub 20(%rsi), %eax
+ cmp 20(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
@@ -840,29 +851,34 @@ L(next_two_double_words_16):
and $15, %dh
jz L(fourth_double_word_16)
mov 24(%rdi), %eax
- sub 24(%rsi), %eax
+ cmp 24(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(fourth_double_word_16):
mov 28(%rdi), %eax
- sub 28(%rsi), %eax
+ cmp 28(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(less4_double_words_32):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words_32)
and $15, %dl
jz L(second_double_word_32)
mov 32(%rdi), %eax
- sub 32(%rsi), %eax
+ cmp 32(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(second_double_word_32):
mov 36(%rdi), %eax
- sub 36(%rsi), %eax
+ cmp 36(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
@@ -870,29 +886,34 @@ L(next_two_double_words_32):
and $15, %dh
jz L(fourth_double_word_32)
mov 40(%rdi), %eax
- sub 40(%rsi), %eax
+ cmp 40(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(fourth_double_word_32):
mov 44(%rdi), %eax
- sub 44(%rsi), %eax
+ cmp 44(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(less4_double_words_48):
+ xor %eax, %eax
test %dl, %dl
jz L(next_two_double_words_48)
and $15, %dl
jz L(second_double_word_48)
mov 48(%rdi), %eax
- sub 48(%rsi), %eax
+ cmp 48(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(second_double_word_48):
mov 52(%rdi), %eax
- sub 52(%rsi), %eax
+ cmp 52(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
@@ -900,23 +921,21 @@ L(next_two_double_words_48):
and $15, %dh
jz L(fourth_double_word_48)
mov 56(%rdi), %eax
- sub 56(%rsi), %eax
+ cmp 56(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(fourth_double_word_48):
mov 60(%rdi), %eax
- sub 60(%rsi), %eax
- ret
-
- .p2align 4
-L(return):
+ cmp 60(%rsi), %eax
+ jne L(nequal)
ret
.p2align 4
L(nequal):
mov $1, %eax
- ja L(nequal_bigger)
+ jg L(nequal_bigger)
neg %eax
L(nequal_bigger):