diff options
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/i386/i686/multiarch/wcscmp-sse2.S | 158 | ||||
-rw-r--r-- | sysdeps/x86_64/wcscmp.S | 109 |
2 files changed, 151 insertions, 116 deletions
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S index 404a9a4d4c..cca0d8340b 100644 --- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S +++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S @@ -21,7 +21,6 @@ #ifndef NOT_IN_libc # include <sysdep.h> -# include "asm-syntax.h" # define CFI_PUSH(REG) \ cfi_adjust_cfa_offset (4); \ @@ -34,18 +33,16 @@ # define PUSH(REG) pushl REG; CFI_PUSH (REG) # define POP(REG) popl REG; CFI_POP (REG) -# ifndef STRCMP -# define STRCMP __wcscmp_sse2 -# endif - # define ENTRANCE PUSH(%esi); PUSH(%edi) # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); # define PARMS 4 # define STR1 PARMS # define STR2 STR1+4 +/* Note: wcscmp uses signed comparison, not unsugned as in strcmp function. */ + .text -ENTRY (STRCMP) +ENTRY (__wcscmp_sse2) /* * This implementation uses SSE to compare up to 16 bytes at a time. */ @@ -131,7 +128,7 @@ L(continue_48_48): jne L(nequal) test %ecx, %ecx jz L(equal) - + movdqu 16(%edi), %xmm1 movdqu 16(%esi), %xmm2 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ @@ -264,21 +261,21 @@ L(continue_00_48): test %ecx, %ecx jnz L(less4_double_words1) - sub (%esi), %eax - jnz L(return) - + cmp (%esi), %eax + jne L(nequal) + mov 4(%edi), %eax - sub 4(%esi), %eax - jnz L(return) + cmp 4(%esi), %eax + jne L(nequal) mov 8(%edi), %eax - sub 8(%esi), %eax - jnz L(return) + cmp 8(%esi), %eax + jne L(nequal) mov 12(%edi), %eax - sub 12(%esi), %eax - jnz L(return) - + cmp 12(%esi), %eax + jne L(nequal) + movdqu 16(%esi), %xmm2 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ @@ -381,7 +378,7 @@ L(continue_32_48): movdqu 48(%esi), %xmm2 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + psubb %xmm0, %xmm1 /* packed sub of comparison results */ pmovmskb %xmm1, %edx sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ jnz L(less4_double_words_48) @@ -585,21 +582,21 @@ L(continue_48_00): test %ecx, %ecx jnz L(less4_double_words1) - sub (%esi), %eax - jnz L(return) - + cmp (%esi), %eax + jne L(nequal) + mov 4(%edi), %eax - sub 4(%esi), %eax - jnz L(return) + cmp 4(%esi), %eax + jne L(nequal) mov 8(%edi), %eax - sub 8(%esi), %eax - jnz L(return) + cmp 8(%esi), %eax + jne L(nequal) mov 12(%edi), %eax - sub 12(%esi), %eax - jnz L(return) - + cmp 12(%esi), %eax + jne L(nequal) + movdqu 16(%edi), %xmm1 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ @@ -839,142 +836,161 @@ L(less4_double_words1): test %ecx, %ecx jz L(equal) - mov 12(%esi), %edx - mov 12(%edi), %eax - sub %edx, %eax + mov 12(%esi), %ecx + cmp %ecx, 12(%edi) + jne L(nequal) + xor %eax, %eax RETURN .p2align 4 L(less4_double_words): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words) and $15, %dl jz L(second_double_word) - mov (%edi), %eax - sub (%esi), %eax + mov (%esi), %ecx + cmp %ecx, (%edi) + jne L(nequal) RETURN .p2align 4 L(second_double_word): - mov 4(%edi), %eax - sub 4(%esi), %eax + mov 4(%esi), %ecx + cmp %ecx, 4(%edi) + jne L(nequal) RETURN .p2align 4 L(next_two_double_words): and $15, %dh jz L(fourth_double_word) - mov 8(%edi), %eax - sub 8(%esi), %eax + mov 8(%esi), %ecx + cmp %ecx, 8(%edi) + jne L(nequal) RETURN .p2align 4 L(fourth_double_word): - mov 12(%edi), %eax - sub 12(%esi), %eax + mov 12(%esi), %ecx + cmp %ecx, 12(%edi) + jne L(nequal) RETURN .p2align 4 L(less4_double_words_16): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words_16) and $15, %dl jz L(second_double_word_16) - mov 16(%edi), %eax - sub 16(%esi), %eax + mov 16(%esi), %ecx + cmp %ecx, 16(%edi) + jne L(nequal) RETURN .p2align 4 L(second_double_word_16): - mov 20(%edi), %eax - sub 20(%esi), %eax + mov 20(%esi), %ecx + cmp %ecx, 20(%edi) + jne L(nequal) RETURN .p2align 4 L(next_two_double_words_16): and $15, %dh jz L(fourth_double_word_16) - mov 24(%edi), %eax - sub 24(%esi), %eax + mov 24(%esi), %ecx + cmp %ecx, 24(%edi) + jne L(nequal) RETURN .p2align 4 L(fourth_double_word_16): - mov 28(%edi), %eax - sub 28(%esi), %eax + mov 28(%esi), %ecx + cmp %ecx, 28(%edi) + jne L(nequal) RETURN .p2align 4 L(less4_double_words_32): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words_32) and $15, %dl jz L(second_double_word_32) - mov 32(%edi), %eax - sub 32(%esi), %eax + mov 32(%esi), %ecx + cmp %ecx, 32(%edi) + jne L(nequal) RETURN .p2align 4 L(second_double_word_32): - mov 36(%edi), %eax - sub 36(%esi), %eax + mov 36(%esi), %ecx + cmp %ecx, 36(%edi) + jne L(nequal) RETURN .p2align 4 L(next_two_double_words_32): and $15, %dh jz L(fourth_double_word_32) - mov 40(%edi), %eax - sub 40(%esi), %eax + mov 40(%esi), %ecx + cmp %ecx, 40(%edi) + jne L(nequal) RETURN .p2align 4 L(fourth_double_word_32): - mov 44(%edi), %eax - sub 44(%esi), %eax + mov 44(%esi), %ecx + cmp %ecx, 44(%edi) + jne L(nequal) RETURN .p2align 4 L(less4_double_words_48): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words_48) and $15, %dl jz L(second_double_word_48) - mov 48(%edi), %eax - sub 48(%esi), %eax + mov 48(%esi), %ecx + cmp %ecx, 48(%edi) + jne L(nequal) RETURN .p2align 4 L(second_double_word_48): - mov 52(%edi), %eax - sub 52(%esi), %eax + mov 52(%esi), %ecx + cmp %ecx, 52(%edi) + jne L(nequal) RETURN .p2align 4 L(next_two_double_words_48): and $15, %dh jz L(fourth_double_word_48) - mov 56(%edi), %eax - sub 56(%esi), %eax + mov 56(%esi), %ecx + cmp %ecx, 56(%edi) + jne L(nequal) RETURN .p2align 4 L(fourth_double_word_48): - mov 60(%edi), %eax - sub 60(%esi), %eax - RETURN - - .p2align 4 -L(return): + mov 60(%esi), %ecx + cmp %ecx, 60(%edi) + jne L(nequal) RETURN .p2align 4 L(nequal): mov $1, %eax - ja L(nequal_bigger) + jg L(return) neg %eax + RETURN -L(nequal_bigger): + .p2align 4 +L(return): RETURN .p2align 4 @@ -988,7 +1004,7 @@ L(equal): .p2align 4 L(neq): mov $1, %eax - ja L(neq_bigger) + jg L(neq_bigger) neg %eax L(neq_bigger): @@ -999,5 +1015,5 @@ L(eq): xorl %eax, %eax ret -END (STRCMP) +END (__wcscmp_sse2) #endif diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S index 991ecb2cab..12bfdafd41 100644 --- a/sysdeps/x86_64/wcscmp.S +++ b/sysdeps/x86_64/wcscmp.S @@ -20,6 +20,8 @@ #include <sysdep.h> +/* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. */ + .text ENTRY (wcscmp) /* @@ -76,7 +78,7 @@ L(continue_48_48): jne L(nequal) test %ecx, %ecx jz L(equal) - + movdqu 16(%rdi), %xmm1 movdqu 16(%rsi), %xmm2 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ @@ -209,21 +211,21 @@ L(continue_00_48): test %ecx, %ecx jnz L(less4_double_words1) - sub (%rsi), %eax - jnz L(return) - + cmp (%rsi), %eax + jne L(nequal) + mov 4(%rdi), %eax - sub 4(%rsi), %eax - jnz L(return) + cmp 4(%rsi), %eax + jne L(nequal) mov 8(%rdi), %eax - sub 8(%rsi), %eax - jnz L(return) + cmp 8(%rsi), %eax + jne L(nequal) mov 12(%rdi), %eax - sub 12(%rsi), %eax - jnz L(return) - + cmp 12(%rsi), %eax + jne L(nequal) + movdqu 16(%rsi), %xmm2 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ @@ -530,21 +532,21 @@ L(continue_48_00): test %ecx, %ecx jnz L(less4_double_words1) - sub (%rsi), %eax - jnz L(return) - + cmp (%rsi), %eax + jne L(nequal) + mov 4(%rdi), %eax - sub 4(%rsi), %eax - jnz L(return) + cmp 4(%rsi), %eax + jne L(nequal) mov 8(%rdi), %eax - sub 8(%rsi), %eax - jnz L(return) + cmp 8(%rsi), %eax + jne L(nequal) mov 12(%rdi), %eax - sub 12(%rsi), %eax - jnz L(return) - + cmp 12(%rsi), %eax + jne L(nequal) + movdqu 16(%rdi), %xmm1 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ @@ -784,25 +786,29 @@ L(less4_double_words1): test %ecx, %ecx jz L(equal) - mov 12(%rsi), %edx - mov 12(%rdi), %eax - sub %edx, %eax + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + xor %eax, %eax ret .p2align 4 L(less4_double_words): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words) and $15, %dl jz L(second_double_word) mov (%rdi), %eax - sub (%rsi), %eax + cmp (%rsi), %eax + jne L(nequal) ret .p2align 4 L(second_double_word): mov 4(%rdi), %eax - sub 4(%rsi), %eax + cmp 4(%rsi), %eax + jne L(nequal) ret .p2align 4 @@ -810,29 +816,34 @@ L(next_two_double_words): and $15, %dh jz L(fourth_double_word) mov 8(%rdi), %eax - sub 8(%rsi), %eax + cmp 8(%rsi), %eax + jne L(nequal) ret .p2align 4 L(fourth_double_word): mov 12(%rdi), %eax - sub 12(%rsi), %eax + cmp 12(%rsi), %eax + jne L(nequal) ret .p2align 4 L(less4_double_words_16): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words_16) and $15, %dl jz L(second_double_word_16) mov 16(%rdi), %eax - sub 16(%rsi), %eax + cmp 16(%rsi), %eax + jne L(nequal) ret .p2align 4 L(second_double_word_16): mov 20(%rdi), %eax - sub 20(%rsi), %eax + cmp 20(%rsi), %eax + jne L(nequal) ret .p2align 4 @@ -840,29 +851,34 @@ L(next_two_double_words_16): and $15, %dh jz L(fourth_double_word_16) mov 24(%rdi), %eax - sub 24(%rsi), %eax + cmp 24(%rsi), %eax + jne L(nequal) ret .p2align 4 L(fourth_double_word_16): mov 28(%rdi), %eax - sub 28(%rsi), %eax + cmp 28(%rsi), %eax + jne L(nequal) ret .p2align 4 L(less4_double_words_32): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words_32) and $15, %dl jz L(second_double_word_32) mov 32(%rdi), %eax - sub 32(%rsi), %eax + cmp 32(%rsi), %eax + jne L(nequal) ret .p2align 4 L(second_double_word_32): mov 36(%rdi), %eax - sub 36(%rsi), %eax + cmp 36(%rsi), %eax + jne L(nequal) ret .p2align 4 @@ -870,29 +886,34 @@ L(next_two_double_words_32): and $15, %dh jz L(fourth_double_word_32) mov 40(%rdi), %eax - sub 40(%rsi), %eax + cmp 40(%rsi), %eax + jne L(nequal) ret .p2align 4 L(fourth_double_word_32): mov 44(%rdi), %eax - sub 44(%rsi), %eax + cmp 44(%rsi), %eax + jne L(nequal) ret .p2align 4 L(less4_double_words_48): + xor %eax, %eax test %dl, %dl jz L(next_two_double_words_48) and $15, %dl jz L(second_double_word_48) mov 48(%rdi), %eax - sub 48(%rsi), %eax + cmp 48(%rsi), %eax + jne L(nequal) ret .p2align 4 L(second_double_word_48): mov 52(%rdi), %eax - sub 52(%rsi), %eax + cmp 52(%rsi), %eax + jne L(nequal) ret .p2align 4 @@ -900,23 +921,21 @@ L(next_two_double_words_48): and $15, %dh jz L(fourth_double_word_48) mov 56(%rdi), %eax - sub 56(%rsi), %eax + cmp 56(%rsi), %eax + jne L(nequal) ret .p2align 4 L(fourth_double_word_48): mov 60(%rdi), %eax - sub 60(%rsi), %eax - ret - - .p2align 4 -L(return): + cmp 60(%rsi), %eax + jne L(nequal) ret .p2align 4 L(nequal): mov $1, %eax - ja L(nequal_bigger) + jg L(nequal_bigger) neg %eax L(nequal_bigger): |