aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/multiarch/strcat-ssse3.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/multiarch/strcat-ssse3.S')
-rw-r--r--sysdeps/x86_64/multiarch/strcat-ssse3.S312
1 files changed, 3 insertions, 309 deletions
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
index 901e66f2c8..fea9d11b40 100644
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -33,317 +33,11 @@ ENTRY (STRCAT)
mov %rdx, %r8
# endif
- xor %eax, %eax
- cmpb $0, (%rdi)
- jz L(exit_tail0)
- cmpb $0, 1(%rdi)
- jz L(exit_tail1)
- cmpb $0, 2(%rdi)
- jz L(exit_tail2)
- cmpb $0, 3(%rdi)
- jz L(exit_tail3)
-
- cmpb $0, 4(%rdi)
- jz L(exit_tail4)
- cmpb $0, 5(%rdi)
- jz L(exit_tail5)
- cmpb $0, 6(%rdi)
- jz L(exit_tail6)
- cmpb $0, 7(%rdi)
- jz L(exit_tail7)
-
- cmpb $0, 8(%rdi)
- jz L(exit_tail8)
- cmpb $0, 9(%rdi)
- jz L(exit_tail9)
- cmpb $0, 10(%rdi)
- jz L(exit_tail10)
- cmpb $0, 11(%rdi)
- jz L(exit_tail11)
-
- cmpb $0, 12(%rdi)
- jz L(exit_tail12)
- cmpb $0, 13(%rdi)
- jz L(exit_tail13)
- cmpb $0, 14(%rdi)
- jz L(exit_tail14)
- cmpb $0, 15(%rdi)
- jz L(exit_tail15)
- pxor %xmm0, %xmm0
- lea 16(%rdi), %rcx
- lea 16(%rdi), %rax
- and $-16, %rax
-
- pcmpeqb (%rax), %xmm0
- pmovmskb %xmm0, %edx
- pxor %xmm1, %xmm1
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm1
- pmovmskb %xmm1, %edx
- pxor %xmm2, %xmm2
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm2
- pmovmskb %xmm2, %edx
- pxor %xmm3, %xmm3
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- pcmpeqb (%rax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%rax), %rax
- jnz L(exit)
-
- and $-0x40, %rax
+# define RETURN jmp L(StartStrcpyPart)
+# include "strlen-sse2-no-bsf.S"
- .p2align 4
-L(aligned_64):
- pcmpeqb (%rax), %xmm0
- pcmpeqb 16(%rax), %xmm1
- pcmpeqb 32(%rax), %xmm2
- pcmpeqb 48(%rax), %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %r11d
- pmovmskb %xmm2, %r10d
- pmovmskb %xmm3, %r9d
- or %edx, %r9d
- or %r11d, %r9d
- or %r10d, %r9d
- lea 64(%rax), %rax
- jz L(aligned_64)
-
- test %edx, %edx
- jnz L(aligned_64_exit_16)
- test %r11d, %r11d
- jnz L(aligned_64_exit_32)
- test %r10d, %r10d
- jnz L(aligned_64_exit_48)
-
-L(aligned_64_exit_64):
- pmovmskb %xmm3, %edx
- jmp L(exit)
-
-L(aligned_64_exit_48):
- lea -16(%rax), %rax
- mov %r10d, %edx
- jmp L(exit)
-
-L(aligned_64_exit_32):
- lea -32(%rax), %rax
- mov %r11d, %edx
- jmp L(exit)
-
-L(aligned_64_exit_16):
- lea -48(%rax), %rax
-
-L(exit):
- sub %rcx, %rax
- test %dl, %dl
- jz L(exit_high)
- test $0x01, %dl
- jnz L(exit_tail0)
-
- test $0x02, %dl
- jnz L(exit_tail1)
-
- test $0x04, %dl
- jnz L(exit_tail2)
-
- test $0x08, %dl
- jnz L(exit_tail3)
-
- test $0x10, %dl
- jnz L(exit_tail4)
-
- test $0x20, %dl
- jnz L(exit_tail5)
-
- test $0x40, %dl
- jnz L(exit_tail6)
- add $7, %eax
-L(exit_tail0):
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_high):
- add $8, %eax
- test $0x01, %dh
- jnz L(exit_tail0)
-
- test $0x02, %dh
- jnz L(exit_tail1)
-
- test $0x04, %dh
- jnz L(exit_tail2)
-
- test $0x08, %dh
- jnz L(exit_tail3)
-
- test $0x10, %dh
- jnz L(exit_tail4)
-
- test $0x20, %dh
- jnz L(exit_tail5)
-
- test $0x40, %dh
- jnz L(exit_tail6)
- add $7, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail1):
- add $1, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail2):
- add $2, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail3):
- add $3, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail4):
- add $4, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail5):
- add $5, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail6):
- add $6, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail7):
- add $7, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail8):
- add $8, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail9):
- add $9, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail10):
- add $10, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail11):
- add $11, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail12):
- add $12, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail13):
- add $13, %eax
- jmp L(StartStrcpyPart)
+# undef RETURN
- .p2align 4
-L(exit_tail14):
- add $14, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_tail15):
- add $15, %eax
-
- .p2align 4
L(StartStrcpyPart):
mov %rsi, %rcx
lea (%rdi, %rax), %rdx