about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S')
-rw-r--r-- sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S | 374
1 files changed, 0 insertions, 374 deletions
diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
deleted file mode 100644
index 138979d10a..0000000000
--- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
+++ /dev/null
@@ -1,374 +0,0 @@
-/* strstr with unaligned loads
- Copyright (C) 2009-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-ENTRY(__strstr_sse2_unaligned) /* strstr built on SSE2 unaligned loads.
-   As used below: %rdi = haystack, %rsi = needle, result in %rax (a
-   pointer into the haystack, or 0).  The first two needle bytes are
-   replicated into %xmm1 / %xmm2 and used as a vector filter; every
-   position that passes (kept as a bit in %r8) is then verified byte by
-   byte against the rest of the needle.  Needles of length 0 and 1 take
-   separate exits, and the main loop can hand over to __strstr_sse2.  */
- movzbl (%rsi), %eax /* eax = needle[0] */
- testb %al, %al
- je L(empty) /* empty needle: return the haystack pointer */
- movzbl 1(%rsi), %edx /* edx = needle[1] */
- testb %dl, %dl
- je L(strchr) /* one-byte needle: use __strchr_sse2 */
- movd %eax, %xmm1
- movd %edx, %xmm2
- movq %rdi, %rax
- andl $4095, %eax /* eax = haystack address modulo 4096 */
- punpcklbw %xmm1, %xmm1 /* punpcklbw/punpcklwd/pshufd: copy the low byte into all 16 bytes */
- cmpq $4031, %rax /* flags consumed by the ja below; the SSE ops in between do not write flags */
- punpcklbw %xmm2, %xmm2
- punpcklwd %xmm1, %xmm1
- punpcklwd %xmm2, %xmm2
- pshufd $0, %xmm1, %xmm1 /* xmm1 = needle[0] in every byte */
- pshufd $0, %xmm2, %xmm2 /* xmm2 = needle[1] in every byte */
- ja L(cross_page) /* offset > 4031: the loads below reach byte 64 and would pass offset 4095 */
- movdqu (%rdi), %xmm3 /* xmm3 = h[0..15] (h = haystack) */
- pxor %xmm5, %xmm5 /* xmm5 = 0 for the NUL tests */
- movdqu 1(%rdi), %xmm4 /* xmm4 = h[1..16] */
- movdqa %xmm3, %xmm6
- pcmpeqb %xmm1, %xmm3 /* 0xff where h[i] == needle[0] */
- pcmpeqb %xmm2, %xmm4 /* 0xff where h[i+1] == needle[1] */
- movdqu 16(%rdi), %xmm0 /* xmm0 = h[16..31] */
- pcmpeqb %xmm5, %xmm6 /* 0xff where h[i] == 0 */
- pminub %xmm4, %xmm3 /* min of 0x00/0xff masks acts as AND: two-byte match at i */
- movdqa %xmm3, %xmm4
- movdqu 17(%rdi), %xmm3 /* xmm3 = h[17..32] */
- pcmpeqb %xmm0, %xmm5 /* NUL mask for h[16..31] */
- pcmpeqb %xmm2, %xmm3
- por %xmm6, %xmm4 /* bytes 0..15: candidate or NUL */
- pcmpeqb %xmm1, %xmm0
- pminub %xmm3, %xmm0
- por %xmm5, %xmm0 /* bytes 16..31: candidate or NUL */
- pmovmskb %xmm4, %r8d
- pmovmskb %xmm0, %eax
- salq $16, %rax
- orq %rax, %r8 /* r8 bit i (i < 32): h[i] is NUL, or h[i], h[i+1] equal needle[0], needle[1] */
- je L(next_32_bytes) /* nothing flagged in bytes 0..31 */
-L(next_pair_index): /* Handle the lowest set bit of r8.  */
- bsf %r8, %rax
- addq %rdi, %rax /* rax = address that bit stands for */
- cmpb $0, (%rax)
- je L(zero1) /* the bit marks a NUL: end of haystack, return 0 */
- movzbl 2(%rsi), %edx /* edx = needle[2] */
- testb %dl, %dl
- je L(found1) /* needle has two bytes: rax is the match */
- cmpb 2(%rax), %dl
- jne L(next_pair)
- xorl %edx, %edx /* rdx = k, index for the byte-wise check */
- jmp L(pair_loop_start)
-/* Needle is a single byte: tail-jump to __strchr_sse2 with %rdi
-   unchanged and %esi = that byte (still in %al from the first load).  */
- .p2align 4
-L(strchr):
- movzbl %al, %esi
- jmp __strchr_sse2
-/* Byte-wise check: needle[3+k] is loaded into %cl and compared with the
-   byte at rax+3+k.  Reaching the needle's NUL means a match starting at
-   %rax; a mismatch rejects the candidate.  */
- .p2align 4
-L(pair_loop):
- addq $1, %rdx
- cmpb 2(%rax,%rdx), %cl
- jne L(next_pair)
-L(pair_loop_start):
- movzbl 3(%rsi,%rdx), %ecx
- testb %cl, %cl
- jne L(pair_loop)
-L(found1): /* rax = start of the match */
- ret
-L(zero1): /* haystack ended first: return 0 */
- xorl %eax, %eax
- ret
-/* Candidate rejected: r8 &= r8 - 1 clears its lowest set bit.  If bits
-   remain, try the next one; otherwise execution falls through to
-   L(next_32_bytes).  */
- .p2align 4
-L(next_pair):
- leaq -1(%r8), %rax
- andq %rax, %r8
- jne L(next_pair_index)
-/* Same filter as above for haystack bytes 32..63.  The two 16-bit masks
-   are shifted to bits 32..47 and 48..63, so a bit index is still the
-   byte offset from %rdi.  */
- .p2align 4
-L(next_32_bytes):
- movdqu 32(%rdi), %xmm3 /* h[32..47] */
- pxor %xmm5, %xmm5
- movdqu 33(%rdi), %xmm4 /* h[33..48] */
- movdqa %xmm3, %xmm6
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm2, %xmm4
- movdqu 48(%rdi), %xmm0 /* h[48..63] */
- pcmpeqb %xmm5, %xmm6
- pminub %xmm4, %xmm3
- movdqa %xmm3, %xmm4
- movdqu 49(%rdi), %xmm3 /* h[49..64] */
- pcmpeqb %xmm0, %xmm5
- pcmpeqb %xmm2, %xmm3
- por %xmm6, %xmm4
- pcmpeqb %xmm1, %xmm0
- pminub %xmm3, %xmm0
- por %xmm5, %xmm0
- pmovmskb %xmm4, %eax
- salq $32, %rax
- pmovmskb %xmm0, %r8d
- salq $48, %r8
- orq %rax, %r8 /* r8 bits 32..63 describe h[32..63] */
- je L(loop_header) /* first 64 bytes done, nothing flagged */
-L(next_pair2_index): /* Same handling as L(next_pair_index).  */
- bsfq %r8, %rax
- addq %rdi, %rax
- cmpb $0, (%rax)
- je L(zero2)
- movzbl 2(%rsi), %edx
- testb %dl, %dl
- je L(found2)
- cmpb 2(%rax), %dl
- jne L(next_pair2)
- xorl %edx, %edx
- jmp L(pair_loop2_start)
-/* Byte-wise check for candidates in bytes 32..63; same scheme as
-   L(pair_loop).  */
- .p2align 4
-L(pair_loop2):
- addq $1, %rdx
- cmpb 2(%rax,%rdx), %cl
- jne L(next_pair2)
-L(pair_loop2_start):
- movzbl 3(%rsi,%rdx), %ecx
- testb %cl, %cl
- jne L(pair_loop2)
-L(found2): /* rax = start of the match */
- ret
- L(zero2): /* haystack ended first: return 0 */
- xorl %eax, %eax
- ret
-L(empty): /* needle[0] == 0: return the haystack pointer */
- mov %rdi, %rax
- ret
-/* Candidate rejected: clear the lowest set bit of r8; when no bit is
-   left, fall through into L(loop_header).  */
- .p2align 4
-L(next_pair2):
- leaq -1(%r8), %rax
- andq %rax, %r8
- jne L(next_pair2_index)
-L(loop_header): /* Set-up for the 64-byte main loop.  */
- movq $-512, %r11 /* r11: initial value of the counter updated at L(next_pair3) */
- movq %rdi, %r9 /* r9 = haystack pointer before it is rounded down */
-
- pxor %xmm7, %xmm7 /* xmm7 = 0 */
- andq $-64, %rdi /* round rdi down to a multiple of 64 */
-/* Main loop: one 64-byte aligned block per iteration (the block at
-   64(%rdi) on entry; %rdi itself is advanced in the middle of the body).
-   For a 16-byte chunk c and the 16 bytes p starting one byte earlier,
-   (c ^ needle[1]) | (p ^ needle[0]) has a zero byte exactly where a
-   two-byte candidate ends.  Those values and the raw chunks are folded
-   with pminub, so a single compare against zero detects a candidate or a
-   NUL anywhere in the block.  */
- .p2align 4
-L(loop):
- movdqa 64(%rdi), %xmm3 /* chunk 0 (aligned load) */
- movdqu 63(%rdi), %xmm6 /* chunk 0 shifted one byte back */
- movdqa %xmm3, %xmm0 /* xmm0 gathers the raw bytes for the NUL test */
- pxor %xmm2, %xmm3
- pxor %xmm1, %xmm6
- movdqa 80(%rdi), %xmm10 /* chunk 1 */
- por %xmm3, %xmm6 /* xmm6: zero byte = candidate in chunk 0 */
- pminub %xmm10, %xmm0
- movdqu 79(%rdi), %xmm3
- pxor %xmm2, %xmm10
- pxor %xmm1, %xmm3
- movdqa 96(%rdi), %xmm9 /* chunk 2 */
- por %xmm10, %xmm3 /* xmm3: zero byte = candidate in chunk 1 */
- pminub %xmm9, %xmm0
- pxor %xmm2, %xmm9
- movdqa 112(%rdi), %xmm8 /* chunk 3 */
- addq $64, %rdi /* rdi now points at the block being examined */
- pminub %xmm6, %xmm3 /* from here xmm3 is the running minimum over the chunks */
- movdqu 31(%rdi), %xmm4 /* chunk 2 shifted one byte back (offsets relative to the new rdi) */
- pminub %xmm8, %xmm0
- pxor %xmm2, %xmm8
- pxor %xmm1, %xmm4
- por %xmm9, %xmm4 /* xmm4: zero byte = candidate in chunk 2 */
- pminub %xmm4, %xmm3
- movdqu 47(%rdi), %xmm5 /* chunk 3 shifted one byte back */
- pxor %xmm1, %xmm5
- por %xmm8, %xmm5 /* xmm5: zero byte = candidate in chunk 3 */
- pminub %xmm5, %xmm3
- pminub %xmm3, %xmm0 /* combine candidate test and NUL test */
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %eax
- testl %eax, %eax
- je L(loop) /* no zero byte: neither candidate nor NUL in this block */
- pminub (%rdi), %xmm6 /* something was flagged: build per-chunk masks.  chunk 0: zero where NUL or candidate */
- pminub 32(%rdi),%xmm4 /* chunk 2 likewise */
- pminub 48(%rdi),%xmm5 /* chunk 3 likewise */
- pcmpeqb %xmm7, %xmm6
- pcmpeqb %xmm7, %xmm5
- pmovmskb %xmm6, %edx
- movdqa 16(%rdi), %xmm8 /* chunk 1 was merged into xmm3 inside the loop, so it is recomputed here */
- pcmpeqb %xmm7, %xmm4
- movdqu 15(%rdi), %xmm0
- pmovmskb %xmm5, %r8d
- movdqa %xmm8, %xmm3
- pmovmskb %xmm4, %ecx
- pcmpeqb %xmm1,%xmm0
- pcmpeqb %xmm2,%xmm3
- salq $32, %rcx
- pcmpeqb %xmm7,%xmm8
- salq $48, %r8
- pminub %xmm0,%xmm3
- orq %rcx, %rdx
- por %xmm3,%xmm8
- orq %rdx, %r8
- pmovmskb %xmm8, %eax
- salq $16, %rax
- orq %rax, %r8 /* r8 bit i: byte rdi+i is NUL, or bytes rdi+i-1, rdi+i equal needle[0], needle[1] */
- je L(loop) /* detailed mask empty: keep scanning */
-L(next_pair_index3): /* Handle the lowest set bit of r8.  */
- bsfq %r8, %rcx
- addq %rdi, %rcx /* rcx = address of the NUL, or of the candidate's SECOND byte */
- cmpb $0, (%rcx)
- je L(zero)
- xorl %eax, %eax /* rax = k, index for the byte-wise check */
- movzbl 2(%rsi), %edx
- testb %dl, %dl
- je L(success3) /* needle has two bytes */
- cmpb 1(%rcx), %dl /* needle[2] against the byte after the second byte */
- jne L(next_pair3)
- jmp L(pair_loop_start3)
-/* Byte-wise check for main-loop candidates: needle[3+k] in %dl against
-   the byte at rcx+2+k (%rcx is one past the candidate's start).  */
- .p2align 4
-L(pair_loop3):
- addq $1, %rax
- cmpb 1(%rcx,%rax), %dl
- jne L(next_pair3)
-L(pair_loop_start3):
- movzbl 3(%rsi,%rax), %edx
- testb %dl, %dl
- jne L(pair_loop3)
-L(success3):
- lea -1(%rcx), %rax /* the match starts one byte before rcx */
- ret
-/* Candidate rejected in the main loop.  %rax (the number of
-   L(pair_loop3) iterations spent on it) is added to %r11, which started
-   at -512.  If the distance covered so far, %rdi - %r9, is below %r11 in
-   a signed compare, control moves to L(switch_strstr) -- presumably to
-   bound the time spent on repeated partial matches (TODO confirm).
-   Otherwise the lowest set bit of %r8 is cleared and scanning goes on.  */
- .p2align 4
-L(next_pair3):
- addq %rax, %r11
- movq %rdi, %rax
- subq %r9, %rax
- cmpq %r11, %rax
- jl L(switch_strstr)
- leaq -1(%r8), %rax
- andq %rax, %r8
- jne L(next_pair_index3)
- jmp L(loop)
-/* Tail-jump to __strstr_sse2 with %rdi = start of the current 64-byte
-   block; %rsi has not been written on this path and still holds the
-   needle.  */
- .p2align 4
-L(switch_strstr):
- movq %rdi, %rdi /* NOTE(review): moves %rdi onto itself, register contents unchanged */
- jmp __strstr_sse2
-/* Taken when the haystack address modulo 4096 is above 4031.  Instead
-   of loading forward from %rdi, this path loads the 64-byte aligned block
-   containing %rdi (%rax = %rdi & -64) together with the vectors starting
-   one byte earlier, builds a 64-bit mask whose bit i describes the byte at
-   rax+i (NUL, or second byte of a candidate), and shifts out the bits
-   that lie below %rdi.  */
- .p2align 4
-L(cross_page):
-
- movq %rdi, %rax
- pxor %xmm0, %xmm0 /* xmm0 = 0 for the NUL tests */
- andq $-64, %rax /* rax = start of the aligned block holding rdi */
- movdqa (%rax), %xmm3 /* chunk 0 */
- movdqu -1(%rax), %xmm4 /* chunk 0 shifted one byte back */
- movdqa %xmm3, %xmm8
- movdqa 16(%rax), %xmm5 /* chunk 1 */
- pcmpeqb %xmm1, %xmm4 /* previous byte == needle[0] */
- pcmpeqb %xmm0, %xmm8 /* NUL mask, chunk 0 */
- pcmpeqb %xmm2, %xmm3 /* byte == needle[1] */
- movdqa %xmm5, %xmm7
- pminub %xmm4, %xmm3
- movdqu 15(%rax), %xmm4
- pcmpeqb %xmm0, %xmm7 /* NUL mask, chunk 1 */
- por %xmm3, %xmm8 /* chunk 0: candidate or NUL */
- movdqa %xmm5, %xmm3
- movdqa 32(%rax), %xmm5 /* chunk 2 */
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm2, %xmm3
- movdqa %xmm5, %xmm6
- pmovmskb %xmm8, %ecx
- pminub %xmm4, %xmm3
- movdqu 31(%rax), %xmm4
- por %xmm3, %xmm7 /* chunk 1: candidate or NUL */
- movdqa %xmm5, %xmm3
- pcmpeqb %xmm0, %xmm6 /* NUL mask, chunk 2 */
- movdqa 48(%rax), %xmm5 /* chunk 3 */
- pcmpeqb %xmm1, %xmm4
- pmovmskb %xmm7, %r8d
- pcmpeqb %xmm2, %xmm3
- pcmpeqb %xmm5, %xmm0 /* NUL mask, chunk 3 (xmm0 no longer zero after this) */
- pminub %xmm4, %xmm3
- movdqu 47(%rax), %xmm4
- por %xmm3, %xmm6 /* chunk 2: candidate or NUL */
- movdqa %xmm5, %xmm3
- salq $16, %r8
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm6, %r10d
- pminub %xmm4, %xmm3
- por %xmm3, %xmm0 /* chunk 3: candidate or NUL */
- salq $32, %r10
- orq %r10, %r8
- orq %rcx, %r8
- movl %edi, %ecx
- pmovmskb %xmm0, %edx
- subl %eax, %ecx /* cl = rdi - rax, the haystack's offset inside the block */
- salq $48, %rdx
- orq %rdx, %r8 /* r8 = full 64-bit mask for the aligned block */
- shrq %cl, %r8 /* drop bits below rdi; bit 0 now stands for the byte at rdi */
- je L(loop_header) /* when cl == 0 the shift leaves the flags alone and ZF still comes from the orq above, which produced the same r8 */
-L(next_pair_index4): /* Handle the lowest set bit of r8.  */
- bsfq %r8, %rax
- addq %rdi, %rax /* rax = address of the NUL, or of the candidate's second byte */
- cmpb $0, (%rax)
- je L(zero)
-/* A candidate whose second byte is at %rdi would start at rdi-1, before
-   the haystack: skip it.  */
- cmpq %rax,%rdi
- je L(next_pair4)
-
- movzbl 2(%rsi), %edx
- testb %dl, %dl
- je L(found3) /* needle has two bytes */
- cmpb 1(%rax), %dl
- jne L(next_pair4)
- xorl %edx, %edx /* rdx = k, index for the byte-wise check */
- jmp L(pair_loop_start4)
-/* Byte-wise check for cross-page candidates: needle[3+k] in %cl against
-   the byte at rax+2+k (%rax is one past the candidate's start).  */
- .p2align 4
-L(pair_loop4):
- addq $1, %rdx
- cmpb 1(%rax,%rdx), %cl
- jne L(next_pair4)
-L(pair_loop_start4):
- movzbl 3(%rsi,%rdx), %ecx
- testb %cl, %cl
- jne L(pair_loop4)
-L(found3):
- subq $1, %rax /* the match starts one byte before rax */
- ret
-/* Candidate rejected: clear the lowest set bit of r8; when none is left,
-   continue with the main loop set-up.  */
- .p2align 4
-L(next_pair4):
- leaq -1(%r8), %rax
- andq %rax, %r8
- jne L(next_pair_index4)
- jmp L(loop_header)
-/* NOTE(review): no branch to L(found) is visible in this function.  */
- .p2align 4
-L(found):
- rep
- ret
-/* Return 0; target of the NUL checks in the main-loop and cross-page
-   paths.  */
- .p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-
-
-END(__strstr_sse2_unaligned)