aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/rawmemchr.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/rawmemchr.S')
-rw-r--r-- sysdeps/x86_64/rawmemchr.S 184
1 files changed, 6 insertions, 178 deletions
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index 4c1a3383b9..ba7e5202e6 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -17,185 +17,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#define RAWMEMCHR __rawmemchr
- .text
-ENTRY (__rawmemchr)
- movd %rsi, %xmm1
- mov %rdi, %rcx
+#define DEFAULT_IMPL_V1 "multiarch/rawmemchr-sse2.S"
+#define DEFAULT_IMPL_V3 "multiarch/rawmemchr-avx2.S"
+#define DEFAULT_IMPL_V4 "multiarch/rawmemchr-evex.S"
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
-
- and $63, %rcx
- pshufd $0, %xmm1, %xmm1
-
- cmp $48, %rcx
- ja L(crosscache)
-
- movdqu (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
- test %eax, %eax
-
- jnz L(matches)
- add $16, %rdi
- and $-16, %rdi
- jmp L(loop_prolog)
-
- .p2align 4
-L(crosscache):
- and $15, %rcx
- and $-16, %rdi
- movdqa (%rdi), %xmm0
-
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
-/* Remove the leading bytes. */
- sar %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
-/* Check which byte is a match. */
- bsf %eax, %eax
-
- add %rdi, %rax
- add %rcx, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- add $16, %rdi
-
- .p2align 4
-L(loop_prolog):
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm4
- pcmpeqb %xmm1, %xmm4
- add $64, %rdi
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- test $0x3f, %rdi
- jz L(align64_loop)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 48(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
-
- add $64, %rdi
- test %eax, %eax
- jnz L(matches0)
-
- and $-64, %rdi
-
- .p2align 4
-L(align64_loop):
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm2
- movdqa 32(%rdi), %xmm3
- movdqa 48(%rdi), %xmm4
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
- pmovmskb %xmm4, %eax
-
- add $64, %rdi
-
- test %eax, %eax
- jz L(align64_loop)
-
- sub $64, %rdi
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
-
- pcmpeqb 48(%rdi), %xmm1
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- pmovmskb %xmm1, %eax
- bsf %eax, %eax
- lea 48(%rdi, %rax), %rax
- ret
-
- .p2align 4
-L(matches0):
- bsf %eax, %eax
- lea -16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches):
- bsf %eax, %eax
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16):
- bsf %eax, %eax
- lea 16(%rax, %rdi), %rax
- ret
-
- .p2align 4
-L(matches32):
- bsf %eax, %eax
- lea 32(%rax, %rdi), %rax
- ret
-
-END (__rawmemchr)
+#include "isa-default-impl.h"
weak_alias (__rawmemchr, rawmemchr)
-libc_hidden_builtin_def (__rawmemchr)
+libc_hidden_def (__rawmemchr)