Diffstat (limited to 'sysdeps/x86_64/memcmp.S')
-rw-r--r--	sysdeps/x86_64/memcmp.S	358
1 file changed, 0 insertions(+), 358 deletions(-)
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
deleted file mode 100644
index 0828a22534..0000000000
--- a/sysdeps/x86_64/memcmp.S
+++ /dev/null
@@ -1,358 +0,0 @@
-/* memcmp with SSE2
- Copyright (C) 2009-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY (memcmp)
- test %rdx, %rdx
- jz L(finz)
- cmpq $1, %rdx
- jle L(finr1b)
- subq %rdi, %rsi
- movq %rdx, %r10
- cmpq $32, %r10
- jge L(gt32)
- /* Handle small chunks and last block of less than 32 bytes. */
-L(small):
- testq $1, %r10
- jz L(s2b)
- movzbl (%rdi), %eax
- movzbl (%rdi, %rsi), %edx
- subq $1, %r10
- je L(finz1)
- addq $1, %rdi
- subl %edx, %eax
- jnz L(exit)
-L(s2b):
- testq $2, %r10
- jz L(s4b)
- movzwl (%rdi), %eax
- movzwl (%rdi, %rsi), %edx
- subq $2, %r10
- je L(fin2_7)
- addq $2, %rdi
- cmpl %edx, %eax
- jnz L(fin2_7)
-L(s4b):
- testq $4, %r10
- jz L(s8b)
- movl (%rdi), %eax
- movl (%rdi, %rsi), %edx
- subq $4, %r10
- je L(fin2_7)
- addq $4, %rdi
- cmpl %edx, %eax
- jnz L(fin2_7)
-L(s8b):
- testq $8, %r10
- jz L(s16b)
- movq (%rdi), %rax
- movq (%rdi, %rsi), %rdx
- subq $8, %r10
- je L(fin2_7)
- addq $8, %rdi
- cmpq %rdx, %rax
- jnz L(fin2_7)
-L(s16b):
- movdqu (%rdi), %xmm1
- movdqu (%rdi, %rsi), %xmm0
- pcmpeqb %xmm0, %xmm1
- pmovmskb %xmm1, %edx
- xorl %eax, %eax
- subl $0xffff, %edx
- jz L(finz)
- bsfl %edx, %ecx
- leaq (%rdi, %rcx), %rcx
- movzbl (%rcx), %eax
- movzbl (%rsi, %rcx), %edx
- jmp L(finz1)
-
- .p2align 4,, 4
-L(finr1b):
- movzbl (%rdi), %eax
- movzbl (%rsi), %edx
-L(finz1):
- subl %edx, %eax
-L(exit):
- ret
-
- .p2align 4,, 4
-L(fin2_7):
- cmpq %rdx, %rax
- jz L(finz)
- movq %rax, %r11
- subq %rdx, %r11
- bsfq %r11, %rcx
- sarq $3, %rcx
- salq $3, %rcx
- sarq %cl, %rax
- movzbl %al, %eax
- sarq %cl, %rdx
- movzbl %dl, %edx
- subl %edx, %eax
- ret
-
- .p2align 4,, 4
-L(finz):
- xorl %eax, %eax
- ret
-
- /* For blocks bigger than 32 bytes
-    1. Advance one of the addr pointers to be 16B aligned.
-    2. Treat the case of both addr pointers aligned to 16B
-       separately to avoid movdqu.
-    3. Handle blocks of more than 64 consecutive bytes with
-       unrolling to reduce branches.
-    4. At least one addr pointer is 16B aligned; use the memory
-       operand form of pcmpeqb.
-  */
- .p2align 4,, 4
-L(gt32):
- movq %rdx, %r11
- addq %rdi, %r11
- movq %rdi, %r8
-
- andq $15, %r8
- jz L(16am)
- /* Both pointers may be misaligned. */
- movdqu (%rdi), %xmm1
- movdqu (%rdi, %rsi), %xmm0
- pcmpeqb %xmm0, %xmm1
- pmovmskb %xmm1, %edx
- subl $0xffff, %edx
- jnz L(neq)
- neg %r8
- leaq 16(%rdi, %r8), %rdi
-L(16am):
- /* Handle two 16B aligned pointers separately. */
- testq $15, %rsi
- jz L(ATR)
- testq $16, %rdi
- jz L(A32)
- movdqu (%rdi, %rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-L(A32):
- movq %r11, %r10
- andq $-32, %r10
- cmpq %r10, %rdi
- jge L(mt16)
- /* Pre-unroll to be ready for the unrolled 64B loop. */
- testq $32, %rdi
- jz L(A64)
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
-L(A64):
- movq %r11, %r10
- andq $-64, %r10
- cmpq %r10, %rdi
- jge L(mt32)
-
-L(A64main):
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- cmpq %rdi, %r10
- jne L(A64main)
-
-L(mt32):
- movq %r11, %r10
- andq $-32, %r10
- cmpq %r10, %rdi
- jge L(mt16)
-
-L(A32main):
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqu (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- cmpq %rdi, %r10
- jne L(A32main)
-L(mt16):
- subq %rdi, %r11
- je L(finz)
- movq %r11, %r10
- jmp L(small)
-
- .p2align 4,, 4
-L(neq):
- bsfl %edx, %ecx
- movzbl (%rdi, %rcx), %eax
- addq %rdi, %rsi
- movzbl (%rsi,%rcx), %edx
- jmp L(finz1)
-
- .p2align 4,, 4
-L(ATR):
- movq %r11, %r10
- andq $-32, %r10
- cmpq %r10, %rdi
- jge L(mt16)
- testq $16, %rdi
- jz L(ATR32)
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
- cmpq %rdi, %r10
- je L(mt16)
-
-L(ATR32):
- movq %r11, %r10
- andq $-64, %r10
- testq $32, %rdi
- jz L(ATR64)
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
-L(ATR64):
- cmpq %rdi, %r10
- je L(mt32)
-
-L(ATR64main):
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
- cmpq %rdi, %r10
- jne L(ATR64main)
-
- movq %r11, %r10
- andq $-32, %r10
- cmpq %r10, %rdi
- jge L(mt16)
-
-L(ATR32res):
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- movdqa (%rdi,%rsi), %xmm0
- pcmpeqb (%rdi), %xmm0
- pmovmskb %xmm0, %edx
- subl $0xffff, %edx
- jnz L(neq)
- addq $16, %rdi
-
- cmpq %r10, %rdi
- jne L(ATR32res)
-
- subq %rdi, %r11
- je L(finz)
- movq %r11, %r10
- jmp L(small)
- /* Align to 16 bytes to improve instruction fetch. */
- .p2align 4,, 4
-END(memcmp)
-
-#undef bcmp
-weak_alias (memcmp, bcmp)
-libc_hidden_builtin_def (memcmp)
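
The strategy comment in the deleted file ("For blocks bigger than 32 bytes") and the loops that follow it all repeat the same 16-byte comparison step: a movdqu load, pcmpeqb against the other pointer, pmovmskb to collect the per-byte equality mask, then "subl $0xffff" to test whether all 16 bytes matched. The C sketch below illustrates that step with SSE2 intrinsics. It is only an illustrative sketch, not part of the removed file or of any glibc API; the helper name memcmp_step16 is made up for the example.

/* Illustrative sketch (not from glibc): one 16-byte SSE2 comparison step,
   mirroring the movdqu/pcmpeqb/pmovmskb/"subl $0xffff" pattern in the
   deleted assembly.  Returns <0, 0 or >0, as memcmp would for these
   16 bytes.  */
#include <emmintrin.h>          /* SSE2 intrinsics */

static int
memcmp_step16 (const unsigned char *p, const unsigned char *q)
{
  __m128i a = _mm_loadu_si128 ((const __m128i *) p);   /* movdqu (%rdi), %xmm1 */
  __m128i b = _mm_loadu_si128 ((const __m128i *) q);   /* movdqu (%rdi,%rsi), %xmm0 */
  /* pcmpeqb: equal bytes become 0xff, differing bytes become 0x00.  */
  __m128i eq = _mm_cmpeq_epi8 (a, b);
  /* pmovmskb: gather the high bit of each byte into a 16-bit mask.  */
  unsigned int mask = (unsigned int) _mm_movemask_epi8 (eq);
  if (mask == 0xffff)           /* subl $0xffff, %edx; jz L(finz) */
    return 0;                   /* all 16 bytes equal */
  /* bsf on the inverted mask: index of the first differing byte.  */
  unsigned int idx = (unsigned int) __builtin_ctz (~mask & 0xffff);
  return (int) p[idx] - (int) q[idx];
}

The deleted file composes this step with an alignment prologue, 64-byte unrolled loops (L(A64main), L(ATR64main)), and the L(small) tail, so that at least one pointer is 16-byte aligned and pcmpeqb can take its memory operand directly.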