diff options
author | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
---|---|---|
committer | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400 |
commit | 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch) | |
tree | 4470480d904b65cf14ca524f96f79eca818c3eaf /sysdeps/x86_64/memcmp.S | |
parent | 199fc19d3aaaf57944ef036e15904febe877fc93 (diff) | |
download | glibc-zack/build-layout-experiment.tar glibc-zack/build-layout-experiment.tar.gz glibc-zack/build-layout-experiment.tar.bz2 glibc-zack/build-layout-experiment.zip |
Prepare for radical source tree reorganization.zack/build-layout-experiment
All top-level files and directories are moved into a temporary storage
directory, REORG.TODO, except for files that will certainly still
exist in their current form at top level when we're done (COPYING,
COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which
are moved to the new directory OldChangeLogs, instead), and the
generated file INSTALL (which is just deleted; in the new order, there
will be no generated files checked into version control).
Diffstat (limited to 'sysdeps/x86_64/memcmp.S')
-rw-r--r-- | sysdeps/x86_64/memcmp.S | 358 |
1 files changed, 0 insertions, 358 deletions
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S deleted file mode 100644 index 0828a22534..0000000000 --- a/sysdeps/x86_64/memcmp.S +++ /dev/null @@ -1,358 +0,0 @@ -/* memcmp with SSE2 - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY (memcmp) - test %rdx, %rdx - jz L(finz) - cmpq $1, %rdx - jle L(finr1b) - subq %rdi, %rsi - movq %rdx, %r10 - cmpq $32, %r10 - jge L(gt32) - /* Handle small chunks and last block of less than 32 bytes. */ -L(small): - testq $1, %r10 - jz L(s2b) - movzbl (%rdi), %eax - movzbl (%rdi, %rsi), %edx - subq $1, %r10 - je L(finz1) - addq $1, %rdi - subl %edx, %eax - jnz L(exit) -L(s2b): - testq $2, %r10 - jz L(s4b) - movzwl (%rdi), %eax - movzwl (%rdi, %rsi), %edx - subq $2, %r10 - je L(fin2_7) - addq $2, %rdi - cmpl %edx, %eax - jnz L(fin2_7) -L(s4b): - testq $4, %r10 - jz L(s8b) - movl (%rdi), %eax - movl (%rdi, %rsi), %edx - subq $4, %r10 - je L(fin2_7) - addq $4, %rdi - cmpl %edx, %eax - jnz L(fin2_7) -L(s8b): - testq $8, %r10 - jz L(s16b) - movq (%rdi), %rax - movq (%rdi, %rsi), %rdx - subq $8, %r10 - je L(fin2_7) - addq $8, %rdi - cmpq %rdx, %rax - jnz L(fin2_7) -L(s16b): - movdqu (%rdi), %xmm1 - movdqu (%rdi, %rsi), %xmm0 - pcmpeqb %xmm0, %xmm1 - pmovmskb %xmm1, %edx - xorl %eax, %eax - subl $0xffff, %edx - jz L(finz) - bsfl %edx, %ecx - leaq (%rdi, %rcx), %rcx - movzbl (%rcx), %eax - movzbl (%rsi, %rcx), %edx - jmp L(finz1) - - .p2align 4,, 4 -L(finr1b): - movzbl (%rdi), %eax - movzbl (%rsi), %edx -L(finz1): - subl %edx, %eax -L(exit): - ret - - .p2align 4,, 4 -L(fin2_7): - cmpq %rdx, %rax - jz L(finz) - movq %rax, %r11 - subq %rdx, %r11 - bsfq %r11, %rcx - sarq $3, %rcx - salq $3, %rcx - sarq %cl, %rax - movzbl %al, %eax - sarq %cl, %rdx - movzbl %dl, %edx - subl %edx, %eax - ret - - .p2align 4,, 4 -L(finz): - xorl %eax, %eax - ret - - /* For blocks bigger than 32 bytes - 1. Advance one of the addr pointer to be 16B aligned. - 2. Treat the case of both addr pointers aligned to 16B - separately to avoid movdqu. - 3. Handle any blocks of greater than 64 consecutive bytes with - unrolling to reduce branches. - 4. At least one addr pointer is 16B aligned, use memory version - of pcmbeqb. - */ - .p2align 4,, 4 -L(gt32): - movq %rdx, %r11 - addq %rdi, %r11 - movq %rdi, %r8 - - andq $15, %r8 - jz L(16am) - /* Both pointers may be misaligned. */ - movdqu (%rdi), %xmm1 - movdqu (%rdi, %rsi), %xmm0 - pcmpeqb %xmm0, %xmm1 - pmovmskb %xmm1, %edx - subl $0xffff, %edx - jnz L(neq) - neg %r8 - leaq 16(%rdi, %r8), %rdi -L(16am): - /* Handle two 16B aligned pointers separately. */ - testq $15, %rsi - jz L(ATR) - testq $16, %rdi - jz L(A32) - movdqu (%rdi, %rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi -L(A32): - movq %r11, %r10 - andq $-32, %r10 - cmpq %r10, %rdi - jge L(mt16) - /* Pre-unroll to be ready for unrolled 64B loop. */ - testq $32, %rdi - jz L(A64) - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - -L(A64): - movq %r11, %r10 - andq $-64, %r10 - cmpq %r10, %rdi - jge L(mt32) - -L(A64main): - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - cmpq %rdi, %r10 - jne L(A64main) - -L(mt32): - movq %r11, %r10 - andq $-32, %r10 - cmpq %r10, %rdi - jge L(mt16) - -L(A32main): - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqu (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - cmpq %rdi, %r10 - jne L(A32main) -L(mt16): - subq %rdi, %r11 - je L(finz) - movq %r11, %r10 - jmp L(small) - - .p2align 4,, 4 -L(neq): - bsfl %edx, %ecx - movzbl (%rdi, %rcx), %eax - addq %rdi, %rsi - movzbl (%rsi,%rcx), %edx - jmp L(finz1) - - .p2align 4,, 4 -L(ATR): - movq %r11, %r10 - andq $-32, %r10 - cmpq %r10, %rdi - jge L(mt16) - testq $16, %rdi - jz L(ATR32) - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - cmpq %rdi, %r10 - je L(mt16) - -L(ATR32): - movq %r11, %r10 - andq $-64, %r10 - testq $32, %rdi - jz L(ATR64) - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - -L(ATR64): - cmpq %rdi, %r10 - je L(mt32) - -L(ATR64main): - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - cmpq %rdi, %r10 - jne L(ATR64main) - - movq %r11, %r10 - andq $-32, %r10 - cmpq %r10, %rdi - jge L(mt16) - -L(ATR32res): - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - movdqa (%rdi,%rsi), %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %edx - subl $0xffff, %edx - jnz L(neq) - addq $16, %rdi - - cmpq %r10, %rdi - jne L(ATR32res) - - subq %rdi, %r11 - je L(finz) - movq %r11, %r10 - jmp L(small) - /* Align to 16byte to improve instruction fetch. */ - .p2align 4,, 4 -END(memcmp) - -#undef bcmp -weak_alias (memcmp, bcmp) -libc_hidden_builtin_def (memcmp) |