about summary refs log tree commit diff
path: root/sysdeps/i386/i686/multiarch/strcpy-sse2.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strcpy-sse2.S')
-rw-r--r-- sysdeps/i386/i686/multiarch/strcpy-sse2.S 2250
1 files changed, 0 insertions, 2250 deletions
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
deleted file mode 100644
index ed627a5f62..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ /dev/null
@@ -1,2250 +0,0 @@
-/* strcpy with SSE2 and unaligned load
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-/* Bookkeeping for 4-byte pushes and pops.  CFI_PUSH/CFI_POP only adjust
-   the CFA offset by +4/-4 and record/restore REG; PUSH/POP pair the real
-   pushl/popl with the matching annotation.  */
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-/* STRCPY is the symbol name of the entry point; it defaults to
-   __strcpy_sse2 unless it was defined before this point.  STR1, STR2 and
-   LEN are %esp-relative offsets of three consecutive 4-byte stack slots
-   starting at PARMS; the length-limited entry loads them into %edi
-   (store target), %esi (load source) and %ebx (length).  */
-# ifndef STRCPY
-# define STRCPY __strcpy_sse2
-# endif
-
-# define STR1 PARMS
-# define STR2 STR1+4
-# define LEN STR2+4
-/* Length-limited variant.  ENTRANCE pushes %ebx, %esi and %edi, so the
-   first argument is 16 bytes above %esp (12 bytes of saved registers
-   plus, presumably, the 4-byte return address).  RETURN pops the three
-   registers in reverse order and returns; the CFI_PUSH annotations after
-   the ret put the annotation state back to "three registers pushed" for
-   the code that follows a RETURN in the middle of the function.  */
-# ifdef USE_AS_STRNCPY
-# define PARMS 16
-# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
-# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
- CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
-/* JMPTBL (I, B) builds one jump-table entry for label I of the table
-   that starts at B: the offset of I relative to B when SHARED is
-   defined, the address of I otherwise.  */
-# ifdef SHARED
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into ECX and branch to it. TABLE is a
-   jump table with relative offsets.
-   INDEX is a register containing the index into the jump table.
-   SCALE is the scale of INDEX.  ECX is overwritten. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into ECX. */ \
- SETUP_PIC_REG(cx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ecx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ecx,INDEX,SCALE), %ecx; \
- /* We loaded the jump table and adjusted ECX. Go. */ \
- jmp *%ecx
-# else
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
-   absolute offsets. INDEX is a register containing the index into the
-   jump table. SCALE is the scale of INDEX.  No register is modified. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-# endif
-/* Length-limited copy entry (this branch is built when USE_AS_STRNCPY
-   is defined).  Register roles set up below:
-     %edi  destination pointer (STR1)
-     %esi  source pointer (STR2)
-     %ebx  length (LEN)
-     %eax  copy of the destination, unless USE_AS_STPCPY is defined
-     %ecx  source address modulo 16
-   The first two aligned 16-byte source blocks are probed for a zero
-   byte before the main copy code is entered.  */
-.text
-ENTRY (STRCPY)
- ENTRANCE
- mov STR1(%esp), %edi
- mov STR2(%esp), %esi
- movl LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(ExitZero) /* length 0: nothing to copy */
-
- mov %esi, %ecx
-# ifndef USE_AS_STPCPY
- mov %edi, %eax /* save result */
-# endif
- and $15, %ecx
- jz L(SourceStringAlignmentZero) /* source already 16-byte aligned */
-
- and $-16, %esi /* round the source down to its 16-byte block */
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
-
- pcmpeqb (%esi), %xmm1 /* 0xff in every lane that holds a zero byte */
- add %ecx, %ebx /* length is now counted from the aligned base */
- pmovmskb %xmm1, %edx
- shr %cl, %edx /* drop the lanes that precede the string start */
-# ifdef USE_AS_STPCPY
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTailCase2OrCase3) /* limit ends in this block */
-# else
- cmp $17, %ebx /* NOTE(review): threshold is one larger than in the STPCPY build */
- jbe L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail) /* zero byte in the first block */
-
- pcmpeqb 16(%esi), %xmm0 /* probe the second aligned block */
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
- cmp $32, %ebx
- jbe L(CopyFrom1To32BytesCase2OrCase3)
-# else
- cmp $33, %ebx
- jbe L(CopyFrom1To32BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes) /* zero byte in the second block */
-
- movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
- movdqu %xmm1, (%edi)
-
- sub %ecx, %edi /* bias %edi so equal offsets from %esi and %edi match */
-
-/* 16-byte stepping with aligned loads from the source and unaligned
-   stores to the destination.  Both entry paths arrive here.  %ecx is
-   the running offset applied to both %esi and %edi.  Each step loads
-   and tests the next block before storing the previous one.  %xmm0 is
-   both the zero constant and the compare result: it stays all-zero as
-   long as the tested block holds no zero byte.  A %ebx that drops to
-   zero or below means the length limit is reached.  */
- .p2align 4
-L(Unalign16Both):
- mov $16, %ecx
- movdqa (%esi, %ecx), %xmm1 /* block already probed by the entry code */
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%edi, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $48, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm2)
-
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%edi, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm3)
-
- movaps 16(%esi, %ecx), %xmm4
- movdqu %xmm3, (%edi, %ecx)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm4)
-
- movaps 16(%esi, %ecx), %xmm1
- movdqu %xmm4, (%edi, %ecx)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm1)
-
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%edi, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm2)
-
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%edi, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm3)
-
- movdqu %xmm3, (%edi, %ecx)
- mov %esi, %edx
- lea 16(%esi, %ecx), %esi
- and $-0x40, %esi /* %esi = next unread block rounded down to 64 bytes */
- sub %esi, %edx /* %edx = old %esi - new %esi */
- sub %edx, %edi /* move %edi by the same distance as %esi */
- lea 128(%ebx, %edx), %ebx /* re-base the count; NOTE(review): the +128 bias is not derived here */
-/* 64 bytes per iteration.  Four aligned 16-byte blocks are loaded into
-   %xmm4, %xmm5, %xmm6 and %xmm7; %xmm2/%xmm3 are scratch copies that
-   pminub folds into a bytewise minimum, so one pcmpeqb against %xmm0
-   (expected to be all-zero here) finds a zero byte anywhere in the 64
-   bytes.  A group is stored at the top of the next iteration, after it
-   passed both the length check and the zero-byte check.  */
-L(Unaligned64Loop):
- movaps (%esi), %xmm2
- movaps %xmm2, %xmm4
- movaps 16(%esi), %xmm5
- movaps 32(%esi), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%esi), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3 /* %xmm3 = bytewise minimum of all four blocks */
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3) /* length limit inside this group */
- test %edx, %edx
- jnz L(Unaligned64Leave)
-L(Unaligned64Loop_start):
- add $64, %edi
- add $64, %esi
- movdqu %xmm4, -64(%edi) /* stores of the previous group are interleaved with the next loads */
- movaps (%esi), %xmm2
- movdqa %xmm2, %xmm4
- movdqu %xmm5, -48(%edi)
- movaps 16(%esi), %xmm5
- pminub %xmm5, %xmm2
- movaps 32(%esi), %xmm3
- movdqu %xmm6, -32(%edi)
- movaps %xmm3, %xmm6
- movdqu %xmm7, -16(%edi)
- movaps 48(%esi), %xmm7
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3)
- test %edx, %edx
- jz L(Unaligned64Loop_start)
-L(Unaligned64Leave): /* a zero byte is somewhere in %xmm4..%xmm7: find the block */
- pxor %xmm1, %xmm1
-
- pcmpeqb %xmm4, %xmm0
- pcmpeqb %xmm5, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_0)
- test %ecx, %ecx
- jnz L(CopyFrom1To16BytesUnaligned_16)
-
- pcmpeqb %xmm6, %xmm0 /* %xmm0/%xmm1 are all-zero again: the compares above matched nothing */
- pcmpeqb %xmm7, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_32)
-
- bsf %ecx, %edx /* zero byte is in the fourth block; %edx = its index there */
- movdqu %xmm4, (%edi)
- movdqu %xmm5, 16(%edi)
- movdqu %xmm6, 32(%edi)
-# ifdef USE_AS_STPCPY
- lea 48(%edi, %edx), %eax /* %eax = address of the zero byte in the destination */
-# endif
- movdqu %xmm7, 48(%edi)
- add $15, %ebx
- sub %edx, %ebx
- lea 49(%edi, %edx), %edi /* %edi = one past the zero byte */
- jmp L(StrncpyFillTailWithZero)
-
-/* Entry path taken when the source address is a multiple of 16
-   (%ecx == 0).  The first two source blocks are probed as on the other
-   path, but no alignment bias has to be applied to %esi, %edi or %ebx.  */
-
-L(SourceStringAlignmentZero):
- pxor %xmm0, %xmm0
- movdqa (%esi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTail1Case2OrCase3) /* limit ends in the first block */
-# else
- cmp $17, %ebx
- jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1) /* zero byte in the first block */
-
- pcmpeqb 16(%esi), %xmm0
- movdqu %xmm1, (%edi) /* first block has no zero byte: store it */
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
- cmp $32, %ebx
- jbe L(CopyFrom1To32Bytes1Case2OrCase3)
-# else
- cmp $33, %ebx
- jbe L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes1) /* zero byte in the second block */
-
- jmp L(Unalign16Both)
-
-/*-----------------End of main part---------------------------*/
-
-/* Case1: a zero byte was found and the length check did not trigger
-   first.  Where the entry code biased %esi/%ebx by %ecx the bias is
-   undone, bsf turns the pmovmskb mask into a byte index, and ExitTable
-   is used for the copy (table entry i is ExitN with N = i + 1).  */
- .p2align 4
-L(CopyFrom1To16BytesTail):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1):
- add $16, %esi /* skip the block that was already stored */
- add $16, %edi
- sub $16, %ebx
-L(CopyFrom1To16BytesTail1):
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes):
- sub %ecx, %ebx
- bsf %edx, %edx
- add %ecx, %esi
- add $16, %edx /* the mask came from the second aligned block ... */
- sub %ecx, %edx /* ... so the index from the string start is 16 + bit - %ecx */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-/* Exits of the 64-byte loop for a zero byte in the first (_0), second
-   (_16) or third (_32) block of the group, held in %xmm4, %xmm5 and
-   %xmm6.  Every block up to and including the one with the zero byte is
-   stored, %eax is set to the destination address of the zero byte for
-   USE_AS_STPCPY, %edi is moved one past it, %ebx is adjusted, and
-   control passes to StrncpyFillTailWithZero.  */
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
- bsf %edx, %edx
-# ifdef USE_AS_STPCPY
- lea (%edi, %edx), %eax
-# endif
- movdqu %xmm4, (%edi)
- add $63, %ebx
- sub %edx, %ebx
- lea 1(%edi, %edx), %edi
- jmp L(StrncpyFillTailWithZero)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
- bsf %ecx, %edx /* the mask of the second block is in %ecx */
- movdqu %xmm4, (%edi)
-# ifdef USE_AS_STPCPY
- lea 16(%edi, %edx), %eax
-# endif
- movdqu %xmm5, 16(%edi)
- add $47, %ebx
- sub %edx, %ebx
- lea 17(%edi, %edx), %edi
- jmp L(StrncpyFillTailWithZero)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
- bsf %edx, %edx
- movdqu %xmm4, (%edi)
- movdqu %xmm5, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 32(%edi, %edx), %eax
-# endif
- movdqu %xmm6, 32(%edi)
- add $31, %ebx
- sub %edx, %ebx
- lea 33(%edi, %edx), %edi
- jmp L(StrncpyFillTailWithZero)
-/* CopyFrom1To16BytesUnalignedXmmN: store the 16-byte block held in
-   %xmmN at offset %ecx of the destination, then continue in
-   CopyFrom1To16BytesXmmExit.  CopyFrom1To16BytesExit dispatches through
-   ExitTable with %edx already holding a byte index.  */
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm6):
- movdqu %xmm6, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm5):
- movdqu %xmm5, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm4):
- movdqu %xmm4, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm3):
- movdqu %xmm3, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm1):
- movdqu %xmm1, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesExit):
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2: the block holds a zero byte and the length limit was reached as
-   well.  After the pointers and the count are re-based, the index of
-   the zero byte (%edx) is compared with the remaining length (%ebx):
-   if the index is below the length the string ends first and ExitTable
-   is used, otherwise exactly %ebx bytes are copied through
-   ExitStrncpyTable.  */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %ecx, %edi
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit) /* unsigned: index < remaining length */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- add $16, %edx /* index from the string start: 16 + bit - %ecx */
- sub %ecx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-/* Length limit reached.  If the current block also holds a zero byte
-   (%edx != 0) the matching Case2 code decides which comes first.
-   Otherwise (Case3) the pointers and the count are re-based and the
-   remaining %ebx bytes are copied through ExitStrncpyTable.  */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %ecx, %edi
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To32BytesCase2)
- sub %ecx, %ebx /* undo the alignment bias applied by the entry code */
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTailCase2)
- sub %ecx, %ebx
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
- add $16, %edi /* skip the block that was already stored */
- add $16, %esi
- sub $16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1Case2)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-/* Exit0 is entry 0 of ExitStrncpyTable: nothing is copied.
-   ExitN (N >= 1, reached through ExitTable): copy N bytes from (%esi)
-   to (%edi), set %eax to N-1(%edi) for USE_AS_STPCPY, subtract N from
-   %ebx and advance %edi by N.  lea does not modify the flags, so each
-   jnz tests the result of the sub: a non-zero remainder is zero-filled,
-   otherwise the function returns.  %dh is stored as the last byte in
-   the odd-sized cases; this relies on the dispatch index left in %edx
-   being small enough that %dh is zero.  Exit7 uses two overlapping
-   4-byte moves.  */
- .p2align 4
-L(Exit0):
-# ifdef USE_AS_STPCPY
- mov %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit1):
- movb %dh, (%edi)
-# ifdef USE_AS_STPCPY
- lea (%edi), %eax
-# endif
- sub $1, %ebx
- lea 1(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit2):
- movw (%esi), %dx
- movw %dx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 1(%edi), %eax
-# endif
- sub $2, %ebx
- lea 2(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit3):
- movw (%esi), %cx
- movw %cx, (%edi)
- movb %dh, 2(%edi)
-# ifdef USE_AS_STPCPY
- lea 2(%edi), %eax
-# endif
- sub $3, %ebx
- lea 3(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit4):
- movl (%esi), %edx
- movl %edx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 3(%edi), %eax
-# endif
- sub $4, %ebx
- lea 4(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit5):
- movl (%esi), %ecx
- movb %dh, 4(%edi)
- movl %ecx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 4(%edi), %eax
-# endif
- sub $5, %ebx
- lea 5(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit6):
- movl (%esi), %ecx
- movw 4(%esi), %dx
- movl %ecx, (%edi)
- movw %dx, 4(%edi)
-# ifdef USE_AS_STPCPY
- lea 5(%edi), %eax
-# endif
- sub $6, %ebx
- lea 6(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit7):
- movl (%esi), %ecx
- movl 3(%esi), %edx
- movl %ecx, (%edi)
- movl %edx, 3(%edi)
-# ifdef USE_AS_STPCPY
- lea 6(%edi), %eax
-# endif
- sub $7, %ebx
- lea 7(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit8):
- movlpd (%esi), %xmm0
- movlpd %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 7(%edi), %eax
-# endif
- sub $8, %ebx
- lea 8(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-/* Exit9 .. Exit16 (reached through ExitTable): copy N bytes from (%esi)
-   to (%edi) with an 8-byte movlpd plus a second move that covers the
-   rest (overlapping the first where N is not 8 + a power of two), or
-   with one movdqu for N = 16.  %eax = N-1(%edi) for USE_AS_STPCPY.
-   %ebx is reduced by N and %edi advanced by N; lea leaves the flags of
-   the sub intact, so jnz zero-fills a non-zero remainder.  Exit9 stores
-   %dh as its last byte, which relies on %dh being zero at dispatch.  */
- .p2align 4
-L(Exit9):
- movlpd (%esi), %xmm0
- movb %dh, 8(%edi)
- movlpd %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 8(%edi), %eax
-# endif
- sub $9, %ebx
- lea 9(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit10):
- movlpd (%esi), %xmm0
- movw 8(%esi), %dx
- movlpd %xmm0, (%edi)
- movw %dx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 9(%edi), %eax
-# endif
- sub $10, %ebx
- lea 10(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit11):
- movlpd (%esi), %xmm0
- movl 7(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 10(%edi), %eax
-# endif
- sub $11, %ebx
- lea 11(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit12):
- movlpd (%esi), %xmm0
- movl 8(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 11(%edi), %eax
-# endif
- sub $12, %ebx
- lea 12(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit13):
- movlpd (%esi), %xmm0
- movlpd 5(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
- lea 12(%edi), %eax
-# endif
- sub $13, %ebx
- lea 13(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit14):
- movlpd (%esi), %xmm0
- movlpd 6(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
- lea 13(%edi), %eax
-# endif
- sub $14, %ebx
- lea 14(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit15):
- movlpd (%esi), %xmm0
- movlpd 7(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 14(%edi), %eax
-# endif
- sub $15, %ebx
- lea 15(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit16):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 15(%edi), %eax
-# endif
- sub $16, %ebx
- lea 16(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-/* Exit17 .. Exit24 (reached through ExitTable): copy N bytes from (%esi)
-   to (%edi) with one 16-byte movdqu plus a smaller move for the tail
-   (placed so that it ends at byte N, overlapping the first 16 bytes
-   where needed).  %eax = N-1(%edi) for USE_AS_STPCPY.  %ebx is reduced
-   by N and %edi advanced by N; lea leaves the flags of the sub intact,
-   so jnz zero-fills a non-zero remainder.  Exit17 and Exit21 store %dh
-   as the last byte, which relies on %dh being zero at dispatch.  */
- .p2align 4
-L(Exit17):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%edi)
- movb %dh, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 16(%edi), %eax
-# endif
- sub $17, %ebx
- lea 17(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit18):
- movdqu (%esi), %xmm0
- movw 16(%esi), %cx
- movdqu %xmm0, (%edi)
- movw %cx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 17(%edi), %eax
-# endif
- sub $18, %ebx
- lea 18(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit19):
- movdqu (%esi), %xmm0
- movl 15(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 18(%edi), %eax
-# endif
- sub $19, %ebx
- lea 19(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit20):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 19(%edi), %eax
-# endif
- sub $20, %ebx
- lea 20(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit21):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
- movb %dh, 20(%edi)
-# ifdef USE_AS_STPCPY
- lea 20(%edi), %eax
-# endif
- sub $21, %ebx
- lea 21(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit22):
- movdqu (%esi), %xmm0
- movlpd 14(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 21(%edi), %eax
-# endif
- sub $22, %ebx
- lea 22(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit23):
- movdqu (%esi), %xmm0
- movlpd 15(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 22(%edi), %eax
-# endif
- sub $23, %ebx
- lea 23(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit24):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 23(%edi), %eax
-# endif
- sub $24, %ebx
- lea 24(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-/* Exit25 .. Exit32 (reached through ExitTable): copy N bytes from (%esi)
-   to (%edi).  Exit25..Exit28 use a 16-byte move, an 8-byte move and a
-   small tail move; Exit29..Exit32 use two 16-byte moves, the second one
-   placed so that it ends at byte N.  %eax = N-1(%edi) for
-   USE_AS_STPCPY.  %ebx is reduced by N and %edi advanced by N; lea
-   leaves the flags of the sub intact, so jnz zero-fills a non-zero
-   remainder.  Exit25 stores %dh as the last byte, which relies on %dh
-   being zero at dispatch.  */
- .p2align 4
-L(Exit25):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movb %dh, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 24(%edi), %eax
-# endif
- sub $25, %ebx
- lea 25(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit26):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movw 24(%esi), %cx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movw %cx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 25(%edi), %eax
-# endif
- sub $26, %ebx
- lea 26(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit27):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 23(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
- lea 26(%edi), %eax
-# endif
- sub $27, %ebx
- lea 27(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit28):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 24(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 27(%edi), %eax
-# endif
- sub $28, %ebx
- lea 28(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit29):
- movdqu (%esi), %xmm0
- movdqu 13(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
- lea 28(%edi), %eax
-# endif
- sub $29, %ebx
- lea 29(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit30):
- movdqu (%esi), %xmm0
- movdqu 14(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 29(%edi), %eax
-# endif
- sub $30, %ebx
- lea 30(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
-
- .p2align 4
-L(Exit31):
- movdqu (%esi), %xmm0
- movdqu 15(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 30(%edi), %eax
-# endif
- sub $31, %ebx
- lea 31(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit32):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 31(%edi), %eax
-# endif
- sub $32, %ebx
- lea 32(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-/* StrncpyExitN (reached through ExitStrncpyTable): the length limit
-   ends the copy.  Exactly N bytes are moved from (%esi) to (%edi),
-   nothing is appended and no zero fill follows.  For USE_AS_STPCPY
-   %eax is set to N(%edi), i.e. one past the last byte written.
-   StrncpyExit7 uses two overlapping 4-byte moves.  */
- .p2align 4
-L(StrncpyExit1):
- movb (%esi), %dl
- movb %dl, (%edi)
-# ifdef USE_AS_STPCPY
- lea 1(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit2):
- movw (%esi), %dx
- movw %dx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 2(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit3):
- movw (%esi), %cx
- movb 2(%esi), %dl
- movw %cx, (%edi)
- movb %dl, 2(%edi)
-# ifdef USE_AS_STPCPY
- lea 3(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit4):
- movl (%esi), %edx
- movl %edx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 4(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit5):
- movl (%esi), %ecx
- movb 4(%esi), %dl
- movl %ecx, (%edi)
- movb %dl, 4(%edi)
-# ifdef USE_AS_STPCPY
- lea 5(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit6):
- movl (%esi), %ecx
- movw 4(%esi), %dx
- movl %ecx, (%edi)
- movw %dx, 4(%edi)
-# ifdef USE_AS_STPCPY
- lea 6(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit7):
- movl (%esi), %ecx
- movl 3(%esi), %edx
- movl %ecx, (%edi)
- movl %edx, 3(%edi)
-# ifdef USE_AS_STPCPY
- lea 7(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit8):
- movlpd (%esi), %xmm0
- movlpd %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 8(%edi), %eax
-# endif
- RETURN
-/* StrncpyExit9 .. StrncpyExit16 (reached through ExitStrncpyTable):
-   move exactly N bytes from (%esi) to (%edi) with an 8-byte movlpd plus
-   a second move ending at byte N (overlapping where needed), or one
-   movdqu for N = 16.  Nothing is appended.  %eax = N(%edi) for
-   USE_AS_STPCPY.  */
- .p2align 4
-L(StrncpyExit9):
- movlpd (%esi), %xmm0
- movb 8(%esi), %dl
- movlpd %xmm0, (%edi)
- movb %dl, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 9(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit10):
- movlpd (%esi), %xmm0
- movw 8(%esi), %dx
- movlpd %xmm0, (%edi)
- movw %dx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 10(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit11):
- movlpd (%esi), %xmm0
- movl 7(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 11(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit12):
- movlpd (%esi), %xmm0
- movl 8(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 12(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit13):
- movlpd (%esi), %xmm0
- movlpd 5(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
- lea 13(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit14):
- movlpd (%esi), %xmm0
- movlpd 6(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
- lea 14(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit15):
- movlpd (%esi), %xmm0
- movlpd 7(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 15(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit16):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 16(%edi), %eax
-# endif
- RETURN
-/* StrncpyExit17 .. StrncpyExit24 (reached through ExitStrncpyTable):
-   move exactly N bytes from (%esi) to (%edi) with one 16-byte movdqu
-   plus smaller moves for the tail (ending at byte N, overlapping the
-   first 16 bytes where needed).  Nothing is appended.  %eax = N(%edi)
-   for USE_AS_STPCPY.  */
- .p2align 4
-L(StrncpyExit17):
- movdqu (%esi), %xmm0
- movb 16(%esi), %cl
- movdqu %xmm0, (%edi)
- movb %cl, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 17(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit18):
- movdqu (%esi), %xmm0
- movw 16(%esi), %cx
- movdqu %xmm0, (%edi)
- movw %cx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 18(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit19):
- movdqu (%esi), %xmm0
- movl 15(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 19(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit20):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 20(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit21):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movb 20(%esi), %dl
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
- movb %dl, 20(%edi)
-# ifdef USE_AS_STPCPY
- lea 21(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit22):
- movdqu (%esi), %xmm0
- movlpd 14(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 22(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit23):
- movdqu (%esi), %xmm0
- movlpd 15(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 23(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit24):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 24(%edi), %eax
-# endif
- RETURN
-/* StrncpyExit25 .. StrncpyExit33 (reached through ExitStrncpyTable):
-   move exactly N bytes from (%esi) to (%edi); nothing is appended.
-   %eax = N(%edi) for USE_AS_STPCPY.  StrncpyExit33 has no USE_AS_STPCPY
-   adjustment; NOTE(review): the entry code compares against 16/32 in
-   that build and 17/33 otherwise, so presumably the 33-byte entry is
-   only reached in the non-STPCPY build -- confirm.  */
- .p2align 4
-L(StrncpyExit25):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movb 24(%esi), %cl
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movb %cl, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 25(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit26):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movw 24(%esi), %cx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movw %cx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 26(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit27):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 23(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
- lea 27(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit28):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 24(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 28(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit29):
- movdqu (%esi), %xmm0
- movdqu 13(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
- lea 29(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit30):
- movdqu (%esi), %xmm0
- movdqu 14(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 30(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit31):
- movdqu (%esi), %xmm0
- movdqu 15(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 31(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit32):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 32(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit33):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movb 32(%esi), %cl
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 16(%edi)
- movb %cl, 32(%edi)
- RETURN
-/* FillN (reached through FillTable): write the last N zero bytes at
-   (%edi) and return.  The zero sources are %edx (%dl/%dx) and %xmm0,
-   both cleared at StrncpyFillTailWithZero.  Fill3, Fill7 and Fill15 use
-   a single 4-, 8- or 16-byte store that starts at -1(%edi), so they also
-   rewrite the byte just before %edi with zero -- presumably that byte
-   is already zero (terminator or earlier fill); confirm.  Fill13 and
-   Fill14 use two overlapping 8-byte stores.  */
- .p2align 4
-L(Fill0):
- RETURN
-
- .p2align 4
-L(Fill1):
- movb %dl, (%edi)
- RETURN
-
- .p2align 4
-L(Fill2):
- movw %dx, (%edi)
- RETURN
-
- .p2align 4
-L(Fill3):
- movl %edx, -1(%edi)
- RETURN
-
- .p2align 4
-L(Fill4):
- movl %edx, (%edi)
- RETURN
-
- .p2align 4
-L(Fill5):
- movl %edx, (%edi)
- movb %dl, 4(%edi)
- RETURN
-
- .p2align 4
-L(Fill6):
- movl %edx, (%edi)
- movw %dx, 4(%edi)
- RETURN
-
- .p2align 4
-L(Fill7):
- movlpd %xmm0, -1(%edi)
- RETURN
-
- .p2align 4
-L(Fill8):
- movlpd %xmm0, (%edi)
- RETURN
-
- .p2align 4
-L(Fill9):
- movlpd %xmm0, (%edi)
- movb %dl, 8(%edi)
- RETURN
-
- .p2align 4
-L(Fill10):
- movlpd %xmm0, (%edi)
- movw %dx, 8(%edi)
- RETURN
-
- .p2align 4
-L(Fill11):
- movlpd %xmm0, (%edi)
- movl %edx, 7(%edi)
- RETURN
-
- .p2align 4
-L(Fill12):
- movlpd %xmm0, (%edi)
- movl %edx, 8(%edi)
- RETURN
-
- .p2align 4
-L(Fill13):
- movlpd %xmm0, (%edi)
- movlpd %xmm0, 5(%edi)
- RETURN
-
- .p2align 4
-L(Fill14):
- movlpd %xmm0, (%edi)
- movlpd %xmm0, 6(%edi)
- RETURN
-
- .p2align 4
-L(Fill15):
- movdqu %xmm0, -1(%edi)
- RETURN
-
- .p2align 4
-L(Fill16):
- movdqu %xmm0, (%edi)
- RETURN
-/* CopyFrom1To16BytesUnalignedXmm2 stores %xmm2 at offset %ecx of the
-   destination and falls through into CopyFrom1To16BytesXmmExit, the
-   common tail of all CopyFrom1To16BytesUnalignedXmmN stubs: %edx
-   becomes the index of the zero byte in the block just stored, %edi is
-   moved one past it, %eax points at it for USE_AS_STPCPY, %ebx is
-   adjusted, and execution falls through into the zero fill that
-   follows.  */
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm2):
- movdqu %xmm2, (%edi, %ecx)
-
- .p2align 4
-L(CopyFrom1To16BytesXmmExit):
- bsf %edx, %edx
- add $15, %ebx
- add %ecx, %edi
-# ifdef USE_AS_STPCPY
- lea (%edi, %edx), %eax
-# endif
- sub %edx, %ebx
- lea 1(%edi, %edx), %edi
-/* Zero-fill %ebx bytes starting at (%edi).  %xmm0 and %edx are cleared
-   here and serve as the zero sources for this code and for the FillN
-   stubs.  Counts of at most 16 go straight to FillTable.  Otherwise one
-   unaligned 16-byte store is done, %edi is rounded down to a 16-byte
-   boundary (the count is increased by the overlap), aligned stores of
-   64, then 32 and 16 bytes follow, and FillTable writes the last 0..16
-   bytes.  %esi is used as scratch.  */
- .p2align 4
-L(StrncpyFillTailWithZero):
- pxor %xmm0, %xmm0
- xor %edx, %edx
- sub $16, %ebx
- jbe L(StrncpyFillExit) /* at most 16 bytes to fill */
-
- movdqu %xmm0, (%edi)
- add $16, %edi
-
- mov %edi, %esi
- and $0xf, %esi /* %esi = misalignment of %edi */
- sub %esi, %edi /* align %edi down; the overlap is zeroed twice */
- add %esi, %ebx
- sub $64, %ebx
- jb L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
- movdqa %xmm0, (%edi)
- movdqa %xmm0, 16(%edi)
- movdqa %xmm0, 32(%edi)
- movdqa %xmm0, 48(%edi)
- add $64, %edi
- sub $64, %ebx
- jae L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
- add $32, %ebx
- jl L(StrncpyFillLess32)
- movdqa %xmm0, (%edi)
- movdqa %xmm0, 16(%edi)
- add $32, %edi
- sub $16, %ebx
- jl L(StrncpyFillExit)
- movdqa %xmm0, (%edi)
- add $16, %edi
- BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillLess32):
- add $16, %ebx
- jl L(StrncpyFillExit)
- movdqa %xmm0, (%edi)
- add $16, %edi
- BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillExit):
- add $16, %ebx /* undo the last subtraction: %ebx = bytes left for FillTable */
- BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-/* The length limit is reached inside the current 64-byte group of the
-   loop.  With a zero byte in the group (%edx != 0) control goes to
-   Unaligned64LeaveCase2.  Otherwise (Case3) %ecx is set to the
-   remaining byte count rounded down to a multiple of 16, the whole
-   blocks are stored from %xmm4..%xmm7 one by one, and
-   CopyFrom1To16BytesCase3 copies the partial block at offset %ecx.  If
-   all four blocks fit, 64 bytes are stored and the function returns
-   with %eax = 64(%edi) for USE_AS_STPCPY.  */
- .p2align 4
-L(UnalignedLeaveCase2OrCase3):
- test %edx, %edx
- jnz L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
- lea 64(%ebx), %ecx
- and $-16, %ecx
- add $48, %ebx
- jl L(CopyFrom1To16BytesCase3)
- movdqu %xmm4, (%edi)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm5, 16(%edi)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm6, 32(%edi)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm7, 48(%edi)
-# ifdef USE_AS_STPCPY
- lea 64(%edi), %eax
-# endif
- RETURN
-/* The current 64-byte group holds a zero byte and the length limit is
-   reached in it too.  The blocks in %xmm4..%xmm7 are examined in order
-   with %ecx as the running block offset: a block that passes both
-   checks is stored; if the limit is hit first control goes to
-   CopyFrom1To16BytesCase2OrCase3, and if the zero byte is found first
-   the matching CopyFrom1To16BytesUnalignedXmmN stub takes over.  For
-   the fourth block the pointers are advanced and the index of the zero
-   byte is compared with the remaining length directly.  */
- .p2align 4
-L(Unaligned64LeaveCase2):
- xor %ecx, %ecx
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $48, %ebx
- jle L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm4)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm4, (%edi)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm5)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm5, 16(%edi)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm6)
-
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm6, 32(%edi)
- lea 16(%edi, %ecx), %edi /* %ecx is 32 here: point at the fourth block */
- lea 16(%esi, %ecx), %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit) /* unsigned: index < remaining length */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-/* Length 0: return the destination pointer in %eax without reading or
-   writing any string memory.  */
- .p2align 4
-L(ExitZero):
- movl %edi, %eax
- RETURN
-
-END (STRCPY)
-
-/* Jump tables are placed in .rodata.  ExitTable entry i is ExitN with
-   N = i + 1, i.e. the index of the terminating zero byte selects the
-   stub that copies that many bytes plus the terminator.  The alignment
-   directive comes after the section switch so that it aligns the table
-   inside .rodata instead of padding the end of .text.  */
- .section .rodata
- .p2align 4
-L(ExitTable):
- .int JMPTBL(L(Exit1), L(ExitTable))
- .int JMPTBL(L(Exit2), L(ExitTable))
- .int JMPTBL(L(Exit3), L(ExitTable))
- .int JMPTBL(L(Exit4), L(ExitTable))
- .int JMPTBL(L(Exit5), L(ExitTable))
- .int JMPTBL(L(Exit6), L(ExitTable))
- .int JMPTBL(L(Exit7), L(ExitTable))
- .int JMPTBL(L(Exit8), L(ExitTable))
- .int JMPTBL(L(Exit9), L(ExitTable))
- .int JMPTBL(L(Exit10), L(ExitTable))
- .int JMPTBL(L(Exit11), L(ExitTable))
- .int JMPTBL(L(Exit12), L(ExitTable))
- .int JMPTBL(L(Exit13), L(ExitTable))
- .int JMPTBL(L(Exit14), L(ExitTable))
- .int JMPTBL(L(Exit15), L(ExitTable))
- .int JMPTBL(L(Exit16), L(ExitTable))
- .int JMPTBL(L(Exit17), L(ExitTable))
- .int JMPTBL(L(Exit18), L(ExitTable))
- .int JMPTBL(L(Exit19), L(ExitTable))
- .int JMPTBL(L(Exit20), L(ExitTable))
- .int JMPTBL(L(Exit21), L(ExitTable))
- .int JMPTBL(L(Exit22), L(ExitTable))
- .int JMPTBL(L(Exit23), L(ExitTable))
- .int JMPTBL(L(Exit24), L(ExitTable))
- .int JMPTBL(L(Exit25), L(ExitTable))
- .int JMPTBL(L(Exit26), L(ExitTable))
- .int JMPTBL(L(Exit27), L(ExitTable))
- .int JMPTBL(L(Exit28), L(ExitTable))
- .int JMPTBL(L(Exit29), L(ExitTable))
- .int JMPTBL(L(Exit30), L(ExitTable))
- .int JMPTBL(L(Exit31), L(ExitTable))
- .int JMPTBL(L(Exit32), L(ExitTable))
-/* ExitStrncpyTable is indexed by the number of bytes still to copy when
-   the length limit ends the copy: entry 0 is Exit0 (nothing to copy),
-   entry n is StrncpyExitn for n = 1..33.  */
-L(ExitStrncpyTable):
- .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
-
- .p2align 4 /* start the table on a 16-byte boundary */
-L(FillTable): /* 17 JMPTBL entries: index k -> L(Fill<k>) for k = 0..16. JMPTBL and the L(Fill*) labels are defined outside this chunk. */
- .int JMPTBL(L(Fill0), L(FillTable))
- .int JMPTBL(L(Fill1), L(FillTable))
- .int JMPTBL(L(Fill2), L(FillTable))
- .int JMPTBL(L(Fill3), L(FillTable))
- .int JMPTBL(L(Fill4), L(FillTable))
- .int JMPTBL(L(Fill5), L(FillTable))
- .int JMPTBL(L(Fill6), L(FillTable))
- .int JMPTBL(L(Fill7), L(FillTable))
- .int JMPTBL(L(Fill8), L(FillTable))
- .int JMPTBL(L(Fill9), L(FillTable))
- .int JMPTBL(L(Fill10), L(FillTable))
- .int JMPTBL(L(Fill11), L(FillTable))
- .int JMPTBL(L(Fill12), L(FillTable))
- .int JMPTBL(L(Fill13), L(FillTable))
- .int JMPTBL(L(Fill14), L(FillTable))
- .int JMPTBL(L(Fill15), L(FillTable))
- .int JMPTBL(L(Fill16), L(FillTable))
-# else
-# define PARMS 4 /* ENTRANCE pushes nothing in this variant, so STR1 (= PARMS) is 4(%esp) at entry */
-# define ENTRANCE /* empty: no registers are saved on entry */
-# define RETURN POP (%edi); ret; CFI_PUSH (%edi) /* used by exits reached after PUSH (%edi); the CFI_PUSH placed after ret re-declares %edi as pushed for the code that follows in the file */
-# define RETURN1 ret /* used by the L(ExitTail*) exits, which are reached before anything is pushed */
-
- .text
-ENTRY (STRCPY)
- ENTRANCE
- mov STR1(%esp), %edx /* %edx = first stack argument; all stores below go through it (destination) */
- mov STR2(%esp), %ecx /* %ecx = second stack argument; all loads below go through it (source) */
-/* Test source bytes 0..15 one at a time. A NUL at byte k jumps to L(ExitTail<k+1>), which copies k+1 bytes and returns without using the stack. */
- cmpb $0, (%ecx)
- jz L(ExitTail1)
- cmpb $0, 1(%ecx)
- jz L(ExitTail2)
- cmpb $0, 2(%ecx)
- jz L(ExitTail3)
- cmpb $0, 3(%ecx)
- jz L(ExitTail4)
- cmpb $0, 4(%ecx)
- jz L(ExitTail5)
- cmpb $0, 5(%ecx)
- jz L(ExitTail6)
- cmpb $0, 6(%ecx)
- jz L(ExitTail7)
- cmpb $0, 7(%ecx)
- jz L(ExitTail8)
- cmpb $0, 8(%ecx)
- jz L(ExitTail9)
- cmpb $0, 9(%ecx)
- jz L(ExitTail10)
- cmpb $0, 10(%ecx)
- jz L(ExitTail11)
- cmpb $0, 11(%ecx)
- jz L(ExitTail12)
- cmpb $0, 12(%ecx)
- jz L(ExitTail13)
- cmpb $0, 13(%ecx)
- jz L(ExitTail14)
- cmpb $0, 14(%ecx)
- jz L(ExitTail15)
- cmpb $0, 15(%ecx)
- jz L(ExitTail16)
-/* From here on the first 16 source bytes are known to contain no NUL. */
- PUSH (%edi)
- PUSH (%ebx)
-
- mov %edx, %edi /* keep the original destination; the non-stpcpy exits return it from %edi */
- lea 16(%ecx), %ebx
- and $-16, %ebx /* %ebx = lowest 16-byte-aligned address above %ecx */
- pxor %xmm0, %xmm0 /* %xmm0 = all zero bytes, the pattern pcmpeqb searches for */
- movdqu (%ecx), %xmm1
- movdqu %xmm1, (%edx) /* copy source bytes 0..15 with unaligned load/store */
- pcmpeqb (%ebx), %xmm0 /* compare the aligned 16-byte chunk at %ebx with zero */
- pmovmskb %xmm0, %eax /* %eax bit i set <=> byte i of that chunk is NUL */
- sub %ecx, %ebx /* %ebx = offset of that chunk from the source start (1..16) */
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- mov %ecx, %eax
- lea 16(%ecx), %ecx
- and $-16, %ecx /* %ecx = the aligned chunk that was just tested */
- sub %ecx, %eax /* %eax = old %ecx - new %ecx */
- sub %eax, %edx /* advance %edx by the same distance %ecx moved */
- xor %ebx, %ebx /* %ebx = running byte offset from %ecx and %edx */
-/* Six unrolled steps follow. Each loads the next aligned chunk, stores the previous NUL-free one, and tests the new one. %xmm0 is all zero on every fall-through because the pcmpeqb before it matched nothing. On each jnz, %ebx is the offset of the chunk that holds the NUL. */
- .p2align 4
- movdqa (%ecx), %xmm1
- movaps 16(%ecx), %xmm2
- movdqu %xmm1, (%edx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm3
- movdqu %xmm2, (%edx, %ebx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm4
- movdqu %xmm3, (%edx, %ebx)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm1
- movdqu %xmm4, (%edx, %ebx)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm2
- movdqu %xmm1, (%edx, %ebx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm3
- movdqu %xmm2, (%edx, %ebx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-/* No NUL found by the unrolled steps: %ebx is 0x60 here and is not modified again until L(Aligned64Leave). */
- movdqu %xmm3, (%edx, %ebx) /* store the last chunk tested above */
- mov %ecx, %eax
- lea 16(%ecx, %ebx), %ecx /* address of the first chunk not yet tested */
- and $-0x40, %ecx /* round it down to a 64-byte boundary; this may step back over bytes already copied */
- sub %ecx, %eax /* %eax = old %ecx - new %ecx */
- sub %eax, %edx /* move %edx by the same distance as %ecx */
-
-L(Aligned64Loop): /* first pass: load and test 64 aligned source bytes; nothing is stored yet */
- movaps (%ecx), %xmm2
- movaps %xmm2, %xmm4 /* %xmm4..%xmm7 keep unmodified copies of the four chunks */
- movaps 16(%ecx), %xmm5
- movaps 32(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%ecx), %xmm7
- pminub %xmm5, %xmm2 /* byte-wise unsigned minimum: a zero byte in either input gives a zero byte in the result */
- add $64, %ecx
- pminub %xmm7, %xmm3
- add $64, %edx
- pminub %xmm2, %xmm3 /* %xmm3 has a zero byte iff one of the four chunks has one */
- pcmpeqb %xmm0, %xmm3 /* %xmm0 is still all zero here; the result goes to %xmm3 */
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(Aligned64Leave)
-L(Aligned64Loop_start): /* steady state: store the 64 NUL-free bytes held in %xmm4..%xmm7 while loading and testing the next 64 */
- movdqu %xmm4, -64(%edx)
- movaps (%ecx), %xmm2
- movdqa %xmm2, %xmm4
- movdqu %xmm5, -48(%edx)
- movaps 16(%ecx), %xmm5
- pminub %xmm5, %xmm2
- movaps 32(%ecx), %xmm3
- movdqu %xmm6, -32(%edx)
- movaps %xmm3, %xmm6
- movdqu %xmm7, -16(%edx)
- movaps 48(%ecx), %xmm7
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm3, %xmm0 /* result goes to %xmm0 this time; it stays all zero whenever the loop continues */
- pmovmskb %xmm0, %eax
- add $64, %edx
- add $64, %ecx
- test %eax, %eax
- jz L(Aligned64Loop_start)
-L(Aligned64Leave): /* a NUL is somewhere in the 64 bytes held in %xmm4, %xmm5, %xmm6, %xmm7; %ecx and %edx already point 64 bytes past their start */
- sub $0xa0, %ebx /* %ebx was 0x60 (six adds of 16 after the xor), so it becomes -0x40: the offset of the %xmm4 chunk from %ecx/%edx */
- pxor %xmm0, %xmm0
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0 /* %xmm0 is all zero again because the previous compare matched nothing */
- pmovmskb %xmm0, %eax
- movdqu %xmm4, -64(%edx) /* first chunk is NUL-free: store it whole */
- test %eax, %eax
- lea 16(%ebx), %ebx /* lea leaves the flags from test intact for the jnz below */
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %eax
- movdqu %xmm5, -48(%edx)
- test %eax, %eax
- lea 16(%ebx), %ebx
- jnz L(CopyFrom1To16Bytes)
-
- movdqu %xmm6, -32(%edx)
- pcmpeqb %xmm7, %xmm0 /* the NUL must be in the fourth chunk, so no test is needed */
- pmovmskb %xmm0, %eax
- lea 16(%ebx), %ebx /* fall through into L(CopyFrom1To16Bytes) */
-/*-----------------End of main part---------------------------*/
-
- .p2align 4
-L(CopyFrom1To16Bytes): /* in: %eax = pmovmskb NUL mask of one 16-byte chunk, %ebx = offset of that chunk from %ecx/%edx; %edi and %ebx are on the stack */
- add %ebx, %edx /* %edx = destination of the chunk */
- add %ebx, %ecx /* %ecx = source of the chunk */
-
- POP (%ebx) /* only %edi remains pushed; RETURN pops it */
- test %al, %al
- jz L(ExitHigh) /* mask bits 0..7 clear: the first NUL is in chunk bytes 8..15 */
- test $0x01, %al /* bits are tested lowest first, so the first NUL byte selects the exit */
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- /* Exit 8 */
- movl (%ecx), %eax /* %al was non-zero with bits 0..6 clear, so the NUL is chunk byte 7: copy 8 bytes */
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(ExitHigh): /* the first NUL is in chunk bytes 8..15; %ah holds mask bits 8..15 */
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- /* Exit 16 */
- movlpd (%ecx), %xmm0 /* none of bits 8..14 set: copy the whole 16-byte chunk as two 8-byte moves */
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax /* stpcpy: return the address of chunk byte 15 */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit1): /* NUL is chunk byte 0: copy 1 byte */
- movb (%ecx), %al
- movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit2): /* NUL is chunk byte 1: copy 2 bytes */
- movw (%ecx), %ax
- movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit3): /* NUL is chunk byte 2: copy 3 bytes as 2 + 1 */
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit4): /* NUL is chunk byte 3: copy 4 bytes */
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit5): /* NUL is chunk byte 4: copy 5 bytes as 4 + 1 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit6): /* NUL is chunk byte 5: copy 6 bytes as 4 + 2 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit7): /* NUL is chunk byte 6: copy 7 bytes as two overlapping 4-byte moves (bytes 0..3 and 3..6) */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit9): /* NUL is chunk byte 8: copy 9 bytes as 4 + 4 + 1 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movb 8(%ecx), %al
- movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit10): /* NUL is chunk byte 9: copy 10 bytes as 4 + 4 + 2 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movw 8(%ecx), %ax
- movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit11): /* NUL is chunk byte 10: copy 11 bytes; the third 4-byte move (bytes 7..10) overlaps the second */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 7(%ecx), %eax
- movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit12): /* NUL is chunk byte 11: copy 12 bytes as 4 + 4 + 4 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit13): /* NUL is chunk byte 12: copy 13 bytes as two overlapping 8-byte moves (bytes 0..7 and 5..12) */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit14): /* NUL is chunk byte 13: copy 14 bytes as two overlapping 8-byte moves (bytes 0..7 and 6..13) */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
- .p2align 4
-L(Exit15): /* NUL is chunk byte 14: copy 15 bytes as two overlapping 8-byte moves (bytes 0..7 and 7..14) */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edi, %eax /* return the original destination saved in %edi */
-# endif
- RETURN
-
-CFI_POP (%edi) /* undo the CFI_PUSH (%edi) left in effect by the last RETURN: the L(ExitTail*) code below is reached before %edi is pushed */
-
- .p2align 4
-L(ExitTail1): /* source byte 0 is NUL: copy 1 byte */
- movb (%ecx), %al
- movb %al, (%edx)
- movl %edx, %eax /* %edx is both the original destination and the address of the NUL, so no USE_AS_STPCPY case is needed */
- RETURN1
-
- .p2align 4
-L(ExitTail2): /* source byte 1 is NUL: copy 2 bytes; nothing has been pushed yet */
- movw (%ecx), %ax
- movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail3): /* source byte 2 is NUL: copy 3 bytes as 2 + 1 */
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail4): /* source byte 3 is NUL: copy 4 bytes */
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail5): /* source byte 4 is NUL: copy 5 bytes as 4 + 1 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail6): /* source byte 5 is NUL: copy 6 bytes as 4 + 2 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail7): /* source byte 6 is NUL: copy 7 bytes as two overlapping 4-byte moves (bytes 0..3 and 3..6) */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail8): /* source byte 7 is NUL: copy 8 bytes as 4 + 4 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail9): /* source byte 8 is NUL: copy 9 bytes as 4 + 4 + 1 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movb 8(%ecx), %al
- movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail10): /* source byte 9 is NUL: copy 10 bytes as 4 + 4 + 2 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movw 8(%ecx), %ax
- movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail11): /* source byte 10 is NUL: copy 11 bytes; the third 4-byte move (bytes 7..10) overlaps the second */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 7(%ecx), %eax
- movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail12): /* source byte 11 is NUL: copy 12 bytes as 4 + 4 + 4 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail13): /* source byte 12 is NUL: copy 13 bytes as two overlapping 8-byte moves (bytes 0..7 and 5..12) */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail14): /* source byte 13 is NUL: copy 14 bytes as two overlapping 8-byte moves (bytes 0..7 and 6..13) */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail15): /* source byte 14 is NUL: copy 15 bytes as two overlapping 8-byte moves (bytes 0..7 and 7..14) */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail16): /* source byte 15 is NUL: copy 16 bytes as two 8-byte moves */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax /* stpcpy: return the address of the NUL just written */
-# else
- movl %edx, %eax /* %edx still holds the original destination */
-# endif
- RETURN1
-
-END (STRCPY)
-# endif
-
-#endif