aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/i386/i686/multiarch/memchr-sse2.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/memchr-sse2.S')
-rw-r--r--sysdeps/i386/i686/multiarch/memchr-sse2.S709
1 files changed, 0 insertions, 709 deletions
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S
deleted file mode 100644
index 172d70de13..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr-sse2.S
+++ /dev/null
@@ -1,709 +0,0 @@
-/* Optimized memchr with sse2 without bsf
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-/* CFI_PUSH: unwind-info notes for a 4-byte push of REG (CFA offset grows by 4, REG recorded at offset 0).  */
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-/* CFI_POP: the reverse notes for a 4-byte pop of REG.  */
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-/* PUSH/POP: the stack instruction followed by its CFI note.  */
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-/* The memchr build pushes %edi in ENTRANCE, so PARMS is 8; the rawmemchr build pushes nothing, so PARMS is 4.  */
-# ifndef USE_AS_RAWMEMCHR
-# define ENTRANCE PUSH(%edi);
-# define PARMS 8
-# define RETURN POP(%edi); ret; CFI_PUSH(%edi); /* CFI_PUSH after ret re-applies the CFI notes that POP just undid */
-# else
-# define ENTRANCE
-# define PARMS 4
-# endif
-/* %esp-relative offsets of the first and second stack arguments.  */
-# define STR1 PARMS
-# define STR2 STR1+4
-/* LEN, the third stack argument, is defined only in the memchr build.  */
-# ifndef USE_AS_RAWMEMCHR
-# define LEN STR2+4
-# endif
-/* Default symbol name, used unless MEMCHR is already defined.  */
-# ifndef MEMCHR
-# define MEMCHR __memchr_sse2
-# endif
-/* Entry.  memchr keeps the pointer in %edi and the remaining length in %edx; rawmemchr keeps the pointer in %edx.  %xmm1 ends up holding the search byte in all 16 lanes.  */
- atom_text_section
-ENTRY (MEMCHR)
- ENTRANCE
- mov STR1(%esp), %ecx /* ecx = first argument, the buffer pointer */
- movd STR2(%esp), %xmm1 /* second argument; only its low byte is used below */
-# ifndef USE_AS_RAWMEMCHR
- mov LEN(%esp), %edx
- test %edx, %edx
- jz L(return_null) /* zero length: nothing to search */
-# endif
-
- punpcklbw %xmm1, %xmm1 /* low byte -> low 2 bytes */
-# ifndef USE_AS_RAWMEMCHR
- mov %ecx, %edi
-# else
- mov %ecx, %edx
-# endif
- punpcklbw %xmm1, %xmm1 /* -> low 4 bytes */
-
- and $63, %ecx /* ecx = pointer & 63 */
- pshufd $0, %xmm1, %xmm1 /* low dword -> all four dwords */
- cmp $48, %ecx
- ja L(crosscache) /* offset 49..63: a 16-byte load here would cross a 64-byte boundary */
-
-# ifndef USE_AS_RAWMEMCHR
- movdqu (%edi), %xmm0 /* unaligned load of the first 16 bytes */
-# else
- movdqu (%edx), %xmm0 /* unaligned load of the first 16 bytes */
-# endif
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax /* eax bit i set <=> byte i equals the search byte */
- test %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- jnz L(match_case2_prolog) /* match: that path compares the index with %edx */
-/* No match in the first 16 bytes: move to the next 16-byte boundary and recount the remaining length from there.  */
- sub $16, %edx
- jbe L(return_null) /* length <= 16: every byte was already examined */
- lea 16(%edi), %edi
- and $15, %ecx /* ecx = pointer & 15 */
- and $-16, %edi /* edi = (pointer + 16) rounded down to 16 */
- add %ecx, %edx /* edx = length - 16 + (pointer & 15) = bytes left at %edi */
-# else
- jnz L(match_case1_prolog) /* match: no length to check */
- lea 16(%edx), %edx
- and $-16, %edx /* edx = (pointer + 16) rounded down to 16 */
-# endif
- jmp L(loop_prolog)
-/* Reached when (pointer & 63) > 48.  Load the aligned 16-byte block containing the start instead, and shift out the mask bits for bytes before the start.  */
- .p2align 4
-L(crosscache):
- and $15, %ecx /* ecx = pointer & 15 */
-# ifndef USE_AS_RAWMEMCHR
- and $-16, %edi
- movdqa (%edi), %xmm0
-# else
- and $-16, %edx
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- sar %cl, %eax /* drop bits below the start; bit 0 now is the first buffer byte */
- test %eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
- jnz L(match_case2_prolog1) /* that entry adds %ecx to the aligned %edi */
- /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using
- "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
- possible addition overflow. */
- neg %ecx
- add $16, %ecx /* ecx = 16 - (pointer & 15) */
- sub %ecx, %edx /* edx = bytes left after this 16-byte block */
- jbe L(return_null) /* the buffer ended inside this block */
- lea 16(%edi), %edi /* next aligned block; falls through into loop_prolog */
-# else
- jnz L(match_case1_prolog1) /* that entry adds %ecx to the aligned %edx */
- lea 16(%edx), %edx /* next aligned block; falls through into loop_prolog */
-# endif
-/* Test two groups of four aligned 16-byte blocks one block at a time, then round the pointer down to a 64-byte boundary.  %ecx holds the offset of the block under test.  */
- .p2align 4
-L(loop_prolog):
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop) /* 64 or fewer bytes left: use the length-checked tail */
- movdqa (%edi), %xmm0
-# else
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- xor %ecx, %ecx /* block offset 0 */
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 16(%edi), %xmm2
-# else
- movdqa 16(%edx), %xmm2
-# endif
- pcmpeqb %xmm1, %xmm2
- lea 16(%ecx), %ecx /* block offset 16 */
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- lea 16(%ecx), %ecx /* block offset 32 */
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 48(%edi), %xmm4
-# else
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm4
- lea 16(%ecx), %ecx /* block offset 48 */
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- lea 64(%edi), %edi /* second group of four blocks */
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa (%edi), %xmm0
-# else
- lea 64(%edx), %edx /* second group of four blocks */
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- xor %ecx, %ecx /* block offset 0 */
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 16(%edi), %xmm2
-# else
- movdqa 16(%edx), %xmm2
-# endif
- pcmpeqb %xmm1, %xmm2
- lea 16(%ecx), %ecx /* block offset 16 */
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- lea 16(%ecx), %ecx /* block offset 32 */
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 48(%edi), %xmm4
-# else
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm4
- lea 16(%ecx), %ecx /* block offset 48 */
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- lea 64(%edi), %edi
- mov %edi, %ecx
- and $-64, %edi /* round the pointer down to a 64-byte boundary */
- and $63, %ecx /* ecx = number of bytes the pointer moved back */
- add %ecx, %edx /* count those bytes as remaining again */
-# else
- lea 64(%edx), %edx
- and $-64, %edx /* round the pointer down to a 64-byte boundary */
-# endif
-/* Main loop: compare 64 bytes per iteration and merge the four results into one mask test.  After a hit, the blocks are examined in order to find the first one containing a match.  */
- .p2align 4
-L(align64_loop):
-
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop) /* 64 or fewer bytes left: use the length-checked tail */
- movdqa (%edi), %xmm0
- movdqa 16(%edi), %xmm2
- movdqa 32(%edi), %xmm3
- movdqa 48(%edi), %xmm4
-# else
- movdqa (%edx), %xmm0
- movdqa 16(%edx), %xmm2
- movdqa 32(%edx), %xmm3
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-/* Each lane is 0x00 or 0xff after pcmpeqb, so unsigned max merges the results.  %xmm3 and %xmm4 are overwritten; %xmm0 and %xmm2 keep their own results.  */
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
-# ifndef USE_AS_RAWMEMCHR
- add $64, %edi
-# else
- add $64, %edx
-# endif
- pmovmskb %xmm4, %eax
-
- test %eax, %eax
- jz L(align64_loop) /* no match anywhere in these 64 bytes */
-
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edi /* undo the advance: point at the 64 bytes just tested */
-# else
- sub $64, %edx /* undo the advance: point at the 64 bytes just tested */
-# endif
-
- pmovmskb %xmm0, %eax
- xor %ecx, %ecx /* block offset 0 */
- test %eax, %eax
- jnz L(match_case1)
-
- pmovmskb %xmm2, %eax
- lea 16(%ecx), %ecx /* block offset 16 */
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3 /* reload: %xmm3 was overwritten by pmaxub */
-# else
- movdqa 32(%edx), %xmm3 /* reload: %xmm3 was overwritten by pmaxub */
-# endif
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- lea 16(%ecx), %ecx /* block offset 32 */
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- pcmpeqb 48(%edi), %xmm1 /* overwrites %xmm1; this is the last compare on this path */
-# else
- pcmpeqb 48(%edx), %xmm1 /* overwrites %xmm1; this is the last compare on this path */
-# endif
- pmovmskb %xmm1, %eax /* not tested: the merged mask was non-zero and the first three blocks had no match */
- lea 16(%ecx), %ecx /* block offset 48; falls through into match_case1 */
-/* Return a pointer to the lowest set bit of the 16-bit mask in %eax, found with a chain of tests rather than bsf.  %ecx is the offset of the matching block.  No length check is made on this path.  */
- .p2align 4
-L(match_case1):
-# ifndef USE_AS_RAWMEMCHR
- add %ecx, %edi /* edi = address of the matching block */
-# else
-L(match_case1_prolog1):
- add %ecx, %edx /* edx = address of the matching block */
-L(match_case1_prolog):
-# endif
- test %al, %al
- jz L(match_case1_high) /* bits 0..7 clear: match is in bytes 8..15 */
- mov %al, %cl
- and $15, %cl
- jz L(match_case1_8) /* bits 0..3 clear: match is in bytes 4..7 */
- test $0x01, %al
- jnz L(ExitCase1_1)
- test $0x02, %al
- jnz L(ExitCase1_2)
- test $0x04, %al
- jnz L(ExitCase1_3)
-# ifndef USE_AS_RAWMEMCHR
- lea 3(%edi), %eax /* bits 0..2 clear, so bit 3 is the match */
- RETURN
-# else
- lea 3(%edx), %eax /* bits 0..2 clear, so bit 3 is the match */
- ret
-# endif
-
- .p2align 4
-L(match_case1_8):
- test $0x10, %al
- jnz L(ExitCase1_5)
- test $0x20, %al
- jnz L(ExitCase1_6)
- test $0x40, %al
- jnz L(ExitCase1_7)
-# ifndef USE_AS_RAWMEMCHR
- lea 7(%edi), %eax /* bits 4..6 clear, so bit 7 is the match */
- RETURN
-# else
- lea 7(%edx), %eax /* bits 4..6 clear, so bit 7 is the match */
- ret
-# endif
-
- .p2align 4
-L(match_case1_high):
- mov %ah, %ch
- and $15, %ch
- jz L(match_case1_high_8) /* bits 8..11 clear: match is in bytes 12..15 */
- test $0x01, %ah
- jnz L(ExitCase1_9)
- test $0x02, %ah
- jnz L(ExitCase1_10)
- test $0x04, %ah
- jnz L(ExitCase1_11)
-# ifndef USE_AS_RAWMEMCHR
- lea 11(%edi), %eax /* bits 8..10 clear, so bit 11 is the match */
- RETURN
-# else
- lea 11(%edx), %eax /* bits 8..10 clear, so bit 11 is the match */
- ret
-# endif
-
- .p2align 4
-L(match_case1_high_8):
- test $0x10, %ah
- jnz L(ExitCase1_13)
- test $0x20, %ah
- jnz L(ExitCase1_14)
- test $0x40, %ah
- jnz L(ExitCase1_15)
-# ifndef USE_AS_RAWMEMCHR
- lea 15(%edi), %eax /* bits 12..14 clear, so bit 15 is the match */
- RETURN
-# else
- lea 15(%edx), %eax /* bits 12..14 clear, so bit 15 is the match */
- ret
-# endif
-/* memchr tail, reached when 64 or fewer bytes remain at %edi.  Each 16-byte block is tested; a hit goes to the length-checked match_case2, a miss ends the search once the count is used up.  */
-# ifndef USE_AS_RAWMEMCHR
- .p2align 4
-L(exit_loop):
- add $64, %edx /* undo the caller's sub: edx = bytes remaining at %edi */
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm1, %xmm0
- xor %ecx, %ecx /* block offset 0 */
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(match_case2)
- cmp $16, %edx
- jbe L(return_null) /* buffer ended within the first block */
-
- movdqa 16(%edi), %xmm2
- pcmpeqb %xmm1, %xmm2
- lea 16(%ecx), %ecx /* block offset 16 */
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(match_case2)
- cmp $32, %edx
- jbe L(return_null) /* buffer ended within the second block */
-
- movdqa 32(%edi), %xmm3
- pcmpeqb %xmm1, %xmm3
- lea 16(%ecx), %ecx /* block offset 32 */
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(match_case2)
- cmp $48, %edx
- jbe L(return_null) /* buffer ended within the third block */
-
- pcmpeqb 48(%edi), %xmm1 /* overwrites %xmm1; this is the last compare on this path */
- lea 16(%ecx), %ecx /* block offset 48 */
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(match_case2)
-
- xor %eax, %eax /* no match: return NULL */
- RETURN
-# endif
-/* ExitCase1_N: return the block address plus N-1, i.e. the byte for mask bit N-1.  No length check.  */
- .p2align 4
-L(ExitCase1_1):
-# ifndef USE_AS_RAWMEMCHR
- mov %edi, %eax /* byte 0 */
- RETURN
-# else
- mov %edx, %eax /* byte 0 */
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_2):
-# ifndef USE_AS_RAWMEMCHR
- lea 1(%edi), %eax
- RETURN
-# else
- lea 1(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_3):
-# ifndef USE_AS_RAWMEMCHR
- lea 2(%edi), %eax
- RETURN
-# else
- lea 2(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_5):
-# ifndef USE_AS_RAWMEMCHR
- lea 4(%edi), %eax
- RETURN
-# else
- lea 4(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_6):
-# ifndef USE_AS_RAWMEMCHR
- lea 5(%edi), %eax
- RETURN
-# else
- lea 5(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_7):
-# ifndef USE_AS_RAWMEMCHR
- lea 6(%edi), %eax
- RETURN
-# else
- lea 6(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_9):
-# ifndef USE_AS_RAWMEMCHR
- lea 8(%edi), %eax
- RETURN
-# else
- lea 8(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_10):
-# ifndef USE_AS_RAWMEMCHR
- lea 9(%edi), %eax
- RETURN
-# else
- lea 9(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_11):
-# ifndef USE_AS_RAWMEMCHR
- lea 10(%edi), %eax
- RETURN
-# else
- lea 10(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_13):
-# ifndef USE_AS_RAWMEMCHR
- lea 12(%edi), %eax
- RETURN
-# else
- lea 12(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_14):
-# ifndef USE_AS_RAWMEMCHR
- lea 13(%edi), %eax
- RETURN
-# else
- lea 13(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_15):
-# ifndef USE_AS_RAWMEMCHR
- lea 14(%edi), %eax
- RETURN
-# else
- lea 14(%edx), %eax
- ret
-# endif
-/* memchr only.  Same bit scan as match_case1, but apart from index 0 the matching byte is returned only when its index is below the count in %edx; otherwise the result is NULL.  */
-# ifndef USE_AS_RAWMEMCHR
- .p2align 4
-L(match_case2):
- sub %ecx, %edx /* edx = bytes remaining counted from the matching block */
-L(match_case2_prolog1):
- add %ecx, %edi /* edi = address that mask bit 0 refers to */
-L(match_case2_prolog):
- test %al, %al
- jz L(match_case2_high) /* bits 0..7 clear: match is in bytes 8..15 */
- mov %al, %cl
- and $15, %cl
- jz L(match_case2_8) /* bits 0..3 clear: match is in bytes 4..7 */
- test $0x01, %al
- jnz L(ExitCase2_1)
- test $0x02, %al
- jnz L(ExitCase2_2)
- test $0x04, %al
- jnz L(ExitCase2_3)
- sub $4, %edx
- jb L(return_null) /* index 3 needs at least 4 bytes remaining */
- lea 3(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_case2_8):
- test $0x10, %al
- jnz L(ExitCase2_5)
- test $0x20, %al
- jnz L(ExitCase2_6)
- test $0x40, %al
- jnz L(ExitCase2_7)
- sub $8, %edx
- jb L(return_null) /* index 7 needs at least 8 bytes remaining */
- lea 7(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_case2_high):
- mov %ah, %ch
- and $15, %ch
- jz L(match_case2_high_8) /* bits 8..11 clear: match is in bytes 12..15 */
- test $0x01, %ah
- jnz L(ExitCase2_9)
- test $0x02, %ah
- jnz L(ExitCase2_10)
- test $0x04, %ah
- jnz L(ExitCase2_11)
- sub $12, %edx
- jb L(return_null) /* index 11 needs at least 12 bytes remaining */
- lea 11(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_case2_high_8):
- test $0x10, %ah
- jnz L(ExitCase2_13)
- test $0x20, %ah
- jnz L(ExitCase2_14)
- test $0x40, %ah
- jnz L(ExitCase2_15)
- sub $16, %edx
- jb L(return_null) /* index 15 needs at least 16 bytes remaining */
- lea 15(%edi), %eax
- RETURN
-/* ExitCase2_N: return %edi + N-1 if at least N bytes remain in %edx, otherwise NULL.  */
- .p2align 4
-L(ExitCase2_1):
- mov %edi, %eax /* index 0 is returned without a length check */
- RETURN
-
- .p2align 4
-L(ExitCase2_2):
- sub $2, %edx
- jb L(return_null)
- lea 1(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_3):
- sub $3, %edx
- jb L(return_null)
- lea 2(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_5):
- sub $5, %edx
- jb L(return_null)
- lea 4(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_6):
- sub $6, %edx
- jb L(return_null)
- lea 5(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_7):
- sub $7, %edx
- jb L(return_null)
- lea 6(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_9):
- sub $9, %edx
- jb L(return_null)
- lea 8(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_10):
- sub $10, %edx
- jb L(return_null)
- lea 9(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_11):
- sub $11, %edx
- jb L(return_null)
- lea 10(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_13):
- sub $13, %edx
- jb L(return_null)
- lea 12(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_14):
- sub $14, %edx
- jb L(return_null)
- lea 13(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_15):
- sub $15, %edx
- jb L(return_null)
- lea 14(%edi), %eax
- RETURN
-# endif
-/* Shared "not found" exit: return NULL.  Every branch to this label in this file sits inside a memchr-only (#ifndef USE_AS_RAWMEMCHR) region.  */
- .p2align 4
-L(return_null):
- xor %eax, %eax /* eax = 0 */
-# ifndef USE_AS_RAWMEMCHR
- RETURN
-# else
- ret
-# endif
-
-END (MEMCHR)
-#endif