/* strcmp with SSE4.2
   Copyright (C) 2010 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#ifndef NOT_IN_libc

/* NOTE(review): the first include had lost its operand in the copy under
   review; <sysdep.h> is assumed to be the header providing ENTRY, END,
   L() and the cfi_* macros used below -- confirm against the tree.  */
#include <sysdep.h>
#include "asm-syntax.h"

/* CFI bookkeeping for a 4-byte push/pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

/* Push/pop REG and keep the unwind information in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments at the point they are loaded.  The
   strncmp variant pushes %ebp first, hence the extra 4 bytes.  */
#ifndef USE_AS_STRNCMP
# ifndef STRCMP
#  define STRCMP	__strcmp_sse4_2
# endif
# define STR1		4
# define STR2		STR1+4
#else
# ifndef STRCMP
#  define STRCMP	__strncmp_sse4_2
# endif
# define STR1		8
# define STR2		STR1+4
# define CNT		STR2+4
#endif

/* STRCMP (s1, s2) / STRCMP (s1, s2, n) when USE_AS_STRNCMP is defined.

   Syntax: AT&T, 32-bit x86.  Arguments are read from the stack.

   Register use in the entry and byte-wise paths:
     %edx  s1 pointer
     %eax  s2 pointer
     %ecx  scratch
     %ebp  remaining byte count (strncmp only; saved and restored)
     %xmm0-%xmm2  scratch

   Register use in the 16-byte loop (L(check_offset) onwards):
     %edi  s1 base, %esi  s2 base, %edx  signed index from the bases
     %eax  0 while the strings match, otherwise the result
     %ebx, %ecx  scratch (%ecx also receives the PCMPISTRI index)
     %ebx, %edi, %esi are saved on entry to that path and restored.

   Result in %eax: 0 when equal; otherwise either +1/-1 (L(neq)) or the
   difference of the first mismatching bytes (L(end), L(crosspage)).  */

	.section .text.sse4.2,"ax",@progbits
ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP
	PUSH	(%ebp)
#endif
	mov	STR1(%esp), %edx
	mov	STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
	movl	CNT(%esp), %ebp
	test	%ebp, %ebp
	je	L(eq)			/* n == 0: equal by definition.  */
#endif
	/* A 16-byte load is only attempted when the pointer's offset
	   within its 4 KiB page is at most 0xff0; otherwise go byte-wise.  */
	mov	%edx, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(first4bytes)
	movdqu	(%edx), %xmm2
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(first4bytes)
	/* Quick reject on the first four bytes.  %ecx keeps s1's first
	   dword for L(less16bytes).  */
	movd	%xmm2, %ecx
	cmp	(%eax), %ecx
	jne	L(less4bytes)
	/* %xmm1 = s1 ^ s2; CF from PTEST against zero is set only when
	   all 16 bytes are identical.  */
	movdqu	(%eax), %xmm1
	pxor	%xmm2, %xmm1
	pxor	%xmm0, %xmm0
	ptest	%xmm1, %xmm0
	jnc	L(less16bytes)
	/* %xmm2 = per-byte (s1 == 0); CF is set only when no NUL exists.  */
	pcmpeqb	%xmm0, %xmm2
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)

#ifdef USE_AS_STRNCMP
	sub	$16, %ebp
	jbe	L(eq)
#endif
	add	$16, %edx
	add	$16, %eax

	/* Compare eight bytes one at a time, then enter the 16-byte loop.  */
L(first4bytes):
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	je	L(eq)
#endif

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	je	L(eq)
#endif

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	je	L(eq)
#endif

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	je	L(eq)
#endif

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	je	L(eq)
#endif

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	je	L(eq)
#endif

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	je	L(eq)
#endif

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	sub	$8, %ebp
	je	L(eq)
#endif
	add	$8, %eax
	add	$8, %edx

	PUSH	(%ebx)
	PUSH	(%edi)
	PUSH	(%esi)
#ifdef USE_AS_STRNCMP
	/* Only the strncmp build has a second exit (L(more16byteseq))
	   that needs this frame state again after L(ret).  */
	cfi_remember_state
#endif
	mov	%edx, %edi
	mov	%eax, %esi
	xorl	%eax, %eax		/* Result stays 0 while bytes match.  */

	/* Rebase so that %edx = max (page offset of s1, s2) - 0xff0.
	   While %edx <= 0 a 16-byte load at (base,%edx) stays inside the
	   page for both strings.  */
L(check_offset):
	movl	%edi, %ebx
	movl	%esi, %ecx
	andl	$0xfff, %ebx
	andl	$0xfff, %ecx
	cmpl	%ebx, %ecx
	cmovl	%ebx, %ecx
	lea	-0xff0(%ecx), %edx
	sub	%edx, %edi
	sub	%edx, %esi
	testl	%edx, %edx
	jg	L(crosspage)
L(loop):
	movdqu	(%esi,%edx), %xmm2
	movdqu	(%edi,%edx), %xmm1
	/* imm8 0x1a: signed bytes, equal-each, negated result, least
	   significant index.  CF = mismatch found, ZF = NUL in %xmm2,
	   %ecx = index of the first mismatch.  */
	pcmpistri	$0x1a, %xmm2, %xmm1
	jbe	L(end)

#ifdef USE_AS_STRNCMP
	sub	$16, %ebp
	jbe	L(more16byteseq)
#endif

	add	$16, %edx
	jle	L(loop)

	/* Byte-wise comparison up to index 15, i.e. across the page
	   boundary.  Entered with %edx in 1..16.  */
L(crosspage):
	movzbl	(%edi,%edx), %eax
	movzbl	(%esi,%edx), %ebx
	subl	%ebx, %eax
	jne	L(ret)
	testl	%ebx, %ebx
	je	L(ret)
#ifdef USE_AS_STRNCMP
	sub	$1, %ebp
	jbe	L(more16byteseq)
#endif
	inc	%edx
	cmp	$15, %edx
	jle	L(crosspage)
	/* Advance by the number of bytes actually consumed.  %edx is 16
	   here except when L(loop) fell through with %edx == 16, in which
	   case one extra byte was compared (and, for strncmp, counted) and
	   %edx is 17.  Adding a fixed 16 would re-compare that byte and
	   count it twice, making strncmp stop one byte early.  */
	add	%edx, %edi
	add	%edx, %esi
	jmp	L(check_offset)

L(end):
	jnc	L(ret)			/* NUL without mismatch: %eax is 0.  */
#ifdef USE_AS_STRNCMP
	sub	%ecx, %ebp
	jbe	L(more16byteseq)	/* Mismatch lies beyond the limit.  */
#endif
	lea	(%ecx,%edx), %ebx
	movzbl	(%edi,%ebx), %eax
	movzbl	(%esi,%ebx), %ecx
	subl	%ecx, %eax
L(ret):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

#ifdef USE_AS_STRNCMP
	/* Reached from the loop with %ebx/%edi/%esi still on the stack.  */
	cfi_restore_state
L(more16byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif
L(eq):
	xorl	%eax, %eax
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

#ifdef USE_AS_STRNCMP
	/* The code below is entered with %ebp still pushed.  */
	CFI_PUSH (%ebp)
#endif
	/* Flags come from `cmpb s2byte, s1byte'; `mov' leaves them intact.
	   Above (unsigned) yields +1, otherwise -1.  */
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

	.p2align 4
#ifdef USE_AS_STRNCMP
	CFI_PUSH (%ebp)
#endif
	/* The first 16 bytes contain a mismatch or a NUL.  On entry %ecx
	   holds the first dword of s1, which equals that of s2.  */
L(less16bytes):
	/* Word-at-a-time NUL test on bytes 0..3.  */
	add	$0xfefefeff, %ecx
	jnc	L(less4bytes)
	xor	(%edx), %ecx
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(less4bytes)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(eq)
#endif
	/* Same for bytes 4..7, preceded by an equality check.  */
	mov	4(%edx), %ecx
	cmp	4(%eax), %ecx
	jne	L(more4bytes)
	add	$0xfefefeff, %ecx
	jnc	L(more4bytes)
	xor	4(%edx), %ecx
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(more4bytes)

#ifdef USE_AS_STRNCMP
	sub	$8, %ebp
	jbe	L(eq)
#endif

	add	$8, %edx
	add	$8, %eax
L(less4bytes):
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	je	L(eq)
#endif
	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	je	L(eq)
#endif

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	je	L(eq)
#endif
	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

L(more4bytes):
#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	je	L(eq)
#endif
	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	je	L(eq)
#endif
	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	je	L(eq)
#endif
	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	je	L(eq)
#endif
	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	/* An earlier revision tested for NUL here and then fell off the
	   end of the function.  Every route into this chain has already
	   established that a mismatch or a NUL lies at or before this
	   byte, so a matching byte here means the strings are equal.  */
	jmp	L(eq)

END (STRCMP)

#endif