diff options
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power7/strcmp.S')
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/strcmp.S | 168 |
1 files changed, 0 insertions, 168 deletions
diff --git a/sysdeps/powerpc/powerpc64/power7/strcmp.S b/sysdeps/powerpc/powerpc64/power7/strcmp.S deleted file mode 100644 index 14e14f457e..0000000000 --- a/sysdeps/powerpc/powerpc64/power7/strcmp.S +++ /dev/null @@ -1,168 +0,0 @@ -/* Optimized strcmp implementation for Power7 using 'cmpb' instruction - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* The optimization is achieved here through cmpb instruction. - 8byte aligned strings are processed with double word comparision - and unaligned strings are handled effectively with loop unrolling - technique */ - -#include <sysdep.h> - -#ifndef STRCMP -# define STRCMP strcmp -#endif - -/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ - - .machine power7 -EALIGN (STRCMP, 4, 0) - CALL_MCOUNT 2 - - or r9, r3, r4 - rldicl. r10, r9, 0, 61 /* are s1 and s2 8 byte aligned..? */ - bne cr0, L(process_unaligned_bytes) - li r5, 0 - - .align 4 -/* process input parameters on double word aligned boundary */ -L(unrollDword): - ld r8,0(r3) - ld r10,0(r4) - cmpb r7,r8,r5 - cmpdi cr7,r7,0 - mr r9,r7 - bne cr7,L(null_found) - cmpld cr7,r8,r10 - bne cr7,L(different) - - ld r8,8(r3) - ld r10,8(r4) - cmpb r7,r8,r5 - cmpdi cr7,r7,0 - mr r9,r7 - bne cr7,L(null_found) - cmpld cr7,r8,r10 - bne cr7,L(different) - - ld r8,16(r3) - ld r10,16(r4) - cmpb r7,r8,r5 - cmpdi cr7,r7,0 - mr r9,r7 - bne cr7,L(null_found) - cmpld cr7,r8,r10 - bne cr7,L(different) - - ld r8,24(r3) - ld r10,24(r4) - cmpb r7,r8,r5 - cmpdi cr7,r7,0 - mr r9,r7 - bne cr7,L(null_found) - cmpld cr7,r8,r10 - bne cr7,L(different) - - addi r3, r3, 32 - addi r4, r4, 32 - beq cr7, L(unrollDword) - - .align 4 -L(null_found): -#ifdef __LITTLE_ENDIAN__ - neg r7,r9 - and r9,r9,r7 - li r7,-1 - cntlzd r9,r9 - subfic r9,r9,71 - sld r9,r7,r9 -#else - cntlzd r9,r9 - li r7,-1 - addi r9,r9,8 - srd r9,r7,r9 -#endif - or r8,r8,r9 - or r10,r10,r9 - -L(different): - cmpb r9,r8,r10 -#ifdef __LITTLE_ENDIAN__ - addi r7,r9,1 - andc r9,r7,r9 - cntlzd r9,r9 - subfic r9,r9,63 -#else - not r9,r9 - cntlzd r9,r9 - subfic r9,r9,56 -#endif - srd r3,r8,r9 - srd r10,r10,r9 - rldicl r10,r10,0,56 - rldicl r3,r3,0,56 - subf r3,r10,r3 - blr - - .align 4 -L(process_unaligned_bytes): - lbz r9, 0(r3) /* load byte from s1 */ - lbz r10, 0(r4) /* load byte from s2 */ - cmpdi cr7, r9, 0 /* compare *s1 with NULL */ - beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ - cmplw cr7, r9, r10 /* compare *s1 and *s2 */ - bne cr7, L(ComputeDiff) /* branch to compute difference and return */ - - lbz r9, 1(r3) /* load next byte from s1 */ - lbz r10, 1(r4) /* load next byte from s2 */ - cmpdi cr7, r9, 0 /* compare *s1 with NULL */ - beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ - cmplw cr7, r9, r10 /* compare *s1 and *s2 */ - bne cr7, L(ComputeDiff) /* branch to compute difference and return */ - - lbz r9, 2(r3) /* unroll 3rd byte here */ - lbz r10, 2(r4) - cmpdi cr7, r9, 0 - beq cr7, L(diffOfNULL) - cmplw cr7, r9, r10 - bne 7, L(ComputeDiff) - - lbz r9, 3(r3) /* unroll 4th byte now */ - lbz r10, 3(r4) - addi r3, r3, 4 /* increment s1 by unroll factor */ - cmpdi cr7, r9, 0 - cmplw cr6, 9, r10 - beq cr7, L(diffOfNULL) - addi r4, r4, 4 /* increment s2 by unroll factor */ - beq cr6, L(process_unaligned_bytes) /* unroll byte processing */ - - .align 4 -L(ComputeDiff): - extsw r9, r9 - subf r10, r10, r9 /* compute s1 - s2 */ - extsw r3, r10 - blr /* return */ - - .align 4 -L(diffOfNULL): - li r9, 0 - subf r10, r10, r9 /* compute s1 - s2 */ - extsw r3, r10 /* sign extend result */ - blr /* return */ - -END (STRCMP) -libc_hidden_builtin_def (strcmp) |