diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2011-12-17 20:32:59 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-12-17 20:32:59 -0500 |
commit | f0b264f17458b2289a7354fb606fbdfca58826fb (patch) | |
tree | 451b7569b2833ac0096db576342fb38b0d03a1fc | |
parent | 36b1a74da5ab853f9cc3e62bb3d74818fb778ee8 (diff) | |
download | glibc-f0b264f17458b2289a7354fb606fbdfca58826fb.tar glibc-f0b264f17458b2289a7354fb606fbdfca58826fb.tar.gz glibc-f0b264f17458b2289a7354fb606fbdfca58826fb.tar.bz2 glibc-f0b264f17458b2289a7354fb606fbdfca58826fb.zip |
Optimized strcasecmp for Power7
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | sysdeps/powerpc/Makefile | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/locale-defines.sym | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power7/Makefile | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power7/strcasecmp.S | 132 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/Makefile | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/strcasecmp.S | 125 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S | 5 |
10 files changed, 298 insertions, 1 deletions
@@ -1,3 +1,16 @@ +2011-11-22 Adhemerval Zanella <azanella@linux.vnet.ibm.com> + + * sysdeps/powerpc/Makefile: Added locale-defines.sym generation. + * sysdeps/powerpc/locale-defines.sym: Locale definitions for strcasecmp + optimized code. + * sysdeps/powerpc/powerpc32/power7/Makefile: New file. + * sysdeps/powerpc/powerpc32/power7/strcasecmp.S: New file. + * sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S: New file. + * sysdeps/powerpc/powerpc64/power7/Makefile: Added unroll-loop option + for strncasecmp/strncasecmp_l compilation. + * sysdeps/powerpc/powerpc64/power7/strcasecmp.S: New file. + * sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S: New file. + 2011-12-08 Marek Polacek <mpolacek@redhat.com> [BZ #13484] @@ -62,7 +62,8 @@ Version 2.15 * Optimized strcasecmp and strncasecmp for SSSE3 and SSE4.2 on x86-32. Implemented by Ulrich Drepper. -* Optimized nearbyint for PPC. Implemented by Adhemerval Zanella. +* Optimized nearbyint and strcasecmp for PPC. + Implemented by Adhemerval Zanella. Version 2.14 diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile index e43ca704f0..23a9a16730 100644 --- a/sysdeps/powerpc/Makefile +++ b/sysdeps/powerpc/Makefile @@ -23,4 +23,6 @@ endif ifeq ($(subdir),csu) # get offset to rtld_global._dl_hwcap gen-as-const-headers += rtld-global-offsets.sym +# get offset to __locale_struct.__ctype_tolower +gen-as-const-headers += locale-defines.sym endif diff --git a/sysdeps/powerpc/locale-defines.sym b/sysdeps/powerpc/locale-defines.sym new file mode 100644 index 0000000000..af64b920a4 --- /dev/null +++ b/sysdeps/powerpc/locale-defines.sym @@ -0,0 +1,5 @@ +#include <locale/localeinfo.h> + +-- + +LOCALE_CTYPE_TOLOWER offsetof (struct __locale_struct, __ctype_tolower) diff --git a/sysdeps/powerpc/powerpc32/power7/Makefile b/sysdeps/powerpc/powerpc32/power7/Makefile new file mode 100644 index 0000000000..5e8f4a28ba --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power7/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +CFLAGS-strncase.c += -funroll-loops +CFLAGS-strncase_l.c += -funroll-loops +endif diff --git a/sysdeps/powerpc/powerpc32/power7/strcasecmp.S b/sysdeps/powerpc/powerpc32/power7/strcasecmp.S new file mode 100644 index 0000000000..5d84fce470 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power7/strcasecmp.S @@ -0,0 +1,132 @@ +/* Optimized strcasecmp implementation for PowerPC32. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (BP_SYM (__STRCMP)) + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte readed from 1st string */ +#define rCHAR2 r7 /* Byte readed from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Byte tolower(rCHAR1) */ +#define rLWR2 r12 /* Byte tolower(rCHAR2) */ +#define rTMP r0 +#define rGOT r9 /* Address of the Global Offset Table */ +#define rLOC r11 /* Default locale address */ + + cmpw cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL +# ifdef SHARED + mflr rTMP + bcl 20,31,.L1 +.L1: mflr rGOT + addis rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@ha + addi rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@l + lwz rLOC, __libc_tsd_LOCALE@got@tprel(rGOT) + add rLOC, rLOC, __libc_tsd_LOCALE@tls + lwz rLOC, 0(rLOC) + mtlr rTMP +# else + lis rTMP,_GLOBAL_OFFSET_TABLE_@ha + la rLOC,_GLOBAL_OFFSET_TABLE_@l(rTMP) + lwz rLOC, __libc_tsd_LOCALE@got@tprel(rGOT) + add rLOC, rLOC, __libc_tsd_LOCALE@tls + lwz rLOC, 0(rLOC) +# endif /* SHARED */ +#else + mr rLOC, rLOCARG +#endif + mr rSTR1, rRTN + lwz rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + li rRTN, 0 + beqlr cr7 + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + +L(loop): + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpwi cr7, rCHAR1, 0 /* *s1 == '\0' ? */ + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + bne cr7,L(loop) + blr +END (BP_SYM (__STRCMP)) + +weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP)) +libc_hidden_builtin_def (__STRCMP) diff --git a/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S b/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" diff --git a/sysdeps/powerpc/powerpc64/power7/Makefile b/sysdeps/powerpc/powerpc64/power7/Makefile index b0f45205b9..40aacfa15a 100644 --- a/sysdeps/powerpc/powerpc64/power7/Makefile +++ b/sysdeps/powerpc/powerpc64/power7/Makefile @@ -3,3 +3,8 @@ ifeq ($(subdir),elf) # optimization may require a TOC reference before relocations are resolved. CFLAGS-rtld.c += -mno-vsx endif + +ifeq ($(subdir),string) +CFLAGS-strncase.c += -funroll-loops +CFLAGS-strncase_l.c += -funroll-loops +endif diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S new file mode 100644 index 0000000000..1477b2e17e --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -0,0 +1,125 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (BP_SYM (__STRCMP)) + CALL_MCOUNT 2 + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte readed from 1st string */ +#define rCHAR2 r7 /* Byte readed from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) +#else + mr rLOC, rLOCARG +#endif + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 + + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1,L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + b L(loop) +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (BP_SYM (__STRCMP)) + +weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP)) +libc_hidden_builtin_def (__STRCMP) diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" |