about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> 2015-08-18 22:40:56 +0530
committer Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> 2015-08-25 13:45:56 -0300
commit fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f (patch)
tree 242d3afd1b66ac57b75eaf8814f5e439971258c1
parent 2194737e77256a847ed4fca7652e4dcb8d3f9c1e (diff)
download glibc-fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f.tar
glibc-fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f.tar.gz
glibc-fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f.tar.bz2
glibc-fe7faec3e56a8dd64f78023a2f4a74fc8d42e79f.zip
powerpc: Handle worstcase behavior in strstr() for POWER7
Instead of checking the needle length, count the number of search iterations and fall back to the default implementation once a constant number 'n' of them has been performed.

This patch is tested on powerpc64 and powerpc64le.

2015-08-25  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
-rw-r--r-- ChangeLog 4
-rw-r--r-- sysdeps/powerpc/powerpc64/power7/strstr.S 22
2 files changed, 19 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index a442ee1e12..13fc3fb98a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-08-25 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
+
2015-08-25 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/x86_64/strlen.S: Replace %xmm[8-12] with %xmm[0-4].
diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
index bfb0c4992a..fb3c810ac6 100644
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
@@ -23,6 +23,8 @@
/* The performance gain is obtained using aligned memory access, load
* doubleword and usage of cmpb instruction for quicker comparison. */
+#define ITERATIONS 64
+
#ifndef STRLEN
/* For builds with no IFUNC support, local calls should be made to internal
GLIBC symbol (created by libc_hidden_builtin_def). */
@@ -62,6 +64,8 @@ EALIGN (strstr, 4, 0)
cfi_offset(r30, -16)
std r29, -24(r1) /* Save callers register r29. */
cfi_offset(r29, -24)
+ std r28, -32(r1) /* Save callers register r28. */
+ cfi_offset(r28, -32)
std r0, 16(r1) /* Store the link register. */
cfi_offset(lr, 16)
stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */
@@ -69,7 +73,6 @@ EALIGN (strstr, 4, 0)
dcbt 0, r3
dcbt 0, r4
-
cmpdi cr7, r3, 0
beq cr7, L(retnull)
cmpdi cr7, r4, 0
@@ -84,10 +87,6 @@ EALIGN (strstr, 4, 0)
cmpdi cr7, r3, 0 /* If search str is null. */
beq cr7, L(ret_r3)
- /* Call __strstr_ppc if needle len > 2048 */
- cmpdi cr7, r3, 2048
- bgt cr7, L(default)
-
mr r31, r3
mr r4, r3
mr r3, r29
@@ -105,7 +104,8 @@ EALIGN (strstr, 4, 0)
/* If first char of search str is not present. */
cmpdi cr7, r3, 0
ble cr7, L(end)
-
+ /* Reg r28 is used to count the number of iterations. */
+ li r28, 0
rldicl r8, r3, 0, 52 /* Page cross check. */
cmpldi cr7, r8, 4096-16
bgt cr7, L(bytebybyte)
@@ -324,6 +324,10 @@ L(return4):
.align 4
L(begin):
mr r3, r8
+ /* When the iteration count reaches ITERATIONS, fall back to default. */
+ addi r28, r28, 1
+ cmpdi cr7, r28, ITERATIONS
+ beq cr7, L(default)
lbz r4, 0(r30)
bl STRCHR
nop
@@ -423,6 +427,10 @@ L(nextbyte):
cmpdi cr7, r9, -1
beq cr7, L(end)
addi r3, r4, 1
+ /* When the iteration count reaches ITERATIONS, fall back to default. */
+ addi r28, r28, 1
+ cmpdi cr7, r28, ITERATIONS
+ beq cr7, L(default)
lbz r4, 0(r30)
bl STRCHR
nop
@@ -490,7 +498,6 @@ L(retnull):
.align 4
L(default):
- mr r3, r29
mr r4, r30
bl __strstr_ppc
nop
@@ -500,6 +507,7 @@ L(end):
addi r1, r1, FRAMESIZE /* Restore stack pointer. */
cfi_adjust_cfa_offset(-FRAMESIZE)
ld r0, 16(r1) /* Restore the saved link register. */
+ ld r28, -32(r1) /* Restore callers save register r28. */
ld r29, -24(r1) /* Restore callers save register r29. */
ld r30, -16(r1) /* Restore callers save register r30. */
ld r31, -8(r1) /* Restore callers save register r31. */