aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--sysdeps/powerpc/powerpc64/power8/strcmp.S30
-rw-r--r--sysdeps/powerpc/powerpc64/power9/strcmp.S30
3 files changed, 22 insertions, 44 deletions
diff --git a/ChangeLog b/ChangeLog
index 6399c1f521..769e73819b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-02-07 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power8/strcmp.S: Adjust address for
+ unaligned load for shorter strings.
+ * sysdeps/powerpc/powerpc64/power9/strcmp.S: Likewise.
+
2017-02-06 Joseph Myers <joseph@codesourcery.com>
* math/libm-test-driver.c (flag_test_errno): New variable.
diff --git a/sysdeps/powerpc/powerpc64/power8/strcmp.S b/sysdeps/powerpc/powerpc64/power8/strcmp.S
index c34ff4a23b..d46bff80cd 100644
--- a/sysdeps/powerpc/powerpc64/power8/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strcmp.S
@@ -30,21 +30,21 @@
EALIGN (strcmp, 4, 0)
li r0,0
- /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using
+ /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
the code:
(((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
- with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */
+ with PAGE_SIZE being 4096 and ITER_SIZE being 16. */
rldicl r7,r3,0,52
rldicl r9,r4,0,52
- cmpldi cr7,r7,4096-32
+ cmpldi cr7,r7,4096-16
bgt cr7,L(pagecross_check)
- cmpldi cr5,r9,4096-32
+ cmpldi cr5,r9,4096-16
bgt cr5,L(pagecross_check)
- /* For short string up to 32 bytes, load both s1 and s2 using
+ /* For short strings up to 16 bytes, load both s1 and s2 using
unaligned dwords and compare. */
ld r8,0(r3)
ld r10,0(r4)
@@ -60,25 +60,11 @@ EALIGN (strcmp, 4, 0)
orc. r9,r12,r11
bne cr0,L(different_nocmpb)
- ld r8,16(r3)
- ld r10,16(r4)
- cmpb r12,r8,r0
- cmpb r11,r8,r10
- orc. r9,r12,r11
- bne cr0,L(different_nocmpb)
-
- ld r8,24(r3)
- ld r10,24(r4)
- cmpb r12,r8,r0
- cmpb r11,r8,r10
- orc. r9,r12,r11
- bne cr0,L(different_nocmpb)
-
- addi r7,r3,32
- addi r4,r4,32
+ addi r7,r3,16
+ addi r4,r4,16
L(align_8b):
- /* Now it has checked for first 32 bytes, align source1 to doubleword
+ /* Now it has checked for first 16 bytes, align source1 to doubleword
and adjust source2 address. */
rldicl r9,r7,0,61 /* source1 alignment to doubleword */
subf r4,r9,r4 /* Adjust source2 address based on source1
diff --git a/sysdeps/powerpc/powerpc64/power9/strcmp.S b/sysdeps/powerpc/powerpc64/power9/strcmp.S
index 3e32396c94..17ec8c24c3 100644
--- a/sysdeps/powerpc/powerpc64/power9/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/power9/strcmp.S
@@ -65,21 +65,21 @@
EALIGN (strcmp, 4, 0)
li r0, 0
- /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using
+ /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using
the code:
(((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
- with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */
+ with PAGE_SIZE being 4096 and ITER_SIZE being 16. */
rldicl r7, r3, 0, 52
rldicl r9, r4, 0, 52
- cmpldi cr7, r7, 4096-32
+ cmpldi cr7, r7, 4096-16
bgt cr7, L(pagecross_check)
- cmpldi cr5, r9, 4096-32
+ cmpldi cr5, r9, 4096-16
bgt cr5, L(pagecross_check)
- /* For short strings up to 32 bytes, load both s1 and s2 using
+ /* For short strings up to 16 bytes, load both s1 and s2 using
unaligned dwords and compare. */
ld r8, 0(r3)
ld r10, 0(r4)
@@ -95,25 +95,11 @@ EALIGN (strcmp, 4, 0)
orc. r9, r12, r11
bne cr0, L(different_nocmpb)
- ld r8, 16(r3)
- ld r10, 16(r4)
- cmpb r12, r8, r0
- cmpb r11, r8, r10
- orc. r9, r12, r11
- bne cr0, L(different_nocmpb)
-
- ld r8, 24(r3)
- ld r10, 24(r4)
- cmpb r12, r8, r0
- cmpb r11, r8, r10
- orc. r9, r12, r11
- bne cr0, L(different_nocmpb)
-
- addi r7, r3, 32
- addi r4, r4, 32
+ addi r7, r3, 16
+ addi r4, r4, 16
L(align):
- /* Now it has checked for first 32 bytes. */
+ /* Now it has checked for first 16 bytes. */
vspltisb v0, 0
vspltisb v2, -1
lvsr v6, 0, r4 /* Compute mask. */