aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>2003-04-04 22:03:25 +0000
committerRoland McGrath <roland@gnu.org>2003-04-04 22:03:25 +0000
commitbeb03cee27a133e3fd34795e32d6d51c7b7b4d4d (patch)
tree920a38ae4bd7d92bbb77e229af7b92e14c4c66ab
parent91613ed9d8e3cdef7a4257b1bec241828fa222c1 (diff)
downloadglibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.tar
glibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.tar.gz
glibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.tar.bz2
glibc-beb03cee27a133e3fd34795e32d6d51c7b7b4d4d.zip
* sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
* sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
* sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before defining.
-rw-r--r--ChangeLog8
-rw-r--r--sysdeps/powerpc/powerpc64/strchr.S42
-rw-r--r--sysdeps/powerpc/powerpc64/strlen.S70
3 files changed, 74 insertions, 46 deletions
diff --git a/ChangeLog b/ChangeLog
index 101a229bf0..c663eb4d10 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2003-04-04 Steven Munroe <sjmunroe@us.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/strchr.S: 64-bit optimizations.
+ * sysdeps/powerpc/powerpc64/strlen.S: 64-bit optimizations.
+
+ * sysdeps/powerpc/fpu/bits/mathdef.h (FLT_EVAL_METHOD): Undef before
+ defining.
+
2003-04-04 Alexandre Oliva <aoliva@redhat.com>
* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (struct flock): Adjust
diff --git a/sysdeps/powerpc/powerpc64/strchr.S b/sysdeps/powerpc/powerpc64/strchr.S
index f6d418bcae..e581f8e77a 100644
--- a/sysdeps/powerpc/powerpc64/strchr.S
+++ b/sysdeps/powerpc/powerpc64/strchr.S
@@ -1,5 +1,5 @@
/* Optimized strchr implementation for PowerPC64.
- Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -29,6 +29,11 @@ ENTRY (BP_SYM (strchr))
#define rTMP1 r0
#define rRTN r3 /* outgoing result */
+/* Note: The Bounded pointer support in this code is broken. This code
+ was inherited from PPC32 and that support was never completed.
+ Currently PPC gcc does not support -fbounds-check or -fbounded-pointers.
+ These artifacts are left in the code as a reminder in case we need
+ bounded pointer support in the future. */
#if __BOUNDED_POINTERS__
# define rSTR r4
# define rCHR r5 /* byte we're looking for, spread over the whole word */
@@ -39,8 +44,8 @@ ENTRY (BP_SYM (strchr))
# define rWORD r5 /* the current word */
#endif
#define rCLZB rCHR /* leading zero byte count */
-#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
-#define r7F7F r7 /* constant 0x7f7f7f7f */
+#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
#define rTMP2 r9
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
@@ -49,18 +54,23 @@ ENTRY (BP_SYM (strchr))
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
+ dcbt 0,rRTN
rlwimi rCHR, rCHR, 8, 16, 23
li rMASK, -1
rlwimi rCHR, rCHR, 16, 0, 15
- rlwinm rIGN, rRTN, 3, 27, 28
+ rlwinm rIGN, rRTN, 3, 26, 28
+ insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101
lis r7F7F, 0x7f7f
- clrrdi rSTR, rRTN, 2
+ clrrdi rSTR, rRTN, 3
addi rFEFE, rFEFE, -0x101
addi r7F7F, r7F7F, 0x7f7f
+ sldi rTMP1, rFEFE, 32
+ insrdi r7F7F, r7F7F, 32, 0
+ add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */
- lwz rWORD, 0(rSTR)
- srw rMASK, rMASK, rIGN
+ ld rWORD, 0(rSTR)
+ srd rMASK, rMASK, rIGN
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
@@ -71,7 +81,7 @@ ENTRY (BP_SYM (strchr))
/* The loop. */
-L(loop):lwzu rWORD, 4(rSTR)
+L(loop):ldu rWORD, 8(rSTR)
and. rTMP1, rTMP1, rTMP2
/* Test for 0. */
add rTMP1, rFEFE, rWORD
@@ -104,12 +114,12 @@ L(missed):
add rTMP1, rTMP1, r7F7F
nor rWORD, rMASK, rFEFE
nor rTMP2, rIGN, rTMP1
- cmplw rWORD, rTMP2
+ cmpld rWORD, rTMP2
bgtlr
- cntlzw rCLZB, rTMP2
- srwi rCLZB, rCLZB, 3
+ cntlzd rCLZB, rTMP2
+ srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
- CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
+ CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR)
blr
@@ -118,11 +128,11 @@ L(foundit):
or rIGN, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
nor rTMP2, rIGN, rTMP1
- cntlzw rCLZB, rTMP2
- subi rSTR, rSTR, 4
- srwi rCLZB, rCLZB, 3
+ cntlzd rCLZB, rTMP2
+ subi rSTR, rSTR, 8
+ srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
- CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
+ CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR)
blr
END (BP_SYM (strchr))
diff --git a/sysdeps/powerpc/powerpc64/strlen.S b/sysdeps/powerpc/powerpc64/strlen.S
index 7907382002..22a835b109 100644
--- a/sysdeps/powerpc/powerpc64/strlen.S
+++ b/sysdeps/powerpc/powerpc64/strlen.S
@@ -1,5 +1,5 @@
/* Optimized strlen implementation for PowerPC64.
- Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -60,7 +60,12 @@
2) How popular are bytes with the high bit set? If they are very rare,
on some processors it might be useful to use the simpler expression
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
- ALU), but this fails when any character has its high bit set. */
+ ALU), but this fails when any character has its high bit set.
+
+ Answer:
+ 1) Added a Data Cache Block Touch early to prefetch the first 128
+ byte cache line. Adding dcbt instructions to the loop would not be
+ effective since most strings will be shorter than the cache line. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
@@ -80,63 +85,68 @@ ENTRY (BP_SYM (strlen))
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
string to make it start at a word boundary */
-#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
-#define r7F7F r7 /* constant 0x7f7f7f7f */
-#define rWORD1 r8 /* current string word */
-#define rWORD2 r9 /* next string word */
-#define rMASK r9 /* mask for first string word */
+#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
+#define rWORD1 r8 /* current string doubleword */
+#define rWORD2 r9 /* next string doubleword */
+#define rMASK r9 /* mask for first string doubleword */
#define rTMP2 r10
#define rTMP3 r11
#define rTMP4 r12
+/* Note: The Bounded pointer support in this code is broken. This code
+ was inherited from PPC32 and that support was never completed.
+ Current PPC gcc does not support -fbounds-check or -fbounded-pointers.
+ These artifacts are left in the code as a reminder in case we need
+ bounded pointer support in the future. */
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
- clrrdi rSTR, rRTN, 2
+ dcbt 0,rRTN
+ clrrdi rSTR, rRTN, 3
lis r7F7F, 0x7f7f
- rlwinm rPADN, rRTN, 3, 27, 28
- lwz rWORD1, 0(rSTR)
- li rMASK, -1
+ rlwinm rPADN, rRTN, 3, 26, 28
+ ld rWORD1, 0(rSTR)
addi r7F7F, r7F7F, 0x7f7f
-/* That's the setup done, now do the first pair of words.
- We make an exception and use method (2) on the first two words, to reduce
- overhead. */
- srw rMASK, rMASK, rPADN
+ li rMASK, -1
+ insrdi r7F7F, r7F7F, 32, 0
+/* That's the setup done, now do the first pair of doublewords.
+ We make an exception and use method (2) on the first two doublewords,
+ to reduce overhead. */
+ srd rMASK, rMASK, rPADN
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
+ lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F
+ addi rFEFE, rFEFE, -0x101
nor rTMP1, rTMP2, rTMP1
and. rWORD1, rTMP1, rMASK
mtcrf 0x01, rRTN
bne L(done0)
- lis rFEFE, -0x101
- addi rFEFE, rFEFE, -0x101
- clrldi rFEFE,rFEFE,32 /* clear upper 32 */
+ sldi rTMP1, rFEFE, 32
+ add rFEFE, rFEFE, rTMP1
/* Are we now aligned to a doubleword boundary? */
- bt 29, L(loop)
+ bt 28, L(loop)
-/* Handle second word of pair. */
- lwzu rWORD1, 4(rSTR)
+/* Handle second doubleword of pair. */
+ ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
nor. rWORD1, rTMP2, rTMP1
- clrldi. rWORD1,rWORD1,32 /* clear upper 32 */
bne L(done0)
/* The loop. */
L(loop):
- lwz rWORD1, 4(rSTR)
- lwzu rWORD2, 8(rSTR)
+ ld rWORD1, 8(rSTR)
+ ldu rWORD2, 16(rSTR)
add rTMP1, rFEFE, rWORD1
nor rTMP2, r7F7F, rWORD1
and. rTMP1, rTMP1, rTMP2
- clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
and. rTMP1, rTMP3, rTMP4
- clrldi. rTMP1,rTMP1,32 /* clear upper 32 */
beq L(loop)
and rTMP1, r7F7F, rWORD2
@@ -146,17 +156,17 @@ L(loop):
L(done1):
and rTMP1, r7F7F, rWORD1
- subi rSTR, rSTR, 4
+ subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP2, rTMP1
-/* When we get to here, rSTR points to the first word in the string that
+/* When we get to here, rSTR points to the first doubleword in the string that
contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */
L(done0):
- cntlzw rTMP3, rWORD1
+ cntlzd rTMP3, rWORD1
subf rTMP1, rRTN, rSTR
- srwi rTMP3, rTMP3, 3
+ srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */
blr