aboutsummaryrefslogtreecommitdiff
path: root/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S')
-rw-r--r--REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S155
1 files changed, 155 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
new file mode 100644
index 0000000000..cbfcc14cfe
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S
@@ -0,0 +1,155 @@
+/* Optimized strchr implementation for PowerPC64.
+ Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* See strlen.S for comments on how this works. */
+
+/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */
+
+#ifndef STRCHR /* an earlier definition of STRCHR overrides the default name */
+# define STRCHR strchr /* default symbol name for this routine */
+#endif
+
+ENTRY (STRCHR) /* Find first byte equal to c in s, 8 bytes at a time. r3 = match address, or 0 if NUL comes first. */
+ CALL_MCOUNT 2 /* NOTE(review): macro defined elsewhere, presumably the profiling hook for 2 arguments -- confirm */
+
+#define rTMP1 r0 /* scratch */
+#define rRTN r3 /* outgoing result */
+#define rSTR r8 /* current word pointer */
+#define rCHR r4 /* byte we're looking for, spread over the whole word */
+#define rWORD r5 /* the current word */
+#define rCLZB rCHR /* leading zero byte count (overwrites rCHR, only on the exit paths) */
+#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */
+#define rTMP2 r9 /* scratch */
+#define rIGN r10 /* number of bits we should ignore in the first word */
+#define rMASK r11 /* mask with the bits to ignore set to 0 */
+#define rTMP3 r12 /* current word XOR rCHR: a byte is 0 where the string byte equals c */
+#define rTMP4 rIGN /* NUL flags of the current word (reuses rIGN once the mask is built) */
+#define rTMP5 rMASK /* match flags (reuses rMASK once the first word has been masked) */
+
+ dcbt 0,rRTN /* cache-touch hint for the block holding the start of s */
+ insrdi rCHR, rCHR, 8, 48 /* replicate c: low byte copied into the next byte up */
+ li rMASK, -1 /* all ones, shifted into the first-word mask below */
+ insrdi rCHR, rCHR, 16, 32 /* replicate c: low 16 bits copied into the next 16 bits up */
+ rlwinm rIGN, rRTN, 3, 26, 28 /* rIGN = (s & 7) * 8, the bit count of the bytes that precede s */
+ insrdi rCHR, rCHR, 32, 0 /* replicate c: low word copied into high word, all 8 bytes now equal c */
+ lis rFEFE, -0x101 /* start building 0xfefefeff in the low word */
+ lis r7F7F, 0x7f7f /* start building 0x7f7f7f7f in the low word */
+ clrrdi rSTR, rRTN, 3 /* rSTR = s rounded down to an 8-byte boundary */
+ addi rFEFE, rFEFE, -0x101 /* low word is now 0xfefefeff, upper bits are sign extension */
+ addi r7F7F, r7F7F, 0x7f7f /* low word is now 0x7f7f7f7f */
+ sldi rTMP1, rFEFE, 32 /* 0xfefefeff moved to the high word */
+ insrdi r7F7F, r7F7F, 32, 0 /* r7F7F = 0x7f7f7f7f7f7f7f7f */
+ add rFEFE, rFEFE, rTMP1 /* rFEFE = 0xfefefefefefefeff */
+/* Test the first, possibly partial, doubleword: bytes before s are forced to 0xff first. */
+ ld rWORD, 0(rSTR) /* aligned load, so it may include bytes that precede s */
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rIGN /* bytes before s are the low-order ones: clear their mask bits */
+#else
+ srd rMASK, rMASK, rIGN /* bytes before s are the high-order ones: clear their mask bits */
+#endif
+ orc rWORD, rWORD, rMASK /* bytes before s become 0xff so none of them can look like NUL */
+ add rTMP1, rFEFE, rWORD /* x - 0x0101010101010101 */
+ nor rTMP2, r7F7F, rWORD /* ~x & 0x8080808080808080 */
+ and. rTMP4, rTMP1, rTMP2 /* NUL test of the first word, sets cr0 for the beq at loopentry */
+ xor rTMP3, rCHR, rWORD /* a byte of rTMP3 is 0 where the string byte equals c */
+ orc rTMP3, rTMP3, rMASK /* bytes before s become 0xff so none of them can look like a match */
+ b L(loopentry)
+
+/* The loop. Each pass finishes the match test of the previous word while starting on the next one. */
+
+L(loop):
+ ldu rWORD, 8(rSTR) /* load the next doubleword and advance rSTR by 8 */
+ and. rTMP5, rTMP1, rTMP2 /* finish the match test of the previous word, sets cr0 */
+/* Test for 0. */
+ add rTMP1, rFEFE, rWORD /* x - 0x0101010101010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f7f7f7f7f) == ~x & 0x8080808080808080. */
+ bne L(foundit) /* the previous word, which held no NUL, contains c */
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x0101010101010101) & ~x & 0x8080808080808080, sets cr0. */
+/* Start test for the bytes we're looking for. */
+ xor rTMP3, rCHR, rWORD /* a byte of rTMP3 is 0 where the string byte equals c */
+L(loopentry):
+ add rTMP1, rFEFE, rTMP3 /* first half of the match test, leaves cr0 alone */
+ nor rTMP2, r7F7F, rTMP3 /* second half of the match test, leaves cr0 alone */
+ beq L(loop) /* cr0 comes from the NUL test: no NUL in this word, keep scanning */
+
+/* There is a zero byte in the word, but may also be a matching byte (either
+ before or after the zero byte). In fact, we may be looking for a
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2 /* match test of the word that holds the NUL */
+ li rRTN, 0 /* preload NULL for the early returns below */
+ beqlr /* no byte flagged as a match: return NULL */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1 /* with the andc below: (x - 1) & ~x */
+ andc rTMP1, rTMP1, rTMP5 /* rTMP1 = ones strictly below the lowest match flag */
+ cntlzd rCLZB, rTMP1 /* 64 minus the bit index of the lowest match flag */
+ addi rTMP2, rTMP4, -1 /* same (x - 1) & ~x trick on the NUL flags */
+ andc rTMP2, rTMP2, rTMP4 /* rTMP2 = ones strictly below the lowest NUL flag */
+ cmpld rTMP1, rTMP2 /* unsigned compare: which flag sits lower, i.e. earlier in memory */
+ bgtlr /* the NUL precedes the first match: return NULL */
+ subfic rCLZB, rCLZB, 64-7 /* bit index of the flag minus 7 = 8 * byte index of the match */
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
+ and rFEFE, r7F7F, rWORD /* rFEFE is reused as scratch from here on: low 7 bits of each byte */
+ or rTMP5, r7F7F, rWORD /* top bit of each byte of the word, low 7 bits forced to 1 */
+ and rTMP1, r7F7F, rTMP3 /* same two steps for the XOR word: low 7 bits of each byte */
+ or rTMP4, r7F7F, rTMP3 /* top bit of each byte of the XOR word, low 7 bits forced to 1 */
+ add rFEFE, rFEFE, r7F7F /* top bit set where the low 7 bits were nonzero, no carry leaves a byte */
+ add rTMP1, rTMP1, r7F7F /* same for the XOR word */
+ nor rWORD, rTMP5, rFEFE /* rWORD = 0x80 in exactly the bytes that are NUL */
+ nor rTMP2, rTMP4, rTMP1 /* rTMP2 = 0x80 in exactly the bytes that equal c */
+ cntlzd rCLZB, rTMP2 /* 8 * index of the first byte that equals c */
+ cmpld rWORD, rTMP2 /* unsigned compare: the larger value has its first flag in an earlier byte */
+ bgtlr /* the NUL precedes any true match: return NULL */
+#endif
+ srdi rCLZB, rCLZB, 3 /* bit offset to byte offset */
+ add rRTN, rSTR, rCLZB /* address of the first byte equal to c */
+ blr
+
+L(foundit): /* match in the previous word, rSTR already points 8 bytes past that word */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1 /* with the andc below: (x - 1) & ~x */
+ andc rTMP1, rTMP1, rTMP5 /* rTMP1 = ones strictly below the lowest match flag */
+ cntlzd rCLZB, rTMP1 /* 64 minus the bit index of the lowest match flag */
+ subfic rCLZB, rCLZB, 64-7-64 /* 8 * byte index - 64, the extra -64 undoes the ldu advance */
+ sradi rCLZB, rCLZB, 3 /* arithmetic shift keeps the negative byte offset */
+#else
+ and rTMP1, r7F7F, rTMP3 /* exact recomputation of the match flags, as in the tail above */
+ or rTMP4, r7F7F, rTMP3 /* top bit of each byte of the XOR word, low 7 bits forced to 1 */
+ add rTMP1, rTMP1, r7F7F /* top bit set where the low 7 bits were nonzero */
+ nor rTMP2, rTMP4, rTMP1 /* rTMP2 = 0x80 in exactly the bytes that equal c */
+ cntlzd rCLZB, rTMP2 /* 8 * index of the first byte that equals c */
+ subi rSTR, rSTR, 8 /* step back to the word that held the match */
+ srdi rCLZB, rCLZB, 3 /* bit offset to byte offset */
+#endif
+ add rRTN, rSTR, rCLZB /* address of the first byte equal to c */
+ blr
+END (STRCHR)
+
+weak_alias (strchr, index) /* NOTE(review): macro defined elsewhere, presumably makes index a weak alias of strchr -- confirm */
+libc_hidden_builtin_def (strchr) /* NOTE(review): macro defined elsewhere, presumably the hidden internal definition of strchr -- confirm */