diff options
Diffstat (limited to 'REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S')
-rw-r--r-- | REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S new file mode 100644 index 0000000000..cbfcc14cfe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S @@ -0,0 +1,155 @@ +/* Optimized strchr implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.S for comments on how this works. 
*/
+
+/* char * [r3] strchr (const char *s [r3] , int c [r4] )
+
+   Returns in r3 the address of the first byte of the string s that
+   equals the low byte of c, or 0 when the terminating zero byte is
+   reached first.  Searching for a zero byte returns the address of the
+   terminator.  The string is examined one aligned doubleword (8 bytes)
+   at a time.  */
+
+#ifndef STRCHR
+# define STRCHR strchr	/* Default name, used unless STRCHR is predefined.  */
+#endif
+
+ENTRY (STRCHR)
+	CALL_MCOUNT 2	/* Defined outside this file; presumably the mcount profiling hook (confirm).  */
+
+#define rTMP1	r0	/* scratch */
+#define rRTN	r3	/* incoming s, outgoing result */
+#define rSTR	r8	/* current word pointer */
+#define rCHR	r4	/* byte we're looking for, spread over the whole word */
+#define rWORD	r5	/* the current word */
+#define rCLZB	rCHR	/* leading zero byte count (reuses r4 in the exit paths) */
+#define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
+#define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
+#define rTMP2	r9	/* scratch */
+#define rIGN	r10	/* number of bits we should ignore in the first word */
+#define rMASK	r11	/* mask with the bits to ignore set to 0 */
+#define rTMP3	r12	/* current word XOR rCHR: bytes equal to c become 0 */
+#define rTMP4	rIGN	/* zero-byte flags; reuses r10 once rIGN is dead */
+#define rTMP5	rMASK	/* match flags; reuses r11 once rMASK is dead */
+
+	dcbt	0,rRTN			/* Cache-touch hint for the block holding s.  */
+	insrdi	rCHR, rCHR, 8, 48	/* Copy the low byte of c into the next byte up...  */
+	li	rMASK, -1		/* All ones; shifted below to build the ignore mask.  */
+	insrdi	rCHR, rCHR, 16, 32	/* ...then the low 2 bytes into the next 2...  */
+	rlwinm	rIGN, rRTN, 3, 26, 28	/* rIGN = (s & 7) * 8.  */
+	insrdi	rCHR, rCHR, 32, 0	/* ...then the low 4 bytes into the high 4.  */
+	lis	rFEFE, -0x101		/* Both constants are built as 32-bit values first.  */
+	lis	r7F7F, 0x7f7f
+	clrrdi	rSTR, rRTN, 3		/* Round s down to an 8-byte boundary.  */
+	addi	rFEFE, rFEFE, -0x101	/* Sign-extended 0xfefefeff.  */
+	addi	r7F7F, r7F7F, 0x7f7f	/* 0x7f7f7f7f.  */
+	sldi	rTMP1, rFEFE, 32
+	insrdi	r7F7F, r7F7F, 32, 0	/* Duplicate the low half into the high half.  */
+	add	rFEFE, rFEFE, rTMP1	/* Now 0xfefefefefefefeff.  */
+/* Test the first (possibly partial) doubleword.  Bytes of the aligned
+   doubleword that lie before s are forced to 0xff, both in rWORD and in
+   rTMP3, so they are seen neither as a terminator nor as a match.  */
+	ld	rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+	sld	rMASK, rMASK, rIGN	/* Clear the rIGN least significant bits.  */
+#else
+	srd	rMASK, rMASK, rIGN	/* Clear the rIGN most significant bits.  */
+#endif
+	orc	rWORD, rWORD, rMASK	/* rWORD |= ~rMASK.  */
+	add	rTMP1, rFEFE, rWORD
+	nor	rTMP2, r7F7F, rWORD
+	and.	rTMP4, rTMP1, rTMP2	/* Zero-byte flags; result recorded in CR0.  */
+	xor	rTMP3, rCHR, rWORD	/* Bytes equal to c become 0.  */
+	orc	rTMP3, rTMP3, rMASK	/* rTMP3 |= ~rMASK.  */
+	b	L(loopentry)
+
+/* The loop.  It is entered only when the previous doubleword held no
+   zero byte.  The match test for that previous doubleword (started at
+   L(loopentry)) is completed here, interleaved with the load and the
+   zero test of the next doubleword.  */
+
+L(loop):
+	ldu	rWORD, 8(rSTR)		/* Advance rSTR by 8 and load the doubleword there.  */
+	and.	rTMP5, rTMP1, rTMP2	/* Match flags of the previous doubleword.  */
+/* Test for 0.	*/
+	add	rTMP1, rFEFE, rWORD /* x - 0x0101010101010101.  */
+	nor	rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f7f7f7f7f) == ~x & 0x8080808080808080.  */
+	bne	L(foundit)	/* The previous doubleword contains c.  */
+	and.	rTMP4, rTMP1, rTMP2 /* (x - 0x0101010101010101) & ~x & 0x8080808080808080.  */
+/* Start test for the bytes we're looking for.  */
+	xor	rTMP3, rCHR, rWORD
+L(loopentry):
+	add	rTMP1, rFEFE, rTMP3
+	nor	rTMP2, r7F7F, rTMP3
+	beq	L(loop)		/* CR0 is still the zero-byte test: loop if no zero byte.  */
+
+/* There is a zero byte in the word, but there may also be a matching
+   byte (either before or after the zero byte).  In fact, we may be
+   looking for a zero byte, in which case we return a match.  */
+	and.	rTMP5, rTMP1, rTMP2	/* Match flags of this doubleword.  */
+	li	rRTN, 0			/* Preload the null result.  */
+	beqlr				/* No matching byte at all: return 0.  */
+/* At this point:
+   rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+   rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+   But there may be false matches in the next most significant byte from
+   a true match due to carries.  This means we need to recalculate the
+   matches using a longer method for big-endian.  On little-endian the
+   first flag in string order is the least significant one, which such a
+   carry cannot produce, so the flags above are used directly.  */
+#ifdef __LITTLE_ENDIAN__
+	addi	rTMP1, rTMP5, -1
+	andc	rTMP1, rTMP1, rTMP5	/* Ones in every bit below the lowest match flag.  */
+	cntlzd	rCLZB, rTMP1
+	addi	rTMP2, rTMP4, -1
+	andc	rTMP2, rTMP2, rTMP4	/* Ones in every bit below the lowest zero flag.  */
+	cmpld	rTMP1, rTMP2
+	bgtlr				/* The zero byte comes before the match: return 0.  */
+	subfic	rCLZB, rCLZB, 64-7	/* 57 - clz == 8 * index of the matching byte.  */
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+   results from the loop for reuse here.  See strlen.S tail.  Similarly
+   one instruction could be pruned from L(foundit).  */
+	and	rFEFE, r7F7F, rWORD	/* Exact test: the per-byte sums below cannot carry.  */
+	or	rTMP5, r7F7F, rWORD
+	and	rTMP1, r7F7F, rTMP3
+	or	rTMP4, r7F7F, rTMP3
+	add	rFEFE, rFEFE, r7F7F
+	add	rTMP1, rTMP1, r7F7F
+	nor	rWORD, rTMP5, rFEFE	/* 0x80 in each byte of the word that is zero.  */
+	nor	rTMP2, rTMP4, rTMP1	/* 0x80 in each byte of the word that equals c.  */
+	cntlzd	rCLZB, rTMP2		/* 8 * index of the first matching byte.  */
+	cmpld	rWORD, rTMP2
+	bgtlr				/* The zero byte comes before the match: return 0.  */
+#endif
+	srdi	rCLZB, rCLZB, 3		/* Bit offset to byte offset.  */
+	add	rRTN, rSTR, rCLZB
+	blr
+
+L(foundit):	/* Match in the previous doubleword; rSTR already points 8 bytes past it.  */
+#ifdef __LITTLE_ENDIAN__
+	addi	rTMP1, rTMP5, -1
+	andc	rTMP1, rTMP1, rTMP5	/* Ones in every bit below the lowest match flag.  */
+	cntlzd	rCLZB, rTMP1
+	subfic	rCLZB, rCLZB, 64-7-64	/* 8 * byte index - 64: folds in the 8-byte step back.  */
+	sradi	rCLZB, rCLZB, 3		/* Arithmetic shift keeps the sign: byte index - 8.  */
+#else
+	and	rTMP1, r7F7F, rTMP3	/* rTMP3 still belongs to the previous doubleword.  */
+	or	rTMP4, r7F7F, rTMP3
+	add	rTMP1, rTMP1, r7F7F
+	nor	rTMP2, rTMP4, rTMP1	/* 0x80 in each byte that equals c.  */
+	cntlzd	rCLZB, rTMP2
+	subi	rSTR, rSTR, 8		/* Undo the advance made by ldu.  */
+	srdi	rCLZB, rCLZB, 3		/* Bit offset to byte offset.  */
+#endif
+	add	rRTN, rSTR, rCLZB
+	blr
+END (STRCHR)
+
+weak_alias (strchr, index)
+libc_hidden_builtin_def (strchr) |