aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/memset.S
diff options
context:
space:
mode:
authorGreg McGary <greg@mcgary.org>2000-06-06 22:37:40 +0000
committerGreg McGary <greg@mcgary.org>2000-06-06 22:37:40 +0000
commit1d280d9f1e4d4c889d8123663a81b49256cf6fd6 (patch)
treed75c35fc1e267377ac0957f4b521ae6bdfbab680 /sysdeps/powerpc/memset.S
parent019357d23488c773cdef1dd077cc8915b6012d52 (diff)
downloadglibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.gz
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.bz2
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.zip
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs.
* sysdeps/powerpc/strchr.S: Likewise.
* sysdeps/powerpc/strcmp.S: Likewise.
* sysdeps/powerpc/strcpy.S: Likewise.
* sysdeps/powerpc/strlen.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/memset.S')
-rw-r--r--sysdeps/powerpc/memset.S273
1 files changed, 142 insertions, 131 deletions
diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S
index 1b95bc7591..c48c0af7c8 100644
--- a/sysdeps/powerpc/memset.S
+++ b/sysdeps/powerpc/memset.S
@@ -19,181 +19,192 @@
#include <sysdep.h>
-EALIGN(memset,5,1)
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
Returns 's'.
The memset is done in three sizes: byte (8 bits), word (32 bits),
cache line (256 bits). There is a special case for setting cache lines
- to 0, to take advantage of the dcbz instruction.
- r6: current address we are storing at
- r7: number of bytes we are setting now (when aligning) */
+ to 0, to take advantage of the dcbz instruction. */
+
+EALIGN (memset, 5, 1)
+
+#define rTMP r0
+#define rRTN r3 /* initial value of 1st argument */
+#define rCHR r4 /* char to set in each byte */
+#define rLEN r5 /* length of region to set */
+#define rMEMP r6 /* address at which we are storing */
+#define rALIGN r7 /* number of bytes we are setting now (when aligning) */
+#define rMEMP2 r8
+
+#define rPOS32 r7 /* constant +32 for clearing with dcbz */
+#define rNEG64 r8 /* constant -64 for clearing with dcbz */
+#define rNEG32 r9 /* constant -32 for clearing with dcbz */
/* take care of case for size <= 4 */
- cmplwi cr1,r5,4
- andi. r7,r3,3
- mr r6,r3
- ble- cr1,L(small)
+ cmplwi cr1, rLEN, 4
+ andi. rALIGN, rRTN, 3
+ mr rMEMP, rRTN
+ ble- cr1, L(small)
/* align to word boundary */
- cmplwi cr5,r5,31
- rlwimi r4,r4,8,16,23
- beq+ L(aligned) # 8th instruction from .align
- mtcrf 0x01,r3
- subfic r7,r7,4
- add r6,r6,r7
- sub r5,r5,r7
- bf+ 31,L(g0)
- stb r4,0(r3)
- bt 30,L(aligned)
-L(g0): sth r4,-2(r6) # 16th instruction from .align
+ cmplwi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23
+ beq+ L(aligned) /* 8th instruction from .align */
+ mtcrf 0x01, rRTN
+ subfic rALIGN, rALIGN, 4
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ bf+ 31, L(g0)
+ stb rCHR, 0(rRTN)
+ bt 30, L(aligned)
+L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */
/* take care of case for size <= 31 */
L(aligned):
- mtcrf 0x01,r5
- rlwimi r4,r4,16,0,15
- ble cr5,L(medium)
+ mtcrf 0x01, rLEN
+ rlwimi rCHR, rCHR, 16, 0, 15
+ ble cr5, L(medium)
/* align to cache line boundary... */
- andi. r7,r6,0x1C
- subfic r7,r7,0x20
- beq L(caligned)
- mtcrf 0x01,r7
- add r6,r6,r7
- sub r5,r5,r7
- cmplwi cr1,r7,0x10
- mr r8,r6
- bf 28,L(a1)
- stw r4,-4(r8)
- stwu r4,-8(r8)
-L(a1): blt cr1,L(a2)
- stw r4,-4(r8) # 32nd instruction from .align
- stw r4,-8(r8)
- stw r4,-12(r8)
- stwu r4,-16(r8)
-L(a2): bf 29,L(caligned)
- stw r4,-4(r8)
+ andi. rALIGN, rMEMP, 0x1C
+ subfic rALIGN, rALIGN, 0x20
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stw rCHR, -4(rMEMP2)
+ stwu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */
+ stw rCHR, -8(rMEMP2)
+ stw rCHR, -12(rMEMP2)
+ stwu rCHR, -16(rMEMP2)
+L(a2): bf 29, L(caligned)
+ stw rCHR, -4(rMEMP2)
/* now aligned to a cache line. */
L(caligned):
- cmplwi cr1,r4,0
- clrrwi. r7,r5,5
- mtcrf 0x01,r5 # 40th instruction from .align
- beq cr1,L(zloopstart) # special case for clearing memory using dcbz
- srwi r0,r7,5
- mtctr r0
- beq L(medium) # we may not actually get to do a full line
- clrlwi. r5,r5,27
- add r6,r6,r7
- li r8,-0x40
- bdz L(cloopdone) # 48th instruction from .align
+ cmplwi cr1, rCHR, 0
+ clrrwi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN /* 40th instruction from .align */
+ beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
+ srwi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium) /* we may not actually get to do a full line */
+ clrlwi. rLEN, rLEN, 27
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone) /* 48th instruction from .align */
-L(c3): dcbz r8,r6
- stw r4,-4(r6)
- stw r4,-8(r6)
- stw r4,-12(r6)
- stw r4,-16(r6)
- nop # let 601 fetch last 4 instructions of loop
- stw r4,-20(r6)
- stw r4,-24(r6) # 56th instruction from .align
- nop # let 601 fetch first 8 instructions of loop
- stw r4,-28(r6)
- stwu r4,-32(r6)
- bdnz L(c3)
+L(c3): dcbz rNEG64, rMEMP
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP)
+ nop /* let 601 fetch last 4 instructions of loop */
+ stw rCHR, -20(rMEMP)
+ stw rCHR, -24(rMEMP) /* 56th instruction from .align */
+ nop /* let 601 fetch first 8 instructions of loop */
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
+ bdnz L(c3)
L(cloopdone):
- stw r4,-4(r6)
- stw r4,-8(r6)
- stw r4,-12(r6)
- stw r4,-16(r6) # 64th instruction from .align
- stw r4,-20(r6)
- cmplwi cr1,r5,16
- stw r4,-24(r6)
- stw r4,-28(r6)
- stwu r4,-32(r6)
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP) /* 64th instruction from .align */
+ stw rCHR, -20(rMEMP)
+ cmplwi cr1, rLEN, 16
+ stw rCHR, -24(rMEMP)
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
beqlr
- add r6,r6,r7
- b L(medium_tail2) # 72nd instruction from .align
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2) /* 72nd instruction from .align */
.align 5
nop
/* Clear lines of memory in 128-byte chunks. */
L(zloopstart):
- clrlwi r5,r5,27
- mtcrf 0x02,r7
- srwi. r0,r7,7
- mtctr r0
- li r7,0x20
- li r8,-0x40
- cmplwi cr1,r5,16 # 8
- bf 26,L(z0)
- dcbz 0,r6
- addi r6,r6,0x20
-L(z0): li r9,-0x20
- bf 25,L(z1)
- dcbz 0,r6
- dcbz r7,r6
- addi r6,r6,0x40 # 16
-L(z1): cmplwi cr5,r5,0
- beq L(medium)
+ clrlwi rLEN, rLEN, 27
+ mtcrf 0x02, rALIGN
+ srwi. rTMP, rALIGN, 7
+ mtctr rTMP
+ li rPOS32, 0x20
+ li rNEG64, -0x40
+ cmplwi cr1, rLEN, 16 /* 8 */
+ bf 26, L(z0)
+ dcbz 0, rMEMP
+ addi rMEMP, rMEMP, 0x20
+L(z0): li rNEG32, -0x20
+ bf 25, L(z1)
+ dcbz 0, rMEMP
+ dcbz rPOS32, rMEMP
+ addi rMEMP, rMEMP, 0x40 /* 16 */
+L(z1): cmplwi cr5, rLEN, 0
+ beq L(medium)
L(zloop):
- dcbz 0,r6
- dcbz r7,r6
- addi r6,r6,0x80
- dcbz r8,r6
- dcbz r9,r6
- bdnz L(zloop)
- beqlr cr5
- b L(medium_tail2)
+ dcbz 0, rMEMP
+ dcbz rPOS32, rMEMP
+ addi rMEMP, rMEMP, 0x80
+ dcbz rNEG64, rMEMP
+ dcbz rNEG32, rMEMP
+ bdnz L(zloop)
+ beqlr cr5
+ b L(medium_tail2)
.align 5
L(small):
/* Memset of 4 bytes or less. */
- cmplwi cr5,r5,1
- cmplwi cr1,r5,3
- bltlr cr5
- stb r4,0(r6)
- beqlr cr5
+ cmplwi cr5, rLEN, 1
+ cmplwi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
nop
- stb r4,1(r6)
- bltlr cr1
- stb r4,2(r6)
- beqlr cr1
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
nop
- stb r4,3(r6)
+ stb rCHR, 3(rMEMP)
blr
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- cmplwi cr1,r5,16
+ cmplwi cr1, rLEN, 16
L(medium_tail2):
- add r6,r6,r5
+ add rMEMP, rMEMP, rLEN
L(medium_tail):
- bt- 31,L(medium_31t)
- bt- 30,L(medium_30t)
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
L(medium_30f):
- bt- 29,L(medium_29t)
+ bt- 29, L(medium_29t)
L(medium_29f):
- bge- cr1,L(medium_27t)
- bflr- 28
- stw r4,-4(r6) # 8th instruction from .align
- stw r4,-8(r6)
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ stw rCHR, -4(rMEMP) /* 8th instruction from .align */
+ stw rCHR, -8(rMEMP)
blr
L(medium_31t):
- stbu r4,-1(r6)
- bf- 30,L(medium_30f)
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
L(medium_30t):
- sthu r4,-2(r6)
- bf- 29,L(medium_29f)
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
L(medium_29t):
- stwu r4,-4(r6)
- blt- cr1,L(medium_27f) # 16th instruction from .align
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f) /* 16th instruction from .align */
L(medium_27t):
- stw r4,-4(r6)
- stw r4,-8(r6)
- stw r4,-12(r6)
- stwu r4,-16(r6)
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stwu rCHR, -16(rMEMP)
L(medium_27f):
- bflr- 28
+ bflr- 28
L(medium_28t):
- stw r4,-4(r6)
- stw r4,-8(r6)
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
blr
END(memset)