diff options
author | Greg McGary <greg@mcgary.org> | 2000-06-06 22:37:40 +0000 |
---|---|---|
committer | Greg McGary <greg@mcgary.org> | 2000-06-06 22:37:40 +0000 |
commit | 1d280d9f1e4d4c889d8123663a81b49256cf6fd6 (patch) | |
tree | d75c35fc1e267377ac0957f4b521ae6bdfbab680 /sysdeps/powerpc/memset.S | |
parent | 019357d23488c773cdef1dd077cc8915b6012d52 (diff) | |
download | glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.gz glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.bz2 glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.zip |
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs.
* sysdeps/powerpc/strchr.S: Likewise.
* sysdeps/powerpc/strcmp.S: Likewise.
* sysdeps/powerpc/strcpy.S: Likewise.
* sysdeps/powerpc/strlen.S: Likewise.
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs.
* sysdeps/powerpc/strchr.S: Likewise.
* sysdeps/powerpc/strcmp.S: Likewise.
* sysdeps/powerpc/strcpy.S: Likewise.
* sysdeps/powerpc/strlen.S: Likewise.
Diffstat (limited to 'sysdeps/powerpc/memset.S')
-rw-r--r-- | sysdeps/powerpc/memset.S | 273 |
1 file changed, 142 insertions, 131 deletions
diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S index 1b95bc7591..c48c0af7c8 100644 --- a/sysdeps/powerpc/memset.S +++ b/sysdeps/powerpc/memset.S @@ -19,181 +19,192 @@ #include <sysdep.h> -EALIGN(memset,5,1) /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); Returns 's'. The memset is done in three sizes: byte (8 bits), word (32 bits), cache line (256 bits). There is a special case for setting cache lines - to 0, to take advantage of the dcbz instruction. - r6: current address we are storing at - r7: number of bytes we are setting now (when aligning) */ + to 0, to take advantage of the dcbz instruction. */ + +EALIGN (memset, 5, 1) + +#define rTMP r0 +#define rRTN r3 /* initial value of 1st argument */ +#define rCHR r4 /* char to set in each byte */ +#define rLEN r5 /* length of region to set */ +#define rMEMP r6 /* address at which we are storing */ +#define rALIGN r7 /* number of bytes we are setting now (when aligning) */ +#define rMEMP2 r8 + +#define rPOS32 r7 /* constant +32 for clearing with dcbz */ +#define rNEG64 r8 /* constant -64 for clearing with dcbz */ +#define rNEG32 r9 /* constant -32 for clearing with dcbz */ /* take care of case for size <= 4 */ - cmplwi cr1,r5,4 - andi. r7,r3,3 - mr r6,r3 - ble- cr1,L(small) + cmplwi cr1, rLEN, 4 + andi. 
rALIGN, rRTN, 3 + mr rMEMP, rRTN + ble- cr1, L(small) /* align to word boundary */ - cmplwi cr5,r5,31 - rlwimi r4,r4,8,16,23 - beq+ L(aligned) # 8th instruction from .align - mtcrf 0x01,r3 - subfic r7,r7,4 - add r6,r6,r7 - sub r5,r5,r7 - bf+ 31,L(g0) - stb r4,0(r3) - bt 30,L(aligned) -L(g0): sth r4,-2(r6) # 16th instruction from .align + cmplwi cr5, rLEN, 31 + rlwimi rCHR, rCHR, 8, 16, 23 + beq+ L(aligned) /* 8th instruction from .align */ + mtcrf 0x01, rRTN + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rRTN) + bt 30, L(aligned) +L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */ /* take care of case for size < 31 */ L(aligned): - mtcrf 0x01,r5 - rlwimi r4,r4,16,0,15 - ble cr5,L(medium) + mtcrf 0x01, rLEN + rlwimi rCHR, rCHR, 16, 0, 15 + ble cr5, L(medium) /* align to cache line boundary... */ - andi. r7,r6,0x1C - subfic r7,r7,0x20 - beq L(caligned) - mtcrf 0x01,r7 - add r6,r6,r7 - sub r5,r5,r7 - cmplwi cr1,r7,0x10 - mr r8,r6 - bf 28,L(a1) - stw r4,-4(r8) - stwu r4,-8(r8) -L(a1): blt cr1,L(a2) - stw r4,-4(r8) # 32nd instruction from .align - stw r4,-8(r8) - stw r4,-12(r8) - stwu r4,-16(r8) -L(a2): bf 29,L(caligned) - stw r4,-4(r8) + andi. rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */ + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) /* now aligned to a cache line. */ L(caligned): - cmplwi cr1,r4,0 - clrrwi. r7,r5,5 - mtcrf 0x01,r5 # 40th instruction from .align - beq cr1,L(zloopstart) # special case for clearing memory using dcbz - srwi r0,r7,5 - mtctr r0 - beq L(medium) # we may not actually get to do a full line - clrlwi. 
r5,r5,27 - add r6,r6,r7 - li r8,-0x40 - bdz L(cloopdone) # 48th instruction from .align + cmplwi cr1, rCHR, 0 + clrrwi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN /* 40th instruction from .align */ + beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */ + srwi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* we may not actually get to do a full line */ + clrlwi. rLEN, rLEN, 27 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) /* 48th instruction from .align */ -L(c3): dcbz r8,r6 - stw r4,-4(r6) - stw r4,-8(r6) - stw r4,-12(r6) - stw r4,-16(r6) - nop # let 601 fetch last 4 instructions of loop - stw r4,-20(r6) - stw r4,-24(r6) # 56th instruction from .align - nop # let 601 fetch first 8 instructions of loop - stw r4,-28(r6) - stwu r4,-32(r6) - bdnz L(c3) +L(c3): dcbz rNEG64, rMEMP + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + nop /* let 601 fetch last 4 instructions of loop */ + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) /* 56th instruction from .align */ + nop /* let 601 fetch first 8 instructions of loop */ + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + bdnz L(c3) L(cloopdone): - stw r4,-4(r6) - stw r4,-8(r6) - stw r4,-12(r6) - stw r4,-16(r6) # 64th instruction from .align - stw r4,-20(r6) - cmplwi cr1,r5,16 - stw r4,-24(r6) - stw r4,-28(r6) - stwu r4,-32(r6) + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) /* 64th instruction from .align */ + stw rCHR, -20(rMEMP) + cmplwi cr1, rLEN, 16 + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) beqlr - add r6,r6,r7 - b L(medium_tail2) # 72nd instruction from .align + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) /* 72nd instruction from .align */ .align 5 nop /* Clear lines of memory in 128-byte chunks. */ L(zloopstart): - clrlwi r5,r5,27 - mtcrf 0x02,r7 - srwi. 
r0,r7,7 - mtctr r0 - li r7,0x20 - li r8,-0x40 - cmplwi cr1,r5,16 # 8 - bf 26,L(z0) - dcbz 0,r6 - addi r6,r6,0x20 -L(z0): li r9,-0x20 - bf 25,L(z1) - dcbz 0,r6 - dcbz r7,r6 - addi r6,r6,0x40 # 16 -L(z1): cmplwi cr5,r5,0 - beq L(medium) + clrlwi rLEN, rLEN, 27 + mtcrf 0x02, rALIGN + srwi. rTMP, rALIGN, 7 + mtctr rTMP + li rPOS32, 0x20 + li rNEG64, -0x40 + cmplwi cr1, rLEN, 16 /* 8 */ + bf 26, L(z0) + dcbz 0, rMEMP + addi rMEMP, rMEMP, 0x20 +L(z0): li rNEG32, -0x20 + bf 25, L(z1) + dcbz 0, rMEMP + dcbz rPOS32, rMEMP + addi rMEMP, rMEMP, 0x40 /* 16 */ +L(z1): cmplwi cr5, rLEN, 0 + beq L(medium) L(zloop): - dcbz 0,r6 - dcbz r7,r6 - addi r6,r6,0x80 - dcbz r8,r6 - dcbz r9,r6 - bdnz L(zloop) - beqlr cr5 - b L(medium_tail2) + dcbz 0, rMEMP + dcbz rPOS32, rMEMP + addi rMEMP, rMEMP, 0x80 + dcbz rNEG64, rMEMP + dcbz rNEG32, rMEMP + bdnz L(zloop) + beqlr cr5 + b L(medium_tail2) .align 5 L(small): /* Memset of 4 bytes or less. */ - cmplwi cr5,r5,1 - cmplwi cr1,r5,3 - bltlr cr5 - stb r4,0(r6) - beqlr cr5 + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 nop - stb r4,1(r6) - bltlr cr1 - stb r4,2(r6) - beqlr cr1 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 nop - stb r4,3(r6) + stb rCHR, 3(rMEMP) blr /* Memset of 0-31 bytes. 
*/ .align 5 L(medium): - cmplwi cr1,r5,16 + cmplwi cr1, rLEN, 16 L(medium_tail2): - add r6,r6,r5 + add rMEMP, rMEMP, rLEN L(medium_tail): - bt- 31,L(medium_31t) - bt- 30,L(medium_30t) + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) L(medium_30f): - bt- 29,L(medium_29t) + bt- 29, L(medium_29t) L(medium_29f): - bge- cr1,L(medium_27t) - bflr- 28 - stw r4,-4(r6) # 8th instruction from .align - stw r4,-8(r6) + bge- cr1, L(medium_27t) + bflr- 28 + stw rCHR, -4(rMEMP) /* 8th instruction from .align */ + stw rCHR, -8(rMEMP) blr L(medium_31t): - stbu r4,-1(r6) - bf- 30,L(medium_30f) + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) L(medium_30t): - sthu r4,-2(r6) - bf- 29,L(medium_29f) + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) L(medium_29t): - stwu r4,-4(r6) - blt- cr1,L(medium_27f) # 16th instruction from .align + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) /* 16th instruction from .align */ L(medium_27t): - stw r4,-4(r6) - stw r4,-8(r6) - stw r4,-12(r6) - stwu r4,-16(r6) + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) L(medium_27f): - bflr- 28 + bflr- 28 L(medium_28t): - stw r4,-4(r6) - stw r4,-8(r6) + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) blr END(memset) |