diff options
Diffstat (limited to 'ports/sysdeps/powerpc/powerpc32/405/memcpy.S')
-rw-r--r-- | ports/sysdeps/powerpc/powerpc32/405/memcpy.S | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/ports/sysdeps/powerpc/powerpc32/405/memcpy.S b/ports/sysdeps/powerpc/powerpc32/405/memcpy.S new file mode 100644 index 0000000000..61025cf818 --- /dev/null +++ b/ports/sysdeps/powerpc/powerpc32/405/memcpy.S @@ -0,0 +1,132 @@ +/* Optimized memcpy implementation for PowerPC476. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* memcpy + + r0:return address + r3:destination address + r4:source address + r5:byte count + + Save return address in r0. + If destinationn and source are unaligned and copy count is greater than 256 + then copy 0-3 bytes to make destination aligned. + If 32 or more bytes to copy we use 32 byte copy loop. + Finaly we copy 0-31 extra bytes. */ + +EALIGN (BP_SYM (memcpy), 5, 0) +/* Check if bytes to copy are greater than 256 and if + source and destination are unaligned */ + cmpwi r5,0x0100 + addi r0,r3,0 + ble L(string_count_loop) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(string_count_loop) + neg r6,r4 + clrlwi. r6,r6,30 + beq L(string_count_loop) + mtctr r6 + subf r5,r6,r5 + +L(unaligned_bytecopy_loop): /* Align destination by coping 0-3 bytes */ + lbz r8,0x0(r4) + addi r4,r4,1 + stb r8,0x0(r3) + addi r3,r3,1 + bdnz L(unaligned_bytecopy_loop) + srwi. r7,r5,5 + beq L(preword2_count_loop) + mtctr r7 + +L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */ + lwz r6,0(r4) + lwz r7,4(r4) + lwz r8,8(r4) + lwz r9,12(r4) + subi r5,r5,0x20 + stw r6,0(r3) + stw r7,4(r3) + stw r8,8(r3) + stw r9,12(r3) + lwz r6,16(r4) + lwz r7,20(r4) + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + stw r6,16(r3) + stw r7,20(r3) + stw r8,24(r3) + stw r9,28(r3) + addi r3,r3,0x20 + bdnz L(word8_count_loop_no_dcbt) + +L(preword2_count_loop): /* Copy remaining 0-31 bytes */ + clrlwi. r12,r5,27 + beq L(end_memcpy) + mtxer r12 + lswx r5,0,r4 + stswx r5,0,r3 + mr r3,r0 + blr + +L(string_count_loop): /* Copy odd 0-31 bytes */ + clrlwi. r12,r5,28 + add r3,r3,r5 + add r4,r4,r5 + beq L(pre_string_copy) + mtxer r12 + subf r4,r12,r4 + subf r3,r12,r3 + lswx r6,0,r4 + stswx r6,0,r3 + +L(pre_string_copy): /* Check how many 32 byte chunck to copy */ + srwi. r7,r5,4 + beq L(end_memcpy) + mtctr r7 + +L(word4_count_loop_no_dcbt): /* Copy 32 bytes at a time */ + lwz r6,-4(r4) + lwz r7,-8(r4) + lwz r8,-12(r4) + lwzu r9,-16(r4) + stw r6,-4(r3) + stw r7,-8(r3) + stw r8,-12(r3) + stwu r9,-16(r3) + bdz L(end_memcpy) + lwz r6,-4(r4) + lwz r7,-8(r4) + lwz r8,-12(r4) + lwzu r9,-16(r4) + stw r6,-4(r3) + stw r7,-8(r3) + stw r8,-12(r3) + stwu r9,-16(r3) + bdnz L(word4_count_loop_no_dcbt) + +L(end_memcpy): + mr r3,r0 + blr +END (BP_SYM (memcpy)) +libc_hidden_builtin_def (memcpy) |