diff options
Diffstat (limited to 'sysdeps/x86_64/memcpy.S')
-rw-r--r-- | sysdeps/x86_64/memcpy.S | 92 |
1 files changed, 92 insertions, 0 deletions
/* Highly optimized version for x86-64.
   Copyright (C) 1997, 2000, 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Based on i586 version contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <sysdep.h>
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"

/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy',
   and the return value is the byte after the last one copied in
   the destination.

   MEMPCPY_P is set with an explicit #ifdef rather than being defined
   as `(defined memcpy)': a `defined' operator produced by macro
   expansion inside #if has undefined behavior in ISO C.  */
#ifdef memcpy
# define MEMPCPY_P 1
#else
# define MEMPCPY_P 0
#endif

/* void *memcpy (void *dst, const void *src, size_t n)
   (or mempcpy when the including file redefines `memcpy').

   Syntax:  AT&T (GAS), run through the C preprocessor.
   In:      %rdi = dst, %rsi = src, %rdx = n.
   Out:     %rax = original dst (memcpy)
		   or dst + n, i.e. %rdi after the copy (mempcpy).
   Clobb:   %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags,
	    and %r10 in the memcpy variant.
   Stack:   not used.

   Strategy: counts of 32 bytes or fewer are copied with a single
   `rep movsb'.  Larger counts first copy 0..7 bytes to bring the
   destination to an 8-byte boundary, then move 32 bytes per iteration
   through four quadword registers, and finish the tail with
   `rep movsb'.  */

	.text
ENTRY (BP_SYM (memcpy))
	/* Cutoff for the big loop is a size of 32 bytes since otherwise
	   the loop will never be entered.  The flags set here are
	   consumed by the `jbe' below; the intervening mov and cld
	   instructions leave ZF and CF untouched.  */
	cmpq	$32, %rdx
	movq	%rdx, %rcx		/* rcx = n, the count for rep movsb.  */
#if !MEMPCPY_P
	movq	%rdi, %r10		/* Save value.  */
#endif

	/* We need this in any case.  */
	cld

	jbe	1f			/* n <= 32: byte copy only.  */

	/* Align destination to 8 bytes, the width of the stores in the
	   main loop.  rax = (-dst) & 7 is the number of head bytes.  */
	movq	%rdi, %rax
	negq	%rax
	andq	$7, %rax
	subq	%rax, %rcx		/* rcx = bytes left after the head.  */
	xchgq	%rax, %rcx		/* rcx = head count, rax = bytes left.  */

	rep; movsb

	/* Bias the counter by -32 so that its sign tells whether a full
	   32-byte chunk is still available.  Here n > 32 and the head is
	   at most 7 bytes, so the remainder may be below 32.  */
	movq	%rax, %rcx
	subq	$32, %rcx
	js	2f

	.p2align 4
3:
	/* Pre-decrement the counter for the next iteration.  Nothing
	   after this `subq' modifies the flags (only mov and lea
	   follow), so the `jns' at the bottom tests this result.  */
	subq	$32, %rcx

	movq	(%rsi), %rax
	movq	8(%rsi), %rdx
	movq	16(%rsi), %r8
	movq	24(%rsi), %r9
	movq	%rax, (%rdi)
	movq	%rdx, 8(%rdi)
	movq	%r8, 16(%rdi)
	movq	%r9, 24(%rdi)

	/* lea instead of add: the pointers advance without touching
	   the flags.  */
	leaq	32(%rsi), %rsi
	leaq	32(%rdi), %rdi

	jns	3b

	/* Correct extra loop counter modification: undo the -32 bias so
	   that rcx is the number of tail bytes.  */
2:	addq	$32, %rcx
1:	rep; movsb

#if MEMPCPY_P
	movq	%rdi, %rax		/* Set return value.  */
#else
	movq	%r10, %rax		/* Set return value.  */
#endif
	ret

END (BP_SYM (memcpy))