Diffstat (limited to 'sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S')
-rw-r--r--  sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S  553
1 file changed, 0 insertions, 553 deletions
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
deleted file mode 100644
index dee3ec529c..0000000000
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ /dev/null
@@ -1,553 +0,0 @@
-/* memmove/memcpy/mempcpy with unaligned load/store and rep movsb
- Copyright (C) 2016-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* memmove/memcpy/mempcpy is implemented as:
- 1. Use overlapping load and store to avoid branch.
- 2. Load all sources into registers and store them together to avoid
- possible address overlap between source and destination.
- 3. If size is 8 * VEC_SIZE or less, load all sources into registers
- and store them together.
- 4. If address of destination > address of source, backward copy
- 4 * VEC_SIZE at a time with unaligned load and aligned store.
- Load the first 4 * VEC and last VEC before the loop and store
- them after the loop to support overlapping addresses.
- 5. Otherwise, forward copy 4 * VEC_SIZE at a time with unaligned
- load and aligned store. Load the last 4 * VEC and first VEC
- before the loop and store them after the loop to support
- overlapping addresses.
- 6. If size >= __x86_shared_non_temporal_threshold and there is no
- overlap between destination and source, use non-temporal store
- instead of aligned store. */
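
The same idea in standalone C (a rough sketch, not part of the deleted file; VEC and copy_vec_to_2x_vec are invented names): for any size between VEC_SIZE and 2 * VEC_SIZE, both the head and the tail of the source are loaded before either store is issued, so overlapping buffers need no extra branch and size == VEC_SIZE falls out naturally.

#include <string.h>

/* Stand-in for one SIMD register (VEC_SIZE bytes); 16 matches the
   SSE2 variant of this file.  */
typedef struct { unsigned char b[16]; } VEC;

/* Copy n bytes, sizeof (VEC) <= n <= 2 * sizeof (VEC), tolerating
   overlap between src and dst: all loads happen before any store,
   mirroring the VMOVU pairs in L(last_2x_vec).  */
static void
copy_vec_to_2x_vec (unsigned char *dst, const unsigned char *src, size_t n)
{
  VEC head, tail;
  memcpy (&head, src, sizeof (VEC));
  memcpy (&tail, src + n - sizeof (VEC), sizeof (VEC));
  memcpy (dst, &head, sizeof (VEC));
  memcpy (dst + n - sizeof (VEC), &tail, sizeof (VEC));
}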
-
-#include <sysdep.h>
-
-#ifndef MEMCPY_SYMBOL
-# define MEMCPY_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
-#endif
-
-#ifndef MEMPCPY_SYMBOL
-# define MEMPCPY_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
-#endif
-
-#ifndef MEMMOVE_CHK_SYMBOL
-# define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
-#endif
-
-#ifndef VZEROUPPER
-# if VEC_SIZE > 16
-# define VZEROUPPER vzeroupper
-# else
-# define VZEROUPPER
-# endif
-#endif
-
-/* Threshold to use Enhanced REP MOVSB. Since there is overhead to set
- up REP MOVSB operation, REP MOVSB isn't faster on short data. The
- memcpy micro benchmark in glibc shows that 2KB is the approximate
- value above which REP MOVSB becomes faster than SSE2 optimization
- on processors with Enhanced REP MOVSB. Since larger register size
- can move more data with a single load and store, the threshold is
- higher with larger register size. */
-#ifndef REP_MOVSB_THRESHOLD
-# define REP_MOVSB_THRESHOLD (2048 * (VEC_SIZE / 16))
-#endif
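
For reference, the threshold formula above evaluates to 2048 bytes for the 16-byte (SSE2) vector variant, 4096 bytes for 32-byte (AVX2) vectors, and 8192 bytes for 64-byte (AVX-512) vectors.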
-
-#ifndef PREFETCH
-# define PREFETCH(addr) prefetcht0 addr
-#endif
-
-/* Assume 64-byte prefetch size. */
-#ifndef PREFETCH_SIZE
-# define PREFETCH_SIZE 64
-#endif
-
-#define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)
-
-#if PREFETCH_SIZE == 64
-# if PREFETCHED_LOAD_SIZE == PREFETCH_SIZE
-# define PREFETCH_ONE_SET(dir, base, offset) \
- PREFETCH ((offset)base)
-# elif PREFETCHED_LOAD_SIZE == 2 * PREFETCH_SIZE
-# define PREFETCH_ONE_SET(dir, base, offset) \
- PREFETCH ((offset)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE)base)
-# elif PREFETCHED_LOAD_SIZE == 4 * PREFETCH_SIZE
-# define PREFETCH_ONE_SET(dir, base, offset) \
- PREFETCH ((offset)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE * 2)base); \
- PREFETCH ((offset + dir * PREFETCH_SIZE * 3)base)
-# else
-# error Unsupported PREFETCHED_LOAD_SIZE!
-# endif
-#else
-# error Unsupported PREFETCH_SIZE!
-#endif
-
-#ifndef SECTION
-# error SECTION is not defined!
-#endif
-
- .section SECTION(.text),"ax",@progbits
-#if defined SHARED && IS_IN (libc)
-ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
-#endif
-
-#if VEC_SIZE == 16 || defined SHARED
-ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned))
- movq %rdi, %rax
- addq %rdx, %rax
- jmp L(start)
-END (MEMPCPY_SYMBOL (__mempcpy, unaligned))
-#endif
-
-#if defined SHARED && IS_IN (libc)
-ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
-#endif
-
-ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned))
- movq %rdi, %rax
-L(start):
- cmpq $VEC_SIZE, %rdx
- jb L(less_vec)
- cmpq $(VEC_SIZE * 2), %rdx
- ja L(more_2x_vec)
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-L(last_2x_vec):
-#endif
- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
- VMOVU (%rsi), %VEC(0)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
- VZEROUPPER
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-L(nop):
-#endif
- ret
-#if defined USE_MULTIARCH && IS_IN (libc)
-END (MEMMOVE_SYMBOL (__memmove, unaligned))
-
-# if VEC_SIZE == 16
-# if defined SHARED
-/* Only used to measure performance of REP MOVSB. */
-ENTRY (__mempcpy_erms)
- movq %rdi, %rax
- addq %rdx, %rax
- jmp L(start_movsb)
-END (__mempcpy_erms)
-# endif
-
-ENTRY (__memmove_erms)
- movq %rdi, %rax
-L(start_movsb):
- movq %rdx, %rcx
- cmpq %rsi, %rdi
- jb 1f
- /* Source == destination is less common. */
- je 2f
- leaq (%rsi,%rcx), %rdx
- cmpq %rdx, %rdi
- jb L(movsb_backward)
-1:
- rep movsb
-2:
- ret
-L(movsb_backward):
- leaq -1(%rdi,%rcx), %rdi
- leaq -1(%rsi,%rcx), %rsi
- std
- rep movsb
- cld
- ret
-END (__memmove_erms)
-# if defined SHARED
-strong_alias (__memmove_erms, __memcpy_erms)
-# endif
-# endif
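
In C terms, the backward path of __memmove_erms above does roughly the following (a hedged sketch using GCC inline assembly; erms_copy_backward is an illustrative name, not a glibc symbol):

#include <stddef.h>

/* Point RDI/RSI at the last byte of each buffer, set the direction
   flag so REP MOVSB walks downward, copy RCX bytes, then restore DF
   with CLD as the psABI expects on return.  */
static void
erms_copy_backward (unsigned char *dst, const unsigned char *src, size_t n)
{
  if (n == 0)
    return;
  unsigned char *d = dst + n - 1;
  const unsigned char *s = src + n - 1;
  __asm__ volatile ("std\n\trep movsb\n\tcld"
                    : "+D" (d), "+S" (s), "+c" (n)
                    :
                    : "memory");
}

The forward path is the common case and needs none of this: it simply loads the length into RCX and runs REP MOVSB with the direction flag clear.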
-
-# ifdef SHARED
-ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
-
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
- movq %rdi, %rax
- addq %rdx, %rax
- jmp L(start_erms)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
-
-ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
- cmpq %rdx, %rcx
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
-# endif
-
-ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
- movq %rdi, %rax
-L(start_erms):
- cmpq $VEC_SIZE, %rdx
- jb L(less_vec)
- cmpq $(VEC_SIZE * 2), %rdx
- ja L(movsb_more_2x_vec)
-L(last_2x_vec):
- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
- VMOVU (%rsi), %VEC(0)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
-L(return):
- VZEROUPPER
- ret
-
-L(movsb):
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- jae L(more_8x_vec)
- cmpq %rsi, %rdi
- jb 1f
- /* Source == destination is less common. */
- je L(nop)
- leaq (%rsi,%rdx), %r9
- cmpq %r9, %rdi
- /* Avoid slow backward REP MOVSB. */
-# if REP_MOVSB_THRESHOLD <= (VEC_SIZE * 8)
-# error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE!
-# endif
- jb L(more_8x_vec_backward)
-1:
- movq %rdx, %rcx
- rep movsb
-L(nop):
- ret
-#endif
-
-L(less_vec):
- /* Less than 1 VEC. */
-#if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
-# error Unsupported VEC_SIZE!
-#endif
-#if VEC_SIZE > 32
- cmpb $32, %dl
- jae L(between_32_63)
-#endif
-#if VEC_SIZE > 16
- cmpb $16, %dl
- jae L(between_16_31)
-#endif
- cmpb $8, %dl
- jae L(between_8_15)
- cmpb $4, %dl
- jae L(between_4_7)
- cmpb $1, %dl
- ja L(between_2_3)
- jb 1f
- movzbl (%rsi), %ecx
- movb %cl, (%rdi)
-1:
- ret
-#if VEC_SIZE > 32
-L(between_32_63):
- /* From 32 to 63. No branch when size == 32. */
- vmovdqu (%rsi), %ymm0
- vmovdqu -32(%rsi,%rdx), %ymm1
- vmovdqu %ymm0, (%rdi)
- vmovdqu %ymm1, -32(%rdi,%rdx)
- VZEROUPPER
- ret
-#endif
-#if VEC_SIZE > 16
- /* From 16 to 31. No branch when size == 16. */
-L(between_16_31):
- vmovdqu (%rsi), %xmm0
- vmovdqu -16(%rsi,%rdx), %xmm1
- vmovdqu %xmm0, (%rdi)
- vmovdqu %xmm1, -16(%rdi,%rdx)
- ret
-#endif
-L(between_8_15):
- /* From 8 to 15. No branch when size == 8. */
- movq -8(%rsi,%rdx), %rcx
- movq (%rsi), %rsi
- movq %rcx, -8(%rdi,%rdx)
- movq %rsi, (%rdi)
- ret
-L(between_4_7):
- /* From 4 to 7. No branch when size == 4. */
- movl -4(%rsi,%rdx), %ecx
- movl (%rsi), %esi
- movl %ecx, -4(%rdi,%rdx)
- movl %esi, (%rdi)
- ret
-L(between_2_3):
- /* From 2 to 3. No branch when size == 2. */
- movzwl -2(%rsi,%rdx), %ecx
- movzwl (%rsi), %esi
- movw %cx, -2(%rdi,%rdx)
- movw %si, (%rdi)
- ret
-
-#if defined USE_MULTIARCH && IS_IN (libc)
-L(movsb_more_2x_vec):
- cmpq $REP_MOVSB_THRESHOLD, %rdx
- ja L(movsb)
-#endif
-L(more_2x_vec):
- /* More than 2 * VEC and there may be overlap between destination
- and source. */
- cmpq $(VEC_SIZE * 8), %rdx
- ja L(more_8x_vec)
- cmpq $(VEC_SIZE * 4), %rdx
- jb L(last_4x_vec)
- /* Copy from 4 * VEC to 8 * VEC, inclusively. */
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(4)
- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(5)
- VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(6)
- VMOVU -(VEC_SIZE * 4)(%rsi,%rdx), %VEC(7)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), VEC_SIZE(%rdi)
- VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi)
- VMOVU %VEC(4), -VEC_SIZE(%rdi,%rdx)
- VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
- VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
- VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
- VZEROUPPER
- ret
-L(last_4x_vec):
- /* Copy from 2 * VEC to 4 * VEC. */
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2)
- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), VEC_SIZE(%rdi)
- VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
- VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
- VZEROUPPER
- ret
-
-L(more_8x_vec):
- cmpq %rsi, %rdi
- ja L(more_8x_vec_backward)
- /* Source == destination is less common. */
- je L(nop)
- /* Load the first VEC and last 4 * VEC to support overlapping
- addresses. */
- VMOVU (%rsi), %VEC(4)
- VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(5)
- VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
- VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
- VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
- /* Save start and stop of the destination buffer. */
- movq %rdi, %r11
- leaq -VEC_SIZE(%rdi, %rdx), %rcx
- /* Align destination for aligned stores in the loop. Compute
- how much destination is misaligned. */
- movq %rdi, %r8
- andq $(VEC_SIZE - 1), %r8
- /* Get the negative of offset for alignment. */
- subq $VEC_SIZE, %r8
- /* Adjust source. */
- subq %r8, %rsi
- /* Adjust destination which should be aligned now. */
- subq %r8, %rdi
- /* Adjust length. */
- addq %r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
- /* Check non-temporal store threshold. */
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- ja L(large_forward)
-#endif
-L(loop_4x_vec_forward):
- /* Copy 4 * VEC a time forward. */
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
- addq $(VEC_SIZE * 4), %rsi
- subq $(VEC_SIZE * 4), %rdx
- VMOVA %VEC(0), (%rdi)
- VMOVA %VEC(1), VEC_SIZE(%rdi)
- VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi)
- VMOVA %VEC(3), (VEC_SIZE * 3)(%rdi)
- addq $(VEC_SIZE * 4), %rdi
- cmpq $(VEC_SIZE * 4), %rdx
- ja L(loop_4x_vec_forward)
- /* Store the last 4 * VEC. */
- VMOVU %VEC(5), (%rcx)
- VMOVU %VEC(6), -VEC_SIZE(%rcx)
- VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx)
- VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
- /* Store the first VEC. */
- VMOVU %VEC(4), (%r11)
- VZEROUPPER
- ret
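
The destination-alignment bookkeeping at the top of this path can be read as the following C (a sketch assuming VEC_SIZE == 32; align_dst_forward is an invented helper, and the first and last vectors are assumed to have been saved beforehand exactly as above):

#include <stdint.h>
#include <stddef.h>

#define VEC_SIZE 32

/* Mirror of the %r8 arithmetic in L(more_8x_vec): compute how far dst
   sits past a VEC_SIZE boundary, turn that into a negative adjustment,
   and apply it so dst becomes aligned for the VMOVA stores.  Even an
   already-aligned dst is advanced by a full VEC_SIZE, because the first
   vector was loaded up front and is stored only after the loop.  */
static void
align_dst_forward (unsigned char **dst, const unsigned char **src, size_t *len)
{
  intptr_t adj = (intptr_t) ((uintptr_t) *dst & (VEC_SIZE - 1)) - VEC_SIZE;
  *src -= adj;   /* subq %r8, %rsi: advance src by VEC_SIZE - misalignment.  */
  *dst -= adj;   /* subq %r8, %rdi: dst is now VEC_SIZE-aligned.  */
  *len += adj;   /* addq %r8, %rdx: shrink the remaining length.  */
}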
-
-L(more_8x_vec_backward):
- /* Load the first 4 * VEC and last VEC to support overlapping
- addresses. */
- VMOVU (%rsi), %VEC(4)
- VMOVU VEC_SIZE(%rsi), %VEC(5)
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(6)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(7)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(8)
- /* Save stop of the destination buffer. */
- leaq -VEC_SIZE(%rdi, %rdx), %r11
- /* Align destination end for aligned stores in the loop. Compute
- how much destination end is misaligned. */
- leaq -VEC_SIZE(%rsi, %rdx), %rcx
- movq %r11, %r9
- movq %r11, %r8
- andq $(VEC_SIZE - 1), %r8
- /* Adjust source. */
- subq %r8, %rcx
- /* Adjust the end of destination which should be aligned now. */
- subq %r8, %r9
- /* Adjust length. */
- subq %r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
- /* Check non-temporal store threshold. */
- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
- ja L(large_backward)
-#endif
-L(loop_4x_vec_backward):
- /* Copy 4 * VEC a time backward. */
- VMOVU (%rcx), %VEC(0)
- VMOVU -VEC_SIZE(%rcx), %VEC(1)
- VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
- VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
- subq $(VEC_SIZE * 4), %rcx
- subq $(VEC_SIZE * 4), %rdx
- VMOVA %VEC(0), (%r9)
- VMOVA %VEC(1), -VEC_SIZE(%r9)
- VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9)
- VMOVA %VEC(3), -(VEC_SIZE * 3)(%r9)
- subq $(VEC_SIZE * 4), %r9
- cmpq $(VEC_SIZE * 4), %rdx
- ja L(loop_4x_vec_backward)
- /* Store the first 4 * VEC. */
- VMOVU %VEC(4), (%rdi)
- VMOVU %VEC(5), VEC_SIZE(%rdi)
- VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
- /* Store the last VEC. */
- VMOVU %VEC(8), (%r11)
- VZEROUPPER
- ret
-
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
-L(large_forward):
- /* Don't use non-temporal store if there is overlap between
- destination and source since destination may be in cache
- when source is loaded. */
- leaq (%rdi, %rdx), %r10
- cmpq %r10, %rsi
- jb L(loop_4x_vec_forward)
-L(loop_large_forward):
- /* Copy 4 * VEC a time forward with non-temporal stores. */
- PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 2)
- PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 3)
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
- addq $PREFETCHED_LOAD_SIZE, %rsi
- subq $PREFETCHED_LOAD_SIZE, %rdx
- VMOVNT %VEC(0), (%rdi)
- VMOVNT %VEC(1), VEC_SIZE(%rdi)
- VMOVNT %VEC(2), (VEC_SIZE * 2)(%rdi)
- VMOVNT %VEC(3), (VEC_SIZE * 3)(%rdi)
- addq $PREFETCHED_LOAD_SIZE, %rdi
- cmpq $PREFETCHED_LOAD_SIZE, %rdx
- ja L(loop_large_forward)
- sfence
- /* Store the last 4 * VEC. */
- VMOVU %VEC(5), (%rcx)
- VMOVU %VEC(6), -VEC_SIZE(%rcx)
- VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx)
- VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
- /* Store the first VEC. */
- VMOVU %VEC(4), (%r11)
- VZEROUPPER
- ret
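
The streaming loop above corresponds roughly to this C sketch with AVX intrinsics (assuming VEC_SIZE == 32, a 32-byte-aligned destination, a length that is a multiple of 4 * VEC_SIZE, and that the overlap check, prefetching and head/tail stores are handled as in the code above; stream_copy_forward is an illustrative name):

#include <immintrin.h>
#include <stddef.h>

/* Unaligned 32-byte loads, non-temporal 32-byte stores, then an
   SFENCE so the weakly ordered streaming stores are ordered before
   any later stores, mirroring L(loop_large_forward).  */
static void
stream_copy_forward (unsigned char *dst, const unsigned char *src, size_t n)
{
  while (n >= 128)
    {
      __m256i v0 = _mm256_loadu_si256 ((const __m256i *) (src));
      __m256i v1 = _mm256_loadu_si256 ((const __m256i *) (src + 32));
      __m256i v2 = _mm256_loadu_si256 ((const __m256i *) (src + 64));
      __m256i v3 = _mm256_loadu_si256 ((const __m256i *) (src + 96));
      _mm256_stream_si256 ((__m256i *) (dst), v0);
      _mm256_stream_si256 ((__m256i *) (dst + 32), v1);
      _mm256_stream_si256 ((__m256i *) (dst + 64), v2);
      _mm256_stream_si256 ((__m256i *) (dst + 96), v3);
      src += 128;
      dst += 128;
      n -= 128;
    }
  _mm_sfence ();
}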
-
-L(large_backward):
- /* Don't use non-temporal store if there is overlap between
- destination and source since destination may be in cache
- when source is loaded. */
- leaq (%rcx, %rdx), %r10
- cmpq %r10, %r9
- jb L(loop_4x_vec_backward)
-L(loop_large_backward):
- /* Copy 4 * VEC a time backward with non-temporal stores. */
- PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 2)
- PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 3)
- VMOVU (%rcx), %VEC(0)
- VMOVU -VEC_SIZE(%rcx), %VEC(1)
- VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
- VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
- subq $PREFETCHED_LOAD_SIZE, %rcx
- subq $PREFETCHED_LOAD_SIZE, %rdx
- VMOVNT %VEC(0), (%r9)
- VMOVNT %VEC(1), -VEC_SIZE(%r9)
- VMOVNT %VEC(2), -(VEC_SIZE * 2)(%r9)
- VMOVNT %VEC(3), -(VEC_SIZE * 3)(%r9)
- subq $PREFETCHED_LOAD_SIZE, %r9
- cmpq $PREFETCHED_LOAD_SIZE, %rdx
- ja L(loop_large_backward)
- sfence
- /* Store the first 4 * VEC. */
- VMOVU %VEC(4), (%rdi)
- VMOVU %VEC(5), VEC_SIZE(%rdi)
- VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
- /* Store the last VEC. */
- VMOVU %VEC(8), (%r11)
- VZEROUPPER
- ret
-#endif
-END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
-
-#ifdef SHARED
-# if IS_IN (libc)
-# ifdef USE_MULTIARCH
-strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
- MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
-strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
- MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
-# endif
-strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
- MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
-# endif
-#endif
-#if VEC_SIZE == 16 || defined SHARED
-strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned),
- MEMCPY_SYMBOL (__memcpy, unaligned))
-#endif