author     H.J. Lu <hjl.tools@gmail.com>  2016-03-04 08:37:40 -0800
committer  H.J. Lu <hjl.tools@gmail.com>  2016-04-02 10:53:45 -0700
commit     0c8e297a186f844ebb7eba7a3bc0343c83615ca9 (patch)
tree       525bcfa2e8e0a14b5a52d96b7f3a4220ec443b7b
parent     3c772cb4d9cbe19cd97ad991e3dab43014198c44 (diff)
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing
selection order is updated to the following:

1. __memcpy_avx_unaligned if the AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if the Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if the Fast_Copy_Backward bit is set.
5. __memcpy_ssse3 otherwise.

	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.

(cherry picked from commit 14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8)
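In C terms, the new dispatch is a straight chain of feature tests.  Below
is a minimal sketch of that chain; has_arch_feature()/has_cpu_feature()
are hypothetical stand-ins for glibc's HAS_ARCH_FEATURE/HAS_CPU_FEATURE
macros, and the extern declarations stand in for the real assembly
implementations.

/* A minimal C sketch of the selection order above.  The feature-test
   helpers and the extern declarations are hypothetical stand-ins.  */
#include <stddef.h>

extern int has_arch_feature (const char *name);	/* hypothetical */
extern int has_cpu_feature (const char *name);	/* hypothetical */

extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2 (void *, const void *, size_t);
extern void *__memcpy_ssse3_back (void *, const void *, size_t);
extern void *__memcpy_ssse3 (void *, const void *, size_t);

typedef void *(*memcpy_fn) (void *, const void *, size_t);

static memcpy_fn
select_memcpy (void)
{
  /* 1. AVX variant when unaligned AVX loads are fast.  */
  if (has_arch_feature ("AVX_Fast_Unaligned_Load"))
    return __memcpy_avx_unaligned;
  /* 2. Unaligned SSE2 loads when they are fast.  */
  if (has_arch_feature ("Fast_Unaligned_Load"))
    return __memcpy_sse2_unaligned;
  /* 3. Plain SSE2 when SSSE3 isn't available.  */
  if (!has_cpu_feature ("SSSE3"))
    return __memcpy_sse2;
  /* 4. Backward SSSE3 copier when backward copies are fast.  */
  if (has_arch_feature ("Fast_Copy_Backward"))
    return __memcpy_ssse3_back;
  /* 5. Default SSSE3 variant.  */
  return __memcpy_ssse3;
}

The assembly in the diff implements the same chain by loading each
candidate's address into RAX and conditionally jumping to a single
shared ret (label 2), since an IFUNC resolver returns the selected
implementation in RAX.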
-rw-r--r--	sysdeps/x86_64/multiarch/memcpy.S	27
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8841ec81a0..1b67326ea8 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz	4f
-	leaq	__memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea	__memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY
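__new_memcpy is the IFUNC resolver for memcpy: it runs during relocation,
and the address it returns is what memcpy calls get bound to.  Note the
diff also rewrites leaq/%rax as lea/%RAX_LP; RAX_LP is glibc's operand
macro that expands to %rax on LP64 and %eax on x32, so the resolver
returns a correctly sized pointer on both ABIs.  For illustration, here
is the same resolver pattern in C using GCC's ifunc attribute; the
my_memcpy_* names and the __builtin_cpu_supports() probe are hypothetical
stand-ins, not glibc's actual code, which reads its own cpu_features data.

/* A minimal, self-contained sketch of the IFUNC pattern, assuming a
   GCC/x86 toolchain.  Everything here is illustrative; glibc's real
   resolver is the assembly above.  */
#include <stddef.h>

static void *
my_memcpy_generic (void *dst, const void *src, size_t n)
{
  /* Portable fallback: byte-by-byte copy.  */
  char *d = dst;
  const char *s = src;
  while (n-- > 0)
    *d++ = *s++;
  return dst;
}

static void *
my_memcpy_ssse3 (void *dst, const void *src, size_t n)
{
  /* Stand-in for an optimized variant such as __memcpy_ssse3.  */
  return my_memcpy_generic (dst, src, n);
}

/* The resolver returns the variant the dynamic linker should bind
   my_memcpy to.  It may run before constructors, so initialize the
   CPU model data explicitly before querying it.  */
static void *(*resolve_my_memcpy (void)) (void *, const void *, size_t)
{
  __builtin_cpu_init ();
  return __builtin_cpu_supports ("ssse3")
	 ? my_memcpy_ssse3 : my_memcpy_generic;
}

void *my_memcpy (void *dst, const void *src, size_t n)
     __attribute__ ((ifunc ("resolve_my_memcpy")));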