summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2016-04-06 09:10:18 -0700
committerH.J. Lu <hjl.tools@gmail.com>2016-04-06 09:10:35 -0700
commit4af1bb06c59d24f35bf8dc55897838d926c05892 (patch)
tree905c9d344060fde892d029d30015883fc4d2798a
parenta25322f4e855eb5b24c63c2395c941c3327627ca (diff)
downloadglibc-4af1bb06c59d24f35bf8dc55897838d926c05892.tar
glibc-4af1bb06c59d24f35bf8dc55897838d926c05892.tar.gz
glibc-4af1bb06c59d24f35bf8dc55897838d926c05892.tar.bz2
glibc-4af1bb06c59d24f35bf8dc55897838d926c05892.zip
X86-64: Prepare memset-vec-unaligned-erms.S
Prepare memset-vec-unaligned-erms.S so that the SSE2 version can be made the default memset. * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (MEMSET_CHK_SYMBOL): New. Define if not defined. (__bzero): Check VEC_SIZE == 16 instead of USE_MULTIARCH. Disabled for now. Replace MEMSET_SYMBOL with MEMSET_CHK_SYMBOL on __memset_chk symbols. Properly check USE_MULTIARCH on __memset symbols.
-rw-r--r--ChangeLog9
-rw-r--r--sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S32
2 files changed, 28 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 4ba9309333..c801aff3f3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2016-04-06 H.J. Lu <hongjiu.lu@intel.com>
+ * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+ (MEMSET_CHK_SYMBOL): New. Define if not defined.
+ (__bzero): Check VEC_SIZE == 16 instead of USE_MULTIARCH.
+ Disabled for now.
+ Replace MEMSET_SYMBOL with MEMSET_CHK_SYMBOL on __memset_chk
+ symbols. Properly check USE_MULTIARCH on __memset symbols.
+
+2016-04-06 H.J. Lu <hongjiu.lu@intel.com>
+
* benchtests/Makefile (string-benchset): Add memcpy-large,
memmove-large and memset-large.
* benchtests/bench-memcpy-large.c: New file.
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index fe0f74516d..578a5ae0a2 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -28,6 +28,10 @@
#include <sysdep.h>
+#ifndef MEMSET_CHK_SYMBOL
+# define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s)
+#endif
+
#ifndef VZEROUPPER
# if VEC_SIZE > 16
# define VZEROUPPER vzeroupper
@@ -66,8 +70,8 @@
# error SECTION is not defined!
#endif
-#if !defined USE_MULTIARCH && IS_IN (libc)
.section SECTION(.text),"ax",@progbits
+#if VEC_SIZE == 16 && IS_IN (libc) && 0
ENTRY (__bzero)
movq %rdi, %rax /* Set return value. */
movq %rsi, %rdx /* Set n. */
@@ -78,10 +82,10 @@ weak_alias (__bzero, bzero)
#endif
#if defined SHARED && IS_IN (libc)
-ENTRY_CHK (MEMSET_SYMBOL (__memset_chk, unaligned))
+ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (MEMSET_SYMBOL (__memset_chk, unaligned))
+END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
#endif
ENTRY (MEMSET_SYMBOL (__memset, unaligned))
@@ -97,15 +101,16 @@ L(entry_from_bzero):
VMOVU %VEC(0), (%rdi)
VZEROUPPER
ret
+#if defined USE_MULTIARCH && IS_IN (libc)
END (MEMSET_SYMBOL (__memset, unaligned))
-#if VEC_SIZE == 16
+# if VEC_SIZE == 16
/* Only used to measure performance of REP STOSB. */
ENTRY (__memset_erms)
-#else
+# else
/* Provide a symbol to debugger. */
ENTRY (MEMSET_SYMBOL (__memset, erms))
-#endif
+# endif
L(stosb):
movq %rdx, %rcx
movzbl %sil, %eax
@@ -113,18 +118,18 @@ L(stosb):
rep stosb
movq %rdx, %rax
ret
-#if VEC_SIZE == 16
+# if VEC_SIZE == 16
END (__memset_erms)
-#else
+# else
END (MEMSET_SYMBOL (__memset, erms))
-#endif
+# endif
-#if defined SHARED && IS_IN (libc)
-ENTRY_CHK (MEMSET_SYMBOL (__memset_chk, unaligned_erms))
+# if defined SHARED && IS_IN (libc)
+ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (MEMSET_SYMBOL (__memset_chk, unaligned_erms))
-#endif
+END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
+# endif
ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
@@ -144,6 +149,7 @@ L(stosb_more_2x_vec):
/* Force 32-bit displacement to avoid long nop between
instructions. */
ja.d32 L(stosb)
+#endif
.p2align 4
L(more_2x_vec):
cmpq $(VEC_SIZE * 4), %rdx