x86: Add thresholds for "rep movsb/stosb" to tunables

Add x86_rep_movsb_threshold and x86_rep_stosb_threshold to tunables to update thresholds for "rep movsb" and "rep stosb" at run-time.

Note that a user-specified threshold for "rep movsb" smaller than the minimum threshold will be ignored.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
author: H.J. Lu <hjl.tools@gmail.com> 2020-07-06 11:48:09 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2020-07-06 11:48:42 -0700
commit: 3f4b61a0b8de67ef9f20737919c713ddfc4bd620 (patch)
tree: 521cfbc4f297a2fe5d4fc91e6c30d590f1225027 /sysdeps/x86_64
parent: 6c010c5dde1735f93cc3a6597cdcc2b482af85f8 (diff)
download: glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.tar
glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.tar.gz
glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.tar.bz2
glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.zip
2 files changed, 2 insertions, 26 deletions
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index 74953245aa..bd5dc1a3f3 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -56,17 +56,6 @@
 # endif
 #endif
 
-/* Threshold to use Enhanced REP MOVSB.  Since there is overhead to set
-   up REP MOVSB operation, REP MOVSB isn't faster on short data.  The
-   memcpy micro benchmark in glibc shows that 2KB is the approximate
-   value above which REP MOVSB becomes faster than SSE2 optimization
-   on processors with Enhanced REP MOVSB.  Since larger register size
-   can move more data with a single load and store, the threshold is
-   higher with larger register size.  */
-#ifndef REP_MOVSB_THRESHOLD
-# define REP_MOVSB_THRESHOLD	(2048 * (VEC_SIZE / 16))
-#endif
-
 #ifndef PREFETCH
 # define PREFETCH(addr) prefetcht0 addr
 #endif
@@ -253,9 +242,6 @@ L(movsb):
 	leaq	(%rsi,%rdx), %r9
 	cmpq	%r9, %rdi
 	/* Avoid slow backward REP MOVSB.  */
-# if REP_MOVSB_THRESHOLD <= (VEC_SIZE * 8)
-#  error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE!
-# endif
 	jb	L(more_8x_vec_backward)
 1:
 	mov	%RDX_LP, %RCX_LP
@@ -331,7 +317,7 @@ L(between_2_3):
 
 #if defined USE_MULTIARCH && IS_IN (libc)
 L(movsb_more_2x_vec):
-	cmpq	$REP_MOVSB_THRESHOLD, %rdx
+	cmp	__x86_rep_movsb_threshold(%rip), %RDX_LP
 	ja	L(movsb)
 #endif
 L(more_2x_vec):
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index af2299709c..2bfc95de05 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -58,16 +58,6 @@
 # endif
 #endif
 
-/* Threshold to use Enhanced REP STOSB.  Since there is overhead to set
-   up REP STOSB operation, REP STOSB isn't faster on short data.  The
-   memset micro benchmark in glibc shows that 2KB is the approximate
-   value above which REP STOSB becomes faster on processors with
-   Enhanced REP STOSB.  Since the stored value is fixed, larger register
-   size has minimal impact on threshold.  */
-#ifndef REP_STOSB_THRESHOLD
-# define REP_STOSB_THRESHOLD		2048
-#endif
-
 #ifndef SECTION
 # error SECTION is not defined!
 #endif
@@ -181,7 +171,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
 	ret
 
 L(stosb_more_2x_vec):
-	cmpq	$REP_STOSB_THRESHOLD, %rdx
+	cmp	__x86_rep_stosb_threshold(%rip), %RDX_LP
 	ja	L(stosb)
 #endif
 L(more_2x_vec):
author	H.J. Lu <hjl.tools@gmail.com>	2020-07-06 11:48:09 -0700
committer	H.J. Lu <hjl.tools@gmail.com>	2020-07-06 11:48:42 -0700
commit	3f4b61a0b8de67ef9f20737919c713ddfc4bd620 (patch)
tree	521cfbc4f297a2fe5d4fc91e6c30d590f1225027 /sysdeps/x86_64
parent	6c010c5dde1735f93cc3a6597cdcc2b482af85f8 (diff)
download	glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.tar glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.tar.gz glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.tar.bz2 glibc-3f4b61a0b8de67ef9f20737919c713ddfc4bd620.zip