aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2021-04-30 05:58:59 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2021-05-03 05:08:22 -0700
commit: cf2c57526ba4b57e6863ad4db8a868e2678adce8 (patch)
tree: bf6a69a19ea3df3d4e905c73298dbbee74559ebe
parent: 98544f5bcf1bef9311463ded60ddd3941c75a547 (diff)
download: glibc-cf2c57526ba4b57e6863ad4db8a868e2678adce8.tar
glibc-cf2c57526ba4b57e6863ad4db8a868e2678adce8.tar.gz
glibc-cf2c57526ba4b57e6863ad4db8a868e2678adce8.tar.bz2
glibc-cf2c57526ba4b57e6863ad4db8a868e2678adce8.zip
x86: Set rep_movsb_threshold to 2112 on processors with FSRM
The glibc memcpy benchmark on Intel Core i7-1065G7 (Ice Lake) showed
that REP MOVSB became faster after 2112 bytes:

                                      Vector Move       REP MOVSB
length=2112, align1=0, align2=0:        24.20             24.40
length=2112, align1=1, align2=0:        26.07             23.13
length=2112, align1=0, align2=1:        27.18             28.13
length=2112, align1=1, align2=1:        26.23             25.16
length=2176, align1=0, align2=0:        23.18             22.52
length=2176, align1=2, align2=0:        25.45             22.52
length=2176, align1=0, align2=2:        27.14             27.82
length=2176, align1=2, align2=2:        22.73             25.56
length=2240, align1=0, align2=0:        24.62             24.25
length=2240, align1=3, align2=0:        29.77             27.15
length=2240, align1=0, align2=3:        35.55             29.93
length=2240, align1=3, align2=3:        34.49             25.15
length=2304, align1=0, align2=0:        34.75             26.64
length=2304, align1=4, align2=0:        32.09             22.63
length=2304, align1=0, align2=4:        28.43             31.24

Use REP MOVSB for data size > 2112 bytes in memcpy on processors with
fast short REP MOVSB (FSRM).

	* sysdeps/x86/dl-cacheinfo.h (dl_init_cacheinfo): Set
	rep_movsb_threshold to 2112 on processors with fast short REP
	MOVSB (FSRM).
-rw-r--r-- sysdeps/x86/dl-cacheinfo.h 4
1 files changed, 4 insertions, 0 deletions
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index d9944250fc..e6c94dfd02 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -891,6 +891,10 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
minimum_rep_movsb_threshold = 16 * 8;
#endif
}
+ /* NB: The default REP MOVSB threshold is 2112 on processors with fast
+ short REP MOVSB (FSRM). */
+ if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
+ rep_movsb_threshold = 2112;
unsigned long int rep_movsb_stop_threshold;
/* ERMS feature is implemented from AMD Zen3 architecture and it is