aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86/include/cpu-features.h
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2023-06-07 13:18:03 -0500
committerNoah Goldstein <goldstein.w.n@gmail.com>2023-06-12 11:33:39 -0500
commit180897c161a171d8ef0faee1c6c9fd6b57d8b13b (patch)
tree89e71e02a6e1edc57bb13f311228816dcbc92bd6 /sysdeps/x86/include/cpu-features.h
parentf193ea20eddc6cef84cba54cf1a647204ee6a86b (diff)
downloadglibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.tar
glibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.tar.gz
glibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.tar.bz2
glibc-180897c161a171d8ef0faee1c6c9fd6b57d8b13b.zip
x86: Make the divisor in setting `non_temporal_threshold` cpu specific
Different systems prefer different divisors. From benchmarks[1], the following divisors have been found so far: ICX: 2, SKX: 2, BWD: 8. For Intel, we are generalizing that BWD and older prefer 8 as a divisor, and SKL and newer prefer 2. This number can be further tuned as more benchmarks are run. [1]: https://github.com/goldsteinn/memcpy-nt-benchmarks Reviewed-by: DJ Delorie <dj@redhat.com>
Diffstat (limited to 'sysdeps/x86/include/cpu-features.h')
-rw-r--r--sysdeps/x86/include/cpu-features.h3
1 files changed, 3 insertions, 0 deletions
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index 40b8129d6a..c740e1a5fc 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -945,6 +945,9 @@ struct cpu_features
unsigned long int level3_cache_linesize;
/* /_SC_LEVEL4_CACHE_SIZE. */
unsigned long int level4_cache_size;
+ /* When no user non_temporal_threshold is specified, we default to
+ cachesize / cachesize_non_temporal_divisor. */
+ unsigned long int cachesize_non_temporal_divisor;
};
/* Get a pointer to the CPU features structure. */