From 180897c161a171d8ef0faee1c6c9fd6b57d8b13b Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 7 Jun 2023 13:18:03 -0500 Subject: x86: Make the divisor in setting `non_temporal_threshold` cpu specific Different systems prefer different divisors. From benchmarks[1] so far, the following divisors have been found: ICX : 2 SKX : 2 BWD : 8 For Intel, we are generalizing that BWD and older prefer 8 as a divisor, and SKL and newer prefer 2. This number can be further tuned as benchmarks are run. [1]: https://github.com/goldsteinn/memcpy-nt-benchmarks Reviewed-by: DJ Delorie --- sysdeps/x86/include/cpu-features.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'sysdeps/x86/include/cpu-features.h') diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index 40b8129d6a..c740e1a5fc 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -945,6 +945,9 @@ struct cpu_features unsigned long int level3_cache_linesize; /* /_SC_LEVEL4_CACHE_SIZE. */ unsigned long int level4_cache_size; + /* When no user non_temporal_threshold is specified. We default to + cachesize / cachesize_non_temporal_divisor. */ + unsigned long int cachesize_non_temporal_divisor; }; /* Get a pointer to the CPU features structure. */ -- cgit v1.2.3