aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2016-05-19 09:09:00 -0700
committerH.J. Lu <hjl.tools@gmail.com>2016-05-19 09:09:00 -0700
commit7c08d791ee4fabf96d96b66dec803602e621057c (patch)
treefdc67fe76da8482d407594c70260a64c27e31cb7
parenteb2c88c7c83901737db5c4de7dc4470c5681b2cb (diff)
downloadglibc-7c08d791ee4fabf96d96b66dec803602e621057c.tar
glibc-7c08d791ee4fabf96d96b66dec803602e621057c.tar.gz
glibc-7c08d791ee4fabf96d96b66dec803602e621057c.tar.bz2
glibc-7c08d791ee4fabf96d96b66dec803602e621057c.zip
Check the HTT bit before counting logical threads
Skip counting logical threads for Intel processors if the HTT bit is 0 which indicates there is only a single logical processor. * sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting logical threads if the HTT bit is 0. * sysdeps/x86/cpu-features.h (bit_cpu_HTT): New. (index_cpu_HTT): Likewise. (reg_HTT): Likewise.
-rw-r--r--ChangeLog8
-rw-r--r--sysdeps/x86/cacheinfo.c158
-rw-r--r--sysdeps/x86/cpu-features.h3
3 files changed, 93 insertions, 76 deletions
diff --git a/ChangeLog b/ChangeLog
index 8a4918cbc8..8adf828d1b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
+ * sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
+ logical threads if the HTT bit is 0.
+ * sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
+ (index_cpu_HTT): Likewise.
+ (reg_HTT): Likewise.
+
+2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
+
[BZ #20115]
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S (__memset):
Remove alignments on jump targets.
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 8408624ea4..1f46d9de20 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -506,99 +506,105 @@ init_cacheinfo (void)
shared = core;
}
- /* Figure out the number of logical threads that share the
- highest cache level. */
- if (max_cpuid >= 4)
+ /* A value of 0 for the HTT bit indicates there is only a single
+ logical processor. */
+ if (HAS_CPU_FEATURE (HTT))
{
- unsigned int family = GLRO(dl_x86_cpu_features).family;
- unsigned int model = GLRO(dl_x86_cpu_features).model;
+ /* Figure out the number of logical threads that share the
+ highest cache level. */
+ if (max_cpuid >= 4)
+ {
+ unsigned int family = GLRO(dl_x86_cpu_features).family;
+ unsigned int model = GLRO(dl_x86_cpu_features).model;
- int i = 0;
+ int i = 0;
- /* Query until desired cache level is enumerated. */
- do
- {
- __cpuid_count (4, i++, eax, ebx, ecx, edx);
-
- /* There seems to be a bug in at least some Pentium Ds
- which sometimes fail to iterate all cache parameters.
- Do not loop indefinitely here, stop in this case and
- assume there is no such information. */
- if ((eax & 0x1f) == 0)
- goto intel_bug_no_cache_info;
- }
- while (((eax >> 5) & 0x7) != level);
+ /* Query until desired cache level is enumerated. */
+ do
+ {
+ __cpuid_count (4, i++, eax, ebx, ecx, edx);
+
+ /* There seems to be a bug in at least some Pentium Ds
+ which sometimes fail to iterate all cache parameters.
+ Do not loop indefinitely here, stop in this case and
+ assume there is no such information. */
+ if ((eax & 0x1f) == 0)
+ goto intel_bug_no_cache_info;
+ }
+ while (((eax >> 5) & 0x7) != level);
- /* Check if cache is inclusive of lower cache levels. */
- inclusive_cache = (edx & 0x2) != 0;
+ /* Check if cache is inclusive of lower cache levels. */
+ inclusive_cache = (edx & 0x2) != 0;
- threads = (eax >> 14) & 0x3ff;
+ threads = (eax >> 14) & 0x3ff;
- /* If max_cpuid >= 11, THREADS is the maximum number of
- addressable IDs for logical processors sharing the
- cache, instead of the maximum number of threads
- sharing the cache. */
- if (threads && max_cpuid >= 11)
- {
- /* Find the number of logical processors shipped in
- one core and apply count mask. */
- i = 0;
- while (1)
+ /* If max_cpuid >= 11, THREADS is the maximum number of
+ addressable IDs for logical processors sharing the
+ cache, instead of the maximum number of threads
+ sharing the cache. */
+ if (threads && max_cpuid >= 11)
{
- __cpuid_count (11, i++, eax, ebx, ecx, edx);
-
- int shipped = ebx & 0xff;
- int type = ecx & 0xff0;
- if (shipped == 0 || type == 0)
- break;
- else if (type == 0x200)
+ /* Find the number of logical processors shipped in
+ one core and apply count mask. */
+ i = 0;
+ while (1)
{
- int count_mask;
-
- /* Compute count mask. */
- asm ("bsr %1, %0"
- : "=r" (count_mask) : "g" (threads));
- count_mask = ~(-1 << (count_mask + 1));
- threads = (shipped - 1) & count_mask;
+ __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+ int shipped = ebx & 0xff;
+ int type = ecx & 0xff0;
+ if (shipped == 0 || type == 0)
+ break;
+ else if (type == 0x200)
+ {
+ int count_mask;
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads = (shipped - 1) & count_mask;
+ break;
+ }
+ }
+ }
+ threads += 1;
+ if (threads > 2 && level == 2 && family == 6)
+ {
+ switch (model)
+ {
+ case 0x57:
+ /* Knights Landing has L2 cache shared by 2 cores. */
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5a:
+ case 0x5d:
+ /* Silvermont has L2 cache shared by 2 cores. */
+ threads = 2;
+ break;
+ default:
break;
}
}
}
- threads += 1;
- if (threads > 2 && level == 2 && family == 6)
+ else
{
- switch (model)
- {
- case 0x57:
- /* Knights Landing has L2 cache shared by 2 cores. */
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont has L2 cache shared by 2 cores. */
- threads = 2;
- break;
- default:
- break;
- }
+intel_bug_no_cache_info:
+ /* Assume that all logical threads share the highest cache
+ level. */
+
+ threads
+ = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+ >> 16) & 0xff);
}
- }
- else
- {
- intel_bug_no_cache_info:
- /* Assume that all logical threads share the highest cache level. */
- threads
- = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
- >> 16) & 0xff);
+ /* Cap usage of highest cache level to the number of supported
+ threads. */
+ if (shared > 0 && threads > 0)
+ shared /= threads;
}
- /* Cap usage of highest cache level to the number of supported
- threads. */
- if (shared > 0 && threads > 0)
- shared /= threads;
-
/* Account for non-inclusive L2 and L3 caches. */
if (level == 3 && !inclusive_cache)
shared += core;
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 9529d61ff5..2bd93713a1 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -51,6 +51,7 @@
#define bit_cpu_POPCOUNT (1 << 23)
#define bit_cpu_FMA (1 << 12)
#define bit_cpu_FMA4 (1 << 16)
+#define bit_cpu_HTT (1 << 28)
/* COMMON_CPUID_INDEX_7. */
#define bit_cpu_ERMS (1 << 9)
@@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
+# define index_cpu_HTT COMMON_CPUID_INDEX_1
# define reg_CX8 edx
# define reg_CMOV edx
@@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define reg_FMA4 ecx
# define reg_POPCOUNT ecx
# define reg_OSXSAVE ecx
+# define reg_HTT edx
# define index_arch_Fast_Rep_String FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1