about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- sysdeps/x86/cacheinfo.c 150
2 files changed, 122 insertions, 34 deletions
diff --git a/ChangeLog b/ChangeLog
index 776b7cab98..1e11464acc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2016-05-27 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86/cacheinfo.c (init_cacheinfo): Count number of
+ available logical processors with SMT level type sharing L2
+ cache for Intel processors.
+
2016-05-27 Joseph Myers <joseph@codesourcery.com>
[BZ #20160]
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 182426b2d0..cf4f64b0d8 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -499,11 +499,24 @@ init_cacheinfo (void)
level = 3;
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
+ /* Number of logical processors sharing L2 cache. */
+ int threads_l2;
+
+ /* Number of logical processors sharing L3 cache. */
+ int threads_l3;
+
if (shared <= 0)
{
/* Try L2 otherwise. */
level = 2;
shared = core;
+ threads_l2 = 0;
+ threads_l3 = -1;
+ }
+ else
+ {
+ threads_l2 = 0;
+ threads_l3 = 0;
}
/* A value of 0 for the HTT bit indicates there is only a single
@@ -519,7 +532,8 @@ init_cacheinfo (void)
int i = 0;
- /* Query until desired cache level is enumerated. */
+ /* Query until cache level 2 and 3 are enumerated. */
+ int check = 0x1 | (threads_l3 == 0) << 1;
do
{
__cpuid_count (4, i++, eax, ebx, ecx, edx);
@@ -530,24 +544,53 @@ init_cacheinfo (void)
assume there is no such information. */
if ((eax & 0x1f) == 0)
goto intel_bug_no_cache_info;
- }
- while (((eax >> 5) & 0x7) != level);
- /* Check if cache is inclusive of lower cache levels. */
- inclusive_cache = (edx & 0x2) != 0;
+ switch ((eax >> 5) & 0x7)
+ {
+ default:
+ break;
+ case 2:
+ if ((check & 0x1))
+ {
+ /* Get maximum number of logical processors
+ sharing L2 cache. */
+ threads_l2 = (eax >> 14) & 0x3ff;
+ check &= ~0x1;
+ }
+ break;
+ case 3:
+ if ((check & (0x1 << 1)))
+ {
+ /* Get maximum number of logical processors
+ sharing L3 cache. */
+ threads_l3 = (eax >> 14) & 0x3ff;
- threads = (eax >> 14) & 0x3ff;
+ /* Check if L2 and L3 caches are inclusive. */
+ inclusive_cache = (edx & 0x2) != 0;
+ check &= ~(0x1 << 1);
+ }
+ break;
+ }
+ }
+ while (check);
- /* If max_cpuid >= 11, THREADS is the maximum number of
- addressable IDs for logical processors sharing the
- cache, instead of the maximum number of threads
+ /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
+ numbers of addressable IDs for logical processors sharing
+ the cache, instead of the maximum number of threads
sharing the cache. */
- if (threads && max_cpuid >= 11)
+ if (max_cpuid >= 11)
{
/* Find the number of logical processors shipped in
one core and apply count mask. */
i = 0;
- while (1)
+
+ /* Count SMT only if there is L3 cache. Always count
+ core if there is no L3 cache. */
+ int count = ((threads_l2 > 0 && level == 3)
+ | ((threads_l3 > 0
+ || (threads_l2 > 0 && level == 2)) << 1));
+
+ while (count)
{
__cpuid_count (11, i++, eax, ebx, ecx, edx);
@@ -555,36 +598,71 @@ init_cacheinfo (void)
int type = ecx & 0xff00;
if (shipped == 0 || type == 0)
break;
+ else if (type == 0x100)
+ {
+ /* Count SMT. */
+ if ((count & 0x1))
+ {
+ int count_mask;
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads_l2));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads_l2 = (shipped - 1) & count_mask;
+ count &= ~0x1;
+ }
+ }
else if (type == 0x200)
{
- int count_mask;
-
- /* Compute count mask. */
- asm ("bsr %1, %0"
- : "=r" (count_mask) : "g" (threads));
- count_mask = ~(-1 << (count_mask + 1));
- threads = (shipped - 1) & count_mask;
- break;
+ /* Count core. */
+ if ((count & (0x1 << 1)))
+ {
+ int count_mask;
+ int threads_core
+ = (level == 2 ? threads_l2 : threads_l3);
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads_core));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads_core = (shipped - 1) & count_mask;
+ if (level == 2)
+ threads_l2 = threads_core;
+ else
+ threads_l3 = threads_core;
+ count &= ~(0x1 << 1);
+ }
}
}
}
- threads += 1;
- if (threads > 2 && level == 2 && family == 6)
+ if (threads_l2 > 0)
+ threads_l2 += 1;
+ if (threads_l3 > 0)
+ threads_l3 += 1;
+ if (level == 2)
{
- switch (model)
+ if (threads_l2)
{
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5a:
- case 0x5d:
- /* Silvermont has L2 cache shared by 2 cores. */
- threads = 2;
- break;
- default:
- break;
+ threads = threads_l2;
+ if (threads > 2 && family == 6)
+ switch (model)
+ {
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5a:
+ case 0x5d:
+ /* Silvermont has L2 cache shared by 2 cores. */
+ threads = 2;
+ break;
+ default:
+ break;
+ }
}
}
+ else if (threads_l3)
+ threads = threads_l3;
}
else
{
@@ -604,8 +682,12 @@ intel_bug_no_cache_info:
}
/* Account for non-inclusive L2 and L3 caches. */
- if (level == 3 && !inclusive_cache)
- shared += core;
+ if (!inclusive_cache)
+ {
+ if (threads_l2 > 0)
+ core /= threads_l2;
+ shared += core;
+ }
}
/* This spells out "AuthenticAMD". */
else if (is_amd)