diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | sysdeps/x86/cacheinfo.c | 150 |
2 files changed, 122 insertions, 34 deletions
@@ -1,3 +1,9 @@ +2016-05-27 H.J. Lu <hongjiu.lu@intel.com> + + * sysdeps/x86/cacheinfo.c (init_cacheinfo): Count number of + available logical processors with SMT level type sharing L2 + cache for Intel processors. + 2016-05-27 Joseph Myers <joseph@codesourcery.com> [BZ #20160] diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index 182426b2d0..cf4f64b0d8 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -499,11 +499,24 @@ init_cacheinfo (void) level = 3; shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid); + /* Number of logical processors sharing L2 cache. */ + int threads_l2; + + /* Number of logical processors sharing L3 cache. */ + int threads_l3; + if (shared <= 0) { /* Try L2 otherwise. */ level = 2; shared = core; + threads_l2 = 0; + threads_l3 = -1; + } + else + { + threads_l2 = 0; + threads_l3 = 0; } /* A value of 0 for the HTT bit indicates there is only a single @@ -519,7 +532,8 @@ init_cacheinfo (void) int i = 0; - /* Query until desired cache level is enumerated. */ + /* Query until cache level 2 and 3 are enumerated. */ + int check = 0x1 | (threads_l3 == 0) << 1; do { __cpuid_count (4, i++, eax, ebx, ecx, edx); @@ -530,24 +544,53 @@ init_cacheinfo (void) assume there is no such information. */ if ((eax & 0x1f) == 0) goto intel_bug_no_cache_info; - } - while (((eax >> 5) & 0x7) != level); - /* Check if cache is inclusive of lower cache levels. */ - inclusive_cache = (edx & 0x2) != 0; + switch ((eax >> 5) & 0x7) + { + default: + break; + case 2: + if ((check & 0x1)) + { + /* Get maximum number of logical processors + sharing L2 cache. */ + threads_l2 = (eax >> 14) & 0x3ff; + check &= ~0x1; + } + break; + case 3: + if ((check & (0x1 << 1))) + { + /* Get maximum number of logical processors + sharing L3 cache. */ + threads_l3 = (eax >> 14) & 0x3ff; - threads = (eax >> 14) & 0x3ff; + /* Check if L2 and L3 caches are inclusive. */ + inclusive_cache = (edx & 0x2) != 0; + check &= ~(0x1 << 1); + } + break; + } + } + while (check); - /* If max_cpuid >= 11, THREADS is the maximum number of - addressable IDs for logical processors sharing the - cache, instead of the maximum number of threads + /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum + numbers of addressable IDs for logical processors sharing + the cache, instead of the maximum number of threads sharing the cache. */ - if (threads && max_cpuid >= 11) + if (max_cpuid >= 11) { /* Find the number of logical processors shipped in one core and apply count mask. */ i = 0; - while (1) + + /* Count SMT only if there is L3 cache. Always count + core if there is no L3 cache. */ + int count = ((threads_l2 > 0 && level == 3) + | ((threads_l3 > 0 + || (threads_l2 > 0 && level == 2)) << 1)); + + while (count) { __cpuid_count (11, i++, eax, ebx, ecx, edx); @@ -555,36 +598,71 @@ init_cacheinfo (void) int type = ecx & 0xff00; if (shipped == 0 || type == 0) break; + else if (type == 0x100) + { + /* Count SMT. */ + if ((count & 0x1)) + { + int count_mask; + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads_l2)); + count_mask = ~(-1 << (count_mask + 1)); + threads_l2 = (shipped - 1) & count_mask; + count &= ~0x1; + } + } else if (type == 0x200) { - int count_mask; - - /* Compute count mask. */ - asm ("bsr %1, %0" - : "=r" (count_mask) : "g" (threads)); - count_mask = ~(-1 << (count_mask + 1)); - threads = (shipped - 1) & count_mask; - break; + /* Count core. */ + if ((count & (0x1 << 1))) + { + int count_mask; + int threads_core + = (level == 2 ? threads_l2 : threads_l3); + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads_core)); + count_mask = ~(-1 << (count_mask + 1)); + threads_core = (shipped - 1) & count_mask; + if (level == 2) + threads_l2 = threads_core; + else + threads_l3 = threads_core; + count &= ~(0x1 << 1); + } } } } - threads += 1; - if (threads > 2 && level == 2 && family == 6) + if (threads_l2 > 0) + threads_l2 += 1; + if (threads_l3 > 0) + threads_l3 += 1; + if (level == 2) { - switch (model) + if (threads_l2) { - case 0x37: - case 0x4a: - case 0x4d: - case 0x5a: - case 0x5d: - /* Silvermont has L2 cache shared by 2 cores. */ - threads = 2; - break; - default: - break; + threads = threads_l2; + if (threads > 2 && family == 6) + switch (model) + { + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Silvermont has L2 cache shared by 2 cores. */ + threads = 2; + break; + default: + break; + } } } + else if (threads_l3) + threads = threads_l3; } else { @@ -604,8 +682,12 @@ intel_bug_no_cache_info: } /* Account for non-inclusive L2 and L3 caches. */ - if (level == 3 && !inclusive_cache) - shared += core; + if (!inclusive_cache) + { + if (threads_l2 > 0) + core /= threads_l2; + shared += core; + } } /* This spells out "AuthenticAMD". */ else if (is_amd) |