aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2009-08-07 09:39:36 -0700
committerUlrich Drepper <drepper@redhat.com>2009-08-07 09:39:36 -0700
commita546baa9cd2e5176e9851811d5df6f23e35d3bb8 (patch)
tree1db7355ddca82021eef7f9f2d2baa11e3da78a2b
parent77c84aeb81808c3109665949448dba59965c391e (diff)
downloadglibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar
glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar.gz
glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.tar.bz2
glibc-a546baa9cd2e5176e9851811d5df6f23e35d3bb8.zip
Properly count number of logical processors on Intel CPUs.
The meaning of the 25-14 bits in EAX returned from cpuid with EAX = 4 has been changed from "the maximum number of threads sharing the cache" to "the maximum number of addressable IDs for logical processors sharing the cache" if cpuid takes EAX = 11. We need to use results from both EAX = 4 and EAX = 11 to get the number of threads sharing the cache. The 25-14 bits in EAX on Core i7 is 15 although the number of logical processors is 8. Here is a white paper on this: http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/ This patch correctly counts number of logical processors on Intel CPUs with EAX = 11 support on cpuid. Tested on Dinnington, Core i7 and Nehalem EX/EP. It also fixed Pentium Ds workaround since EBX may not have the right value returned from cpuid with EAX = 1.
-rw-r--r--ChangeLog5
-rw-r--r--sysdeps/x86_64/cacheinfo.c42
2 files changed, 43 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index c6169d776e..debf530fab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2009-08-05 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Properly use
+ EBX from EAX = 1. Handle EAX = 11.
+
2009-08-07 Andreas Schwab <schwab@redhat.com>
* Makefile (TAGS): Use separate sed -e expressions to avoid \
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index f252fc2c6c..ddad63baee 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -516,13 +516,15 @@ init_cacheinfo (void)
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
+ unsigned int ebx_1;
+
#ifdef USE_MULTIARCH
eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+ ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
- __cpuid (1, eax, ebx, ecx, edx);
+ __cpuid (1, eax, ebx_1, ecx, edx);
#endif
#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
@@ -554,14 +556,46 @@ init_cacheinfo (void)
}
while (((eax >> 5) & 0x7) != level);
- threads = ((eax >> 14) & 0x3ff) + 1;
+ threads = (eax >> 14) & 0x3ff;
+
+ /* If max_cpuid >= 11, THREADS is the maximum number of
+ addressable IDs for logical processors sharing the
+ cache, instead of the maximum number of threads
+ sharing the cache. */
+ if (threads && max_cpuid >= 11)
+ {
+ /* Find the number of logical processors shipped in
+ one core and apply count mask. */
+ i = 0;
+ while (1)
+ {
+ __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+ int shipped = ebx & 0xff;
+ int type = ecx & 0xff0;
+ if (shipped == 0 || type == 0)
+ break;
+ else if (type == 0x200)
+ {
+ int count_mask;
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads = (shipped - 1) & count_mask;
+ break;
+ }
+ }
+ }
+ threads += 1;
}
else
{
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
- threads = (ebx >> 16) & 0xff;
+ threads = (ebx_1 >> 16) & 0xff;
}
/* Cap usage of highest cache level to the number of supported