From ff6d62e9edb5dce537a6dd4a237d6053f331f09c Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 25 Dec 2020 07:30:46 -0800 Subject: <sys/platform/x86.h>: Remove the C preprocessor magic In <sys/platform/x86.h>, define CPU features as enum instead of using the C preprocessor magic to make it easier to wrap this functionality in other languages. Move the C preprocessor magic to internal header for better GCC codegen when more than one feature is checked in a single expression as in x86-64 dl-hwcaps-subdirs.c. 1. Rename COMMON_CPUID_INDEX_XXX to CPUID_INDEX_XXX. 2. Move CPUID_INDEX_MAX to sysdeps/x86/include/cpu-features.h. 3. Remove struct cpu_features and __x86_get_cpu_features from <sys/platform/x86.h>. 4. Add __x86_get_cpuid_feature_leaf to <sys/platform/x86.h> and put it in libc. 5. Make __get_cpu_features() private to glibc. 6. Replace __x86_get_cpu_features(N) with __get_cpu_features(). 7. Add _dl_x86_get_cpu_features to GLIBC_PRIVATE. 8. Use a single enum index for each CPU feature detection. 9. Pass the CPUID feature leaf to __x86_get_cpuid_feature_leaf. 10. Return zero struct cpuid_feature for the older glibc binary with a smaller CPUID_INDEX_MAX [BZ #27104]. 11. Inside glibc, use the C preprocessor magic so that cpu_features data can be loaded just once leading to more compact code for glibc. 256 bits are used for each CPUID leaf. Some leaves only contain a few features. We can add exceptions to such leaves. But it will increase code size and it is harder to provide backward/forward compatibility when new features are added to such leaves in the future. When new leaves are added, _rtld_global_ro offsets will change which leads to a race condition during in-place updates. We may avoid in-place updates by 1. Rename the old glibc. 2. Install the new glibc. 3. Remove the old glibc. NB: A function, __x86_get_cpuid_feature_leaf, is used to avoid the copy relocation issue with IFUNC resolver as shown in IFUNC resolver tests. 
--- sysdeps/x86/cpu-features.c | 68 +++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'sysdeps/x86/cpu-features.c') diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 06e4307b71..6496512a0d 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -293,22 +293,22 @@ get_extended_indices (struct cpu_features *cpu_features) __cpuid (0x80000000, eax, ebx, ecx, edx); if (eax >= 0x80000001) __cpuid (0x80000001, - cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.edx); + cpu_features->features[CPUID_INDEX_80000001].cpuid.eax, + cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx, + cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx, + cpu_features->features[CPUID_INDEX_80000001].cpuid.edx); if (eax >= 0x80000007) __cpuid (0x80000007, - cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.edx); + cpu_features->features[CPUID_INDEX_80000007].cpuid.eax, + cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx, + cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx, + cpu_features->features[CPUID_INDEX_80000007].cpuid.edx); if (eax >= 0x80000008) __cpuid (0x80000008, - cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.edx); + cpu_features->features[CPUID_INDEX_80000008].cpuid.eax, + cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx, + 
cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx, + cpu_features->features[CPUID_INDEX_80000008].cpuid.edx); } static void @@ -320,10 +320,10 @@ get_common_indices (struct cpu_features *cpu_features, { unsigned int eax; __cpuid (1, eax, - cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.edx); - cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.eax = eax; + cpu_features->features[CPUID_INDEX_1].cpuid.ebx, + cpu_features->features[CPUID_INDEX_1].cpuid.ecx, + cpu_features->features[CPUID_INDEX_1].cpuid.edx); + cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax; *family = (eax >> 8) & 0x0f; *model = (eax >> 4) & 0x0f; *extended_model = (eax >> 12) & 0xf0; @@ -338,30 +338,30 @@ get_common_indices (struct cpu_features *cpu_features, if (cpu_features->basic.max_cpuid >= 7) { __cpuid_count (7, 0, - cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.edx); + cpu_features->features[CPUID_INDEX_7].cpuid.eax, + cpu_features->features[CPUID_INDEX_7].cpuid.ebx, + cpu_features->features[CPUID_INDEX_7].cpuid.ecx, + cpu_features->features[CPUID_INDEX_7].cpuid.edx); __cpuid_count (7, 1, - cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.edx); + cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax, + cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx, + cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx, + cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx); } if (cpu_features->basic.max_cpuid >= 0xd) __cpuid_count (0xd, 1, - 
cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.edx); + cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax, + cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx, + cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx, + cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx); if (cpu_features->basic.max_cpuid >= 0x19) __cpuid_count (0x19, 0, - cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.eax, - cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.ebx, - cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.ecx, - cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.edx); + cpu_features->features[CPUID_INDEX_19].cpuid.eax, + cpu_features->features[CPUID_INDEX_19].cpuid.ebx, + cpu_features->features[CPUID_INDEX_19].cpuid.ecx, + cpu_features->features[CPUID_INDEX_19].cpuid.edx); } _Static_assert (((index_arch_Fast_Unaligned_Load @@ -536,11 +536,11 @@ init_cpu_features (struct cpu_features *cpu_features) update_usable (cpu_features); - ecx = cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx; + ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx; if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) { - /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and + /* Since the FMA4 bit is in CPUID_INDEX_80000001 and FMA4 requires AVX, determine if FMA4 is usable here. */ CPU_FEATURE_SET_USABLE (cpu_features, FMA4); } -- cgit v1.2.3-70-g09d2