diff options
Diffstat (limited to 'sysdeps/x86/cpu-features.c')
-rw-r--r-- | sysdeps/x86/cpu-features.c | 88 |
1 files changed, 71 insertions, 17 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 332b0f0d4a..87aaa8683c 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -19,6 +19,7 @@ #include <cpuid.h> #include <cpu-features.h> #include <dl-hwcap.h> +#include <libc-pointer-arith.h> #if HAVE_TUNABLES # define TUNABLE_NAMESPACE tune @@ -103,6 +104,76 @@ get_common_indeces (struct cpu_features *cpu_features, } } } + + /* For _dl_runtime_resolve, set xsave_state_size to xsave area + size + integer register save size and align it to 64 bytes. */ + if (cpu_features->max_cpuid >= 0xd) + { + unsigned int eax, ebx, ecx, edx; + + __cpuid_count (0xd, 0, eax, ebx, ecx, edx); + if (ebx != 0) + { + unsigned int xsave_state_full_size + = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + + cpu_features->xsave_state_size + = xsave_state_full_size; + cpu_features->xsave_state_full_size + = xsave_state_full_size; + + __cpuid_count (0xd, 1, eax, ebx, ecx, edx); + + /* Check if XSAVEC is available. */ + if ((eax & (1 << 1)) != 0) + { + unsigned int xstate_comp_offsets[32]; + unsigned int xstate_comp_sizes[32]; + unsigned int i; + + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = 160; + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; + + for (i = 2; i < 32; i++) + { + if ((STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); + xstate_comp_sizes[i] = eax; + } + else + { + ecx = 0; + xstate_comp_sizes[i] = 0; + } + + if (i > 2) + { + xstate_comp_offsets[i] + = (xstate_comp_offsets[i - 1] + + xstate_comp_sizes[i -1]); + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); + } + } + + /* Use XSAVEC. */ + unsigned int size + = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + if (size) + { + cpu_features->xsave_state_size + = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + cpu_features->feature[index_arch_XSAVEC_Usable] + |= bit_arch_XSAVEC_Usable; + } + } + } + } } } @@ -242,23 +313,6 @@ init_cpu_features (struct cpu_features *cpu_features) else cpu_features->feature[index_arch_Prefer_No_AVX512] |= bit_arch_Prefer_No_AVX512; - - /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow. - If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. - Use _dl_runtime_resolve_opt only with AVX512F since it is - slower than _dl_runtime_resolve_slow with AVX. */ - cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow] - |= bit_arch_Use_dl_runtime_resolve_slow; - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && cpu_features->max_cpuid >= 0xd) - { - unsigned int eax; - - __cpuid_count (0xd, 1, eax, ebx, ecx, edx); - if ((eax & (1 << 2)) != 0) - cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt] - |= bit_arch_Use_dl_runtime_resolve_opt; - } } /* This spells out "AuthenticAMD". */ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) |