diff options
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- | sysdeps/x86/cpu-features-offsets.sym | 1 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.c | 88 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.h | 34 | ||||
-rw-r--r-- | sysdeps/x86/cpu-tunables.c | 17 |
4 files changed, 109 insertions, 31 deletions
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym index f6739fae81..33dd094e37 100644 --- a/sysdeps/x86/cpu-features-offsets.sym +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -15,6 +15,7 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) FAMILY_OFFSET offsetof (struct cpu_features, family) MODEL_OFFSET offsetof (struct cpu_features, model) +XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) FEATURE_OFFSET offsetof (struct cpu_features, feature) FEATURE_SIZE sizeof (unsigned int) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 332b0f0d4a..87aaa8683c 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -19,6 +19,7 @@ #include <cpuid.h> #include <cpu-features.h> #include <dl-hwcap.h> +#include <libc-pointer-arith.h> #if HAVE_TUNABLES # define TUNABLE_NAMESPACE tune @@ -103,6 +104,76 @@ get_common_indeces (struct cpu_features *cpu_features, } } } + + /* For _dl_runtime_resolve, set xsave_state_size to xsave area + size + integer register save size and align it to 64 bytes. */ + if (cpu_features->max_cpuid >= 0xd) + { + unsigned int eax, ebx, ecx, edx; + + __cpuid_count (0xd, 0, eax, ebx, ecx, edx); + if (ebx != 0) + { + unsigned int xsave_state_full_size + = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); + + cpu_features->xsave_state_size + = xsave_state_full_size; + cpu_features->xsave_state_full_size + = xsave_state_full_size; + + __cpuid_count (0xd, 1, eax, ebx, ecx, edx); + + /* Check if XSAVEC is available. */ + if ((eax & (1 << 1)) != 0) + { + unsigned int xstate_comp_offsets[32]; + unsigned int xstate_comp_sizes[32]; + unsigned int i; + + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = 160; + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; + + for (i = 2; i < 32; i++) + { + if ((STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); + xstate_comp_sizes[i] = eax; + } + else + { + ecx = 0; + xstate_comp_sizes[i] = 0; + } + + if (i > 2) + { + xstate_comp_offsets[i] + = (xstate_comp_offsets[i - 1] + + xstate_comp_sizes[i -1]); + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); + } + } + + /* Use XSAVEC. */ + unsigned int size + = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + if (size) + { + cpu_features->xsave_state_size + = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); + cpu_features->feature[index_arch_XSAVEC_Usable] + |= bit_arch_XSAVEC_Usable; + } + } + } + } } } @@ -242,23 +313,6 @@ init_cpu_features (struct cpu_features *cpu_features) else cpu_features->feature[index_arch_Prefer_No_AVX512] |= bit_arch_Prefer_No_AVX512; - - /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow. - If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. - Use _dl_runtime_resolve_opt only with AVX512F since it is - slower than _dl_runtime_resolve_slow with AVX. */ - cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow] - |= bit_arch_Use_dl_runtime_resolve_slow; - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && cpu_features->max_cpuid >= 0xd) - { - unsigned int eax; - - __cpuid_count (0xd, 1, eax, ebx, ecx, edx); - if ((eax & (1 << 2)) != 0) - cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt] - |= bit_arch_Use_dl_runtime_resolve_opt; - } } /* This spells out "AuthenticAMD". */ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index a032a2e168..b7f7898d11 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -37,10 +37,9 @@ #define bit_arch_Prefer_No_VZEROUPPER (1 << 17) #define bit_arch_Fast_Unaligned_Copy (1 << 18) #define bit_arch_Prefer_ERMS (1 << 19) -#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20) -#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21) -#define bit_arch_Prefer_No_AVX512 (1 << 22) -#define bit_arch_MathVec_Prefer_No_AVX512 (1 << 23) +#define bit_arch_Prefer_No_AVX512 (1 << 20) +#define bit_arch_MathVec_Prefer_No_AVX512 (1 << 21) +#define bit_arch_XSAVEC_Usable (1 << 22) /* CPUID Feature flags. */ @@ -91,8 +90,18 @@ /* The current maximum size of the feature integer bit array. */ #define FEATURE_INDEX_MAX 1 -#ifndef __ASSEMBLER__ +/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need + space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be + aligned to 16 bytes for fxsave and 64 bytes for xsave. */ +#define STATE_SAVE_OFFSET (8 * 7 + 8) +/* Save SSE, AVX, AVX512, mask and bound registers. */ +#define STATE_SAVE_MASK \ + ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) + +#ifdef __ASSEMBLER__ +# include <cpu-features-offsets.h> +#else /* __ASSEMBLER__ */ enum { COMMON_CPUID_INDEX_1 = 0, @@ -121,6 +130,18 @@ struct cpu_features } cpuid[COMMON_CPUID_INDEX_MAX]; unsigned int family; unsigned int model; + /* The state size for XSAVEC or XSAVE. The type must be unsigned long + int so that we use + + sub xsave_state_size_offset(%rip) %RSP_LP + + in _dl_runtime_resolve. */ + unsigned long int xsave_state_size; + /* The full state size for XSAVE when XSAVEC is disabled by + + GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable + */ + unsigned int xsave_state_full_size; unsigned int feature[FEATURE_INDEX_MAX]; /* Data cache size for use in memory and string routines, typically L1 size. */ @@ -237,10 +258,9 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1 # define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1 # define index_arch_Prefer_ERMS FEATURE_INDEX_1 -# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 -# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 # define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1 # define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_1 +# define index_arch_XSAVEC_Usable FEATURE_INDEX_1 #endif /* !__ASSEMBLER__ */ diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index ec72d86f08..dcd0165f2e 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -242,6 +242,16 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) Slow_SSE4_2, SSE4_2, disable, 11); break; + case 13: + if (disable) + { + /* Update xsave_state_size to XSAVE state size. */ + cpu_features->xsave_state_size + = cpu_features->xsave_state_full_size; + CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, + XSAVEC_Usable, 13); + } + break; case 14: if (disable) { @@ -317,13 +327,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) disable, 26); } break; - case 27: - { - CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, - Use_dl_runtime_resolve_slow, - disable, 27); - } - break; } p += len + 1; } |