aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2024-02-26 06:37:03 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2024-02-28 09:02:56 -0800
commit: 0aac205a814a8511e98d02b91a8dc908f1c53cde (patch)
tree: e9ad8dfa06abd2f6d9b521bb95e9b7cfaa0d7de7 /sysdeps/x86
parent: e6350be7e9cae8f71c96c1f06eab61b9acb227c8 (diff)
downloadglibc-master.tar
glibc-master.tar.gz
glibc-master.tar.bz2
glibc-master.zip
x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (HEAD, master)
The compiler generates the following instruction sequence for GNU2 dynamic TLS access:

    leaq tls_var@TLSDESC(%rip), %rax
    call *tls_var@TLSCALL(%rax)

or

    leal tls_var@TLSDESC(%ebx), %eax
    call *tls_var@TLSCALL(%eax)

The CALL instruction is transparent to the compiler, which assumes that all registers, except for EFLAGS and RAX/EAX, are unchanged after CALL. When _dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow path. __tls_get_addr is a normal function which doesn't preserve any caller-saved registers. _dl_tlsdesc_dynamic saved and restored integer caller-saved registers, but didn't preserve any other caller-saved registers. Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all caller-saved registers. This fixes BZ #31372.

Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic) to optimize elf_machine_runtime_setup.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86')
-rw-r--r--sysdeps/x86/Makefile7
-rw-r--r--sysdeps/x86/cpu-features.c56
-rw-r--r--sysdeps/x86/dl-procinfo.c16
-rw-r--r--sysdeps/x86/features-offsets.sym8
-rw-r--r--sysdeps/x86/sysdep.h6
-rw-r--r--sysdeps/x86/tst-gnu2-tls2.c20
6 files changed, 110 insertions, 3 deletions
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 73b29cc78c..5311b594af 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -1,5 +1,5 @@
ifeq ($(subdir),csu)
-gen-as-const-headers += cpu-features-offsets.sym
+gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym
endif
ifeq ($(subdir),elf)
@@ -86,6 +86,11 @@ endif
tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F
tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
+
+CFLAGS-tst-gnu2-tls2.c += -msse
+CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
+CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
+CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
endif
ifeq ($(subdir),math)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 25e6622a79..835113b42f 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -27,8 +27,13 @@
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
attribute_hidden;
-#if defined SHARED && defined __x86_64__
-# include <dl-plt-rewrite.h>
+#if defined SHARED
+extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
+extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
+extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;
+
+# ifdef __x86_64__
+# include <dl-plt-rewrite.h>
static void
TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
@@ -47,6 +52,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
: plt_rewrite_jmp);
}
}
+# else
+extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
+# endif
+#endif
+
+#ifdef __x86_64__
+extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
+extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
+extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
#endif
#ifdef __LP64__
@@ -1130,6 +1144,44 @@ no_cpuid:
TUNABLE_CALLBACK (set_x86_shstk));
#endif
+ if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
+ {
+#ifdef __x86_64__
+ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
+#endif
+#ifdef SHARED
+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
+#endif
+ }
+ else
+ {
+#ifdef __x86_64__
+ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
+#endif
+#ifdef SHARED
+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
+#endif
+ }
+ }
+ else
+ {
+#ifdef __x86_64__
+ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
+# ifdef SHARED
+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+# endif
+#else
+# ifdef SHARED
+ if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+ else
+ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
+# endif
+#endif
+ }
+
#ifdef SHARED
# ifdef __x86_64__
TUNABLE_GET (plt_rewrite, tunable_val_t *,
diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c
index ee957b4d70..5920d4b320 100644
--- a/sysdeps/x86/dl-procinfo.c
+++ b/sysdeps/x86/dl-procinfo.c
@@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9]
#else
,
#endif
+
+#if defined SHARED && !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL
+ ._dl_x86_tlsdesc_dynamic
+# else
+PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic
+# endif
+# ifndef PROCINFO_DECL
+= NULL
+# endif
+# ifdef PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
diff --git a/sysdeps/x86/features-offsets.sym b/sysdeps/x86/features-offsets.sym
new file mode 100644
index 0000000000..77e990c705
--- /dev/null
+++ b/sysdeps/x86/features-offsets.sym
@@ -0,0 +1,8 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
+#ifdef __x86_64__
+RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)
+#endif
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 837fd28734..485cad9c02 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -70,6 +70,12 @@
| (1 << X86_XSTATE_ZMM_H_ID))
#endif
+/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
+ Compiler assumes that all registers, including x87 FPU stack registers,
+ are unchanged after CALL, except for EFLAGS and RAX/EAX. */
+#define TLSDESC_CALL_STATE_SAVE_MASK \
+ (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
+
/* Constants for bits in __x86_string_control: */
/* Avoid short distance REP MOVSB. */
diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
new file mode 100644
index 0000000000..de900a423b
--- /dev/null
+++ b/sysdeps/x86/tst-gnu2-tls2.c
@@ -0,0 +1,20 @@
+#ifndef __x86_64__
+#include <sys/platform/x86.h>
+
+#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+#endif
+
+/* Zero XMM0...XMM7 by XORing each register with itself.  */
+#define PREPARE_MALLOC() \
+{ \
+ asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \
+ asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \
+ asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \
+ asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \
+ asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \
+ asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \
+ asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \
+ asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \
+}
+
+#include <elf/tst-gnu2-tls2.c>