aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/dl-machine.h
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2024-02-26 06:37:03 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2024-02-28 09:02:56 -0800
commit: 0aac205a814a8511e98d02b91a8dc908f1c53cde (patch)
tree: e9ad8dfa06abd2f6d9b521bb95e9b7cfaa0d7de7 /sysdeps/x86_64/dl-machine.h
parent: e6350be7e9cae8f71c96c1f06eab61b9acb227c8 (diff)
downloadglibc-0aac205a814a8511e98d02b91a8dc908f1c53cde.tar
glibc-0aac205a814a8511e98d02b91a8dc908f1c53cde.tar.gz
glibc-0aac205a814a8511e98d02b91a8dc908f1c53cde.tar.bz2
glibc-0aac205a814a8511e98d02b91a8dc908f1c53cde.zip
x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (HEAD, master)
Compiler generates the following instruction sequence for GNU2 dynamic
TLS access:

	leaq	tls_var@TLSDESC(%rip), %rax
	call	*tls_var@TLSCALL(%rax)

or

	leal	tls_var@TLSDESC(%ebx), %eax
	call	*tls_var@TLSCALL(%eax)

CALL instruction is transparent to compiler which assumes all registers,
except for EFLAGS and RAX/EAX, are unchanged after CALL.  When
_dl_tlsdesc_dynamic is called, it calls __tls_get_addr on the slow
path.  __tls_get_addr is a normal function which doesn't preserve any
caller-saved registers.  _dl_tlsdesc_dynamic saved and restored integer
caller-saved registers, but didn't preserve any other caller-saved
registers.  Add _dl_tlsdesc_dynamic IFUNC functions for FNSAVE, FXSAVE,
XSAVE and XSAVEC to save and restore all caller-saved registers.  This
fixes BZ #31372.

Add GLRO(dl_x86_64_runtime_resolve) with GLRO(dl_x86_tlsdesc_dynamic)
to optimize elf_machine_runtime_setup.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86_64/dl-machine.h')
-rw-r--r--  sysdeps/x86_64/dl-machine.h  19
1 files changed, 4 insertions, 15 deletions
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 6d605d0d32..ff5d45f7cb 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
int lazy, int profile)
{
Elf64_Addr *got;
- extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
@@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* Identify this shared object. */
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
- const struct cpu_features* cpu_features = __get_cpu_features ();
-
#ifdef SHARED
/* The got[2] entry contains the address of a function which gets
called to get the address of a so far unresolved function and
@@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */
if (__glibc_unlikely (profile))
{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
@@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
- if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
- || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
- *(ElfW(Addr) *) (got + 2)
- = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
- ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
- : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
- else
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
+ *(ElfW(Addr) *) (got + 2)
+ = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve);
}
}
@@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n",
{
td->arg = _dl_make_tlsdesc_dynamic
(sym_map, sym->st_value + reloc->r_addend);
- td->entry = _dl_tlsdesc_dynamic;
+ td->entry = GLRO(dl_x86_tlsdesc_dynamic);
}
else
# endif