aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--sysdeps/x86_64/Makefile2
-rw-r--r--sysdeps/x86_64/dl-machine.h19
-rw-r--r--sysdeps/x86_64/dl-procinfo.c16
-rw-r--r--sysdeps/x86_64/dl-tlsdesc-dynamic.h166
-rw-r--r--sysdeps/x86_64/dl-tlsdesc.S108
-rw-r--r--sysdeps/x86_64/dl-trampoline-save.h34
-rw-r--r--sysdeps/x86_64/dl-trampoline-state.h51
-rw-r--r--sysdeps/x86_64/dl-trampoline.S20
-rw-r--r--sysdeps/x86_64/dl-trampoline.h34
-rw-r--r--sysdeps/x86_64/features-offsets.sym6
10 files changed, 306 insertions, 150 deletions
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 145f25e7f6..9337e95093 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt
endif
ifeq ($(subdir),csu)
-gen-as-const-headers += features-offsets.sym link-defines.sym
+gen-as-const-headers += link-defines.sym
endif
ifeq ($(subdir),gmon)
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 6d605d0d32..ff5d45f7cb 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
int lazy, int profile)
{
Elf64_Addr *got;
- extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
@@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* Identify this shared object. */
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
- const struct cpu_features* cpu_features = __get_cpu_features ();
-
#ifdef SHARED
/* The got[2] entry contains the address of a function which gets
called to get the address of a so far unresolved function and
@@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */
if (__glibc_unlikely (profile))
{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
@@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
- if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
- || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
- *(ElfW(Addr) *) (got + 2)
- = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
- ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
- : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
- else
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
+ *(ElfW(Addr) *) (got + 2)
+ = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve);
}
}
@@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n",
{
td->arg = _dl_make_tlsdesc_dynamic
(sym_map, sym->st_value + reloc->r_addend);
- td->entry = _dl_tlsdesc_dynamic;
+ td->entry = GLRO(dl_x86_tlsdesc_dynamic);
}
else
# endif
diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
index 4d1d790fbb..06637a8154 100644
--- a/sysdeps/x86_64/dl-procinfo.c
+++ b/sysdeps/x86_64/dl-procinfo.c
@@ -41,5 +41,21 @@
#include <sysdeps/x86/dl-procinfo.c>
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+ ._dl_x86_64_runtime_resolve
+# else
+PROCINFO_CLASS void * _dl_x86_64_runtime_resolve
+# endif
+# ifndef PROCINFO_DECL
+= NULL
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
#undef PROCINFO_DECL
#undef PROCINFO_CLASS
diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
new file mode 100644
index 0000000000..0c2e8d5320
--- /dev/null
+++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
@@ -0,0 +1,166 @@
+/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
+ Copyright (C) 2004-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef SECTION
+# define SECTION(p) p
+#endif
+
+#undef REGISTER_SAVE_AREA
+#undef LOCAL_STORAGE_AREA
+#undef BASE
+
+#include "dl-trampoline-state.h"
+
+ .section SECTION(.text),"ax",@progbits
+
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,@function
+
+ /* %rax points to the TLS descriptor, such that 0(%rax) points to
+ _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
+ tlsdesc_dynamic_arg object. It must return in %rax the offset
+ between the thread pointer and the object denoted by the
+ argument, without clobbering any registers.
+
+ The assembly code that follows is a rendition of the following
+ C code, hand-optimized a little bit.
+
+ptrdiff_t
+_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
+{
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+ if (__builtin_expect (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
+}
+*/
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_dynamic:
+ _CET_ENDBR
+ /* Preserve call-clobbered registers that we modify.
+ We need two scratch regs anyway. */
+ movq %rsi, -16(%rsp)
+ mov %fs:DTV_OFFSET, %RSI_LP
+ movq %rdi, -8(%rsp)
+ movq TLSDESC_ARG(%rax), %rdi
+ movq (%rsi), %rax
+ cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
+ ja 2f
+ movq TLSDESC_MODID(%rdi), %rax
+ salq $4, %rax
+ movq (%rax,%rsi), %rax
+ cmpq $-1, %rax
+ je 2f
+ addq TLSDESC_MODOFF(%rdi), %rax
+1:
+ movq -16(%rsp), %rsi
+ sub %fs:0, %RAX_LP
+ movq -8(%rsp), %rdi
+ ret
+2:
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+ movq %rbx, -24(%rsp)
+ mov %RSP_LP, %RBX_LP
+ cfi_def_cfa_register(%rbx)
+ and $-STATE_SAVE_ALIGNMENT, %RSP_LP
+#endif
+#ifdef REGISTER_SAVE_AREA
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+ /* STATE_SAVE_OFFSET has space for 8 integer registers. But we
+ need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus
+ RBX above. */
+ sub $(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP
+# else
+ sub $REGISTER_SAVE_AREA, %RSP_LP
+ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+#else
+ /* Allocate stack space of the required size to save the state. */
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+#endif
+ /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
+ r10 and r11. */
+ movq %rcx, REGISTER_SAVE_RCX(%rsp)
+ movq %rdx, REGISTER_SAVE_RDX(%rsp)
+ movq %r8, REGISTER_SAVE_R8(%rsp)
+ movq %r9, REGISTER_SAVE_R9(%rsp)
+ movq %r10, REGISTER_SAVE_R10(%rsp)
+ movq %r11, REGISTER_SAVE_R11(%rsp)
+#ifdef USE_FXSAVE
+ fxsave STATE_SAVE_OFFSET(%rsp)
+#else
+ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ /* Clear the XSAVE Header. */
+# ifdef USE_XSAVE
+ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+# endif
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+# ifdef USE_XSAVE
+ xsave STATE_SAVE_OFFSET(%rsp)
+# else
+ xsavec STATE_SAVE_OFFSET(%rsp)
+# endif
+#endif
+ /* %rdi already points to the tlsinfo data structure. */
+ call HIDDEN_JUMPTARGET (__tls_get_addr)
+ # Get register content back.
+#ifdef USE_FXSAVE
+ fxrstor STATE_SAVE_OFFSET(%rsp)
+#else
+ /* Save and retore __tls_get_addr return value stored in RAX. */
+ mov %RAX_LP, %RCX_LP
+ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ xrstor STATE_SAVE_OFFSET(%rsp)
+ mov %RCX_LP, %RAX_LP
+#endif
+ movq REGISTER_SAVE_R11(%rsp), %r11
+ movq REGISTER_SAVE_R10(%rsp), %r10
+ movq REGISTER_SAVE_R9(%rsp), %r9
+ movq REGISTER_SAVE_R8(%rsp), %r8
+ movq REGISTER_SAVE_RDX(%rsp), %rdx
+ movq REGISTER_SAVE_RCX(%rsp), %rcx
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+ mov %RBX_LP, %RSP_LP
+ cfi_def_cfa_register(%rsp)
+ movq -24(%rsp), %rbx
+ cfi_restore(%rbx)
+#else
+ add $REGISTER_SAVE_AREA, %RSP_LP
+ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
+#endif
+ jmp 1b
+ cfi_endproc
+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index f748af2ece..ea69f5223a 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -18,7 +18,19 @@
#include <sysdep.h>
#include <tls.h>
+#include <cpu-features-offsets.h>
+#include <features-offsets.h>
#include "tlsdesc.h"
+#include "dl-trampoline-save.h"
+
+/* Area on stack to save and restore registers used for parameter
+ passing when calling _dl_tlsdesc_dynamic. */
+#define REGISTER_SAVE_RCX 0
+#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
+#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDX + 8)
+#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
+#define REGISTER_SAVE_R10 (REGISTER_SAVE_R9 + 8)
+#define REGISTER_SAVE_R11 (REGISTER_SAVE_R10 + 8)
.text
@@ -67,80 +79,24 @@ _dl_tlsdesc_undefweak:
.size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
#ifdef SHARED
- .hidden _dl_tlsdesc_dynamic
- .global _dl_tlsdesc_dynamic
- .type _dl_tlsdesc_dynamic,@function
-
- /* %rax points to the TLS descriptor, such that 0(%rax) points to
- _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
- tlsdesc_dynamic_arg object. It must return in %rax the offset
- between the thread pointer and the object denoted by the
- argument, without clobbering any registers.
-
- The assembly code that follows is a rendition of the following
- C code, hand-optimized a little bit.
-
-ptrdiff_t
-_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
-{
- struct tlsdesc_dynamic_arg *td = tdp->arg;
- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
- if (__builtin_expect (td->gen_count <= dtv[0].counter
- && (dtv[td->tlsinfo.ti_module].pointer.val
- != TLS_DTV_UNALLOCATED),
- 1))
- return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
- - __thread_pointer;
-
- return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
-}
-*/
- cfi_startproc
- .align 16
-_dl_tlsdesc_dynamic:
- _CET_ENDBR
- /* Preserve call-clobbered registers that we modify.
- We need two scratch regs anyway. */
- movq %rsi, -16(%rsp)
- mov %fs:DTV_OFFSET, %RSI_LP
- movq %rdi, -8(%rsp)
- movq TLSDESC_ARG(%rax), %rdi
- movq (%rsi), %rax
- cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
- ja .Lslow
- movq TLSDESC_MODID(%rdi), %rax
- salq $4, %rax
- movq (%rax,%rsi), %rax
- cmpq $-1, %rax
- je .Lslow
- addq TLSDESC_MODOFF(%rdi), %rax
-.Lret:
- movq -16(%rsp), %rsi
- sub %fs:0, %RAX_LP
- movq -8(%rsp), %rdi
- ret
-.Lslow:
- /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
- r10 and r11. Also, align the stack, that's off by 8 bytes. */
- subq $72, %rsp
- cfi_adjust_cfa_offset (72)
- movq %rdx, 8(%rsp)
- movq %rcx, 16(%rsp)
- movq %r8, 24(%rsp)
- movq %r9, 32(%rsp)
- movq %r10, 40(%rsp)
- movq %r11, 48(%rsp)
- /* %rdi already points to the tlsinfo data structure. */
- call HIDDEN_JUMPTARGET (__tls_get_addr)
- movq 8(%rsp), %rdx
- movq 16(%rsp), %rcx
- movq 24(%rsp), %r8
- movq 32(%rsp), %r9
- movq 40(%rsp), %r10
- movq 48(%rsp), %r11
- addq $72, %rsp
- cfi_adjust_cfa_offset (-72)
- jmp .Lret
- cfi_endproc
- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# define USE_FXSAVE
+# define STATE_SAVE_ALIGNMENT 16
+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_FXSAVE
+
+# define USE_XSAVE
+# define STATE_SAVE_ALIGNMENT 64
+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_XSAVE
+
+# define USE_XSAVEC
+# define STATE_SAVE_ALIGNMENT 64
+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_XSAVEC
#endif /* SHARED */
diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
new file mode 100644
index 0000000000..84eac4a8ac
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline-save.h
@@ -0,0 +1,34 @@
+/* x86-64 PLT trampoline register save macros.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+ __tls_get_addr may be called with 8-byte stack alignment. Although
+ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+ that stack will be always aligned at 16 bytes. */
+# define DL_STACK_ALIGNMENT 8
+#endif
+
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+ stack to 16 bytes before calling _dl_fixup. */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+ || 16 > DL_STACK_ALIGNMENT)
diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h
new file mode 100644
index 0000000000..575f120797
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline-state.h
@@ -0,0 +1,51 @@
+/* x86-64 PLT dl-trampoline state macros.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multiple of 16
+#endif
+
+#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX. */
+# define LOCAL_STORAGE_AREA 8
+# define BASE rbx
+# ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
+# if (REGISTER_SAVE_AREA % 16) != 0
+# error REGISTER_SAVE_AREA must be multiple of 16
+# endif
+# endif
+#else
+# ifndef USE_FXSAVE
+# error USE_FXSAVE must be defined
+# endif
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
+/* Local stack area before jumping to function address: All saved
+ registers. */
+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
+# define BASE rsp
+# if (REGISTER_SAVE_AREA % 16) != 8
+# error REGISTER_SAVE_AREA must be odd multiple of 8
+# endif
+#endif
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index b2e7e0f69b..87c5137837 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -22,25 +22,7 @@
#include <features-offsets.h>
#include <link-defines.h>
#include <isa-level.h>
-
-#ifndef DL_STACK_ALIGNMENT
-/* Due to GCC bug:
-
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
-
- __tls_get_addr may be called with 8-byte stack alignment. Although
- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
- that stack will be always aligned at 16 bytes. We use unaligned
- 16-byte move to load and store SSE registers, which has no penalty
- on modern processors if stack is 16-byte aligned. */
-# define DL_STACK_ALIGNMENT 8
-#endif
-
-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
- stack to 16 bytes before calling _dl_fixup. */
-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
- || 16 > DL_STACK_ALIGNMENT)
+#include "dl-trampoline-save.h"
/* Area on stack to save and restore registers used for parameter
passing when calling _dl_fixup. */
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index f55c6ea040..d9ccfb40d4 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -27,39 +27,7 @@
# undef LOCAL_STORAGE_AREA
# undef BASE
-# if (STATE_SAVE_ALIGNMENT % 16) != 0
-# error STATE_SAVE_ALIGNMENT must be multiple of 16
-# endif
-
-# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
-# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
-# endif
-
-# if DL_RUNTIME_RESOLVE_REALIGN_STACK
-/* Local stack area before jumping to function address: RBX. */
-# define LOCAL_STORAGE_AREA 8
-# define BASE rbx
-# ifdef USE_FXSAVE
-/* Use fxsave to save XMM registers. */
-# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
-# if (REGISTER_SAVE_AREA % 16) != 0
-# error REGISTER_SAVE_AREA must be multiple of 16
-# endif
-# endif
-# else
-# ifndef USE_FXSAVE
-# error USE_FXSAVE must be defined
-# endif
-/* Use fxsave to save XMM registers. */
-# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
-/* Local stack area before jumping to function address: All saved
- registers. */
-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
-# define BASE rsp
-# if (REGISTER_SAVE_AREA % 16) != 8
-# error REGISTER_SAVE_AREA must be odd multiple of 8
-# endif
-# endif
+# include "dl-trampoline-state.h"
.globl _dl_runtime_resolve
.hidden _dl_runtime_resolve
diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86_64/features-offsets.sym
deleted file mode 100644
index 9e4be3393a..0000000000
--- a/sysdeps/x86_64/features-offsets.sym
+++ /dev/null
@@ -1,6 +0,0 @@
-#define SHARED 1
-
-#include <ldsodefs.h>
-
-RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
-RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)