aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/aarch64/dl-machine.h4
-rw-r--r--sysdeps/gnu/glob64.c3
-rw-r--r--sysdeps/gnu/globfree64.c10
-rw-r--r--sysdeps/ieee754/dbl-64/e_pow.c8
-rw-r--r--sysdeps/unix/sysv/linux/Makefile2
-rw-r--r--sysdeps/unix/sysv/linux/alpha/glob.c4
-rw-r--r--sysdeps/unix/sysv/linux/alpha/globfree.c37
-rw-r--r--sysdeps/unix/sysv/linux/i386/glob64.c36
-rw-r--r--sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c1
-rw-r--r--sysdeps/unix/sysv/linux/oldglob.c42
-rw-r--r--sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym32
-rw-r--r--sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym32
-rw-r--r--sysdeps/unix/sysv/linux/sh/sys/ucontext.h70
-rw-r--r--sysdeps/unix/sysv/linux/wordsize-64/globfree64.c2
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/x32/globfree.c1
-rw-r--r--sysdeps/wordsize-64/glob.c2
-rw-r--r--sysdeps/wordsize-64/globfree.c5
-rw-r--r--sysdeps/wordsize-64/globfree64.c1
-rw-r--r--sysdeps/x86/cpu-features-offsets.sym1
-rw-r--r--sysdeps/x86/cpu-features.c80
-rw-r--r--sysdeps/x86/cpu-features.h23
-rw-r--r--sysdeps/x86_64/Makefile24
-rw-r--r--sysdeps/x86_64/dl-machine.h38
-rw-r--r--sysdeps/x86_64/dl-tls.c53
-rw-r--r--sysdeps/x86_64/dl-tls.h5
-rw-r--r--sysdeps/x86_64/dl-trampoline.S87
-rw-r--r--sysdeps/x86_64/dl-trampoline.h265
-rw-r--r--sysdeps/x86_64/rtld-offsets.sym6
-rw-r--r--sysdeps/x86_64/tls_get_addr.S61
-rw-r--r--sysdeps/x86_64/tlsdesc.sym3
-rw-r--r--sysdeps/x86_64/tst-avx-aux.c47
-rw-r--r--sysdeps/x86_64/tst-avx.c49
-rw-r--r--sysdeps/x86_64/tst-avx512-aux.c48
-rw-r--r--sysdeps/x86_64/tst-avx512.c57
-rw-r--r--sysdeps/x86_64/tst-avx512mod.c48
-rw-r--r--sysdeps/x86_64/tst-avxmod.c48
-rw-r--r--sysdeps/x86_64/tst-sse.c46
-rw-r--r--sysdeps/x86_64/tst-ssemod.c46
38 files changed, 910 insertions, 417 deletions
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 282805e396..e86d8b5b63 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -172,8 +172,8 @@ _dl_start_user: \n\
cmp x0, #0 \n\
bne 1b \n\
// Update _dl_argv \n\
- adrp x3, _dl_argv \n\
- str x2, [x3, #:lo12:_dl_argv] \n\
+ adrp x3, __GI__dl_argv \n\
+ str x2, [x3, #:lo12:__GI__dl_argv] \n\
.L_done_stack_adjust: \n\
// compute envp \n\
add x3, x2, x1, lsl #3 \n\
diff --git a/sysdeps/gnu/glob64.c b/sysdeps/gnu/glob64.c
index d1e4e6f0d5..52e97e2f6a 100644
--- a/sysdeps/gnu/glob64.c
+++ b/sysdeps/gnu/glob64.c
@@ -15,11 +15,8 @@
#undef __stat
#define __stat(file, buf) __xstat64 (_STAT_VER, file, buf)
-#define NO_GLOB_PATTERN_P 1
-
#define COMPILE_GLOB64 1
#include <posix/glob.c>
libc_hidden_def (glob64)
-libc_hidden_def (globfree64)
diff --git a/sysdeps/gnu/globfree64.c b/sysdeps/gnu/globfree64.c
new file mode 100644
index 0000000000..f092d0bf8b
--- /dev/null
+++ b/sysdeps/gnu/globfree64.c
@@ -0,0 +1,10 @@
+#include <dirent.h>
+#include <glob.h>
+#include <sys/stat.h>
+
+#define glob_t glob64_t
+#define globfree(pglob) globfree64 (pglob)
+
+#include <posix/globfree.c>
+
+libc_hidden_def (globfree64)
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 663fa392c2..bd758b5979 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -466,15 +466,15 @@ checkint (double x)
return (n & 1) ? -1 : 1; /* odd or even */
if (k > 20)
{
- if (n << (k - 20))
+ if (n << (k - 20) != 0)
return 0; /* if not integer */
- return (n << (k - 21)) ? -1 : 1;
+ return (n << (k - 21) != 0) ? -1 : 1;
}
if (n)
return 0; /*if not integer */
if (k == 20)
return (m & 1) ? -1 : 1;
- if (m << (k + 12))
+ if (m << (k + 12) != 0)
return 0;
- return (m << (k + 11)) ? -1 : 1;
+ return (m << (k + 11) != 0) ? -1 : 1;
}
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index 35e1ed48d2..32beaa67d0 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -140,7 +140,7 @@ endif
ifeq ($(subdir),posix)
sysdep_headers += bits/initspin.h
-sysdep_routines += sched_getcpu
+sysdep_routines += sched_getcpu oldglob
tests += tst-affinity tst-affinity-pid
diff --git a/sysdeps/unix/sysv/linux/alpha/glob.c b/sysdeps/unix/sysv/linux/alpha/glob.c
index c5dfb85468..19eb9b1c07 100644
--- a/sysdeps/unix/sysv/linux/alpha/glob.c
+++ b/sysdeps/unix/sysv/linux/alpha/glob.c
@@ -42,10 +42,6 @@ extern void __new_globfree (glob_t *__pglob);
#undef globfree64
versioned_symbol (libc, __new_glob, glob, GLIBC_2_1);
-versioned_symbol (libc, __new_globfree, globfree, GLIBC_2_1);
libc_hidden_ver (__new_glob, glob)
-libc_hidden_ver (__new_globfree, globfree)
weak_alias (__new_glob, glob64)
-weak_alias (__new_globfree, globfree64)
-libc_hidden_ver (__new_globfree, globfree64)
diff --git a/sysdeps/unix/sysv/linux/alpha/globfree.c b/sysdeps/unix/sysv/linux/alpha/globfree.c
new file mode 100644
index 0000000000..98cf1c200b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/alpha/globfree.c
@@ -0,0 +1,37 @@
+/* Compat globfree. Linux/alpha version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define globfree64 __no_globfree64_decl
+#include <sys/types.h>
+#include <glob.h>
+#include <shlib-compat.h>
+
+#define globfree(pglob) \
+ __new_globfree (pglob)
+
+extern void __new_globfree (glob_t *__pglob);
+
+#include <posix/globfree.c>
+
+#undef globfree64
+
+versioned_symbol (libc, __new_globfree, globfree, GLIBC_2_1);
+libc_hidden_ver (__new_globfree, globfree)
+
+weak_alias (__new_globfree, globfree64)
+libc_hidden_ver (__new_globfree, globfree64)
diff --git a/sysdeps/unix/sysv/linux/i386/glob64.c b/sysdeps/unix/sysv/linux/i386/glob64.c
index 802c957d6c..c2cc85741f 100644
--- a/sysdeps/unix/sysv/linux/i386/glob64.c
+++ b/sysdeps/unix/sysv/linux/i386/glob64.c
@@ -19,6 +19,7 @@
#include <dirent.h>
#include <glob.h>
#include <sys/stat.h>
+#include <shlib-compat.h>
#define dirent dirent64
#define __readdir(dirp) __readdir64 (dirp)
@@ -33,44 +34,9 @@
#undef __stat
#define __stat(file, buf) __xstat64 (_STAT_VER, file, buf)
-#define NO_GLOB_PATTERN_P 1
-
#define COMPILE_GLOB64 1
#include <posix/glob.c>
-#include "shlib-compat.h"
-
-libc_hidden_def (globfree64)
-
versioned_symbol (libc, __glob64, glob64, GLIBC_2_2);
libc_hidden_ver (__glob64, glob64)
-
-#if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2)
-
-#include <sysdeps/unix/sysv/linux/i386/olddirent.h>
-
-int __old_glob64 (const char *__pattern, int __flags,
- int (*__errfunc) (const char *, int),
- glob64_t *__pglob);
-
-#undef dirent
-#define dirent __old_dirent64
-#undef GL_READDIR
-# define GL_READDIR(pglob, stream) \
- ((struct __old_dirent64 *) (pglob)->gl_readdir (stream))
-#undef __readdir
-#define __readdir(dirp) __old_readdir64 (dirp)
-#undef glob
-#define glob(pattern, flags, errfunc, pglob) \
- __old_glob64 (pattern, flags, errfunc, pglob)
-#define convert_dirent __old_convert_dirent
-#define glob_in_dir __old_glob_in_dir
-#define GLOB_ATTRIBUTE attribute_compat_text_section
-
-#define GLOB_ONLY_P 1
-
-#include <posix/glob.c>
-
-compat_symbol (libc, __old_glob64, glob64, GLIBC_2_1);
-#endif
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c b/sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c
new file mode 100644
index 0000000000..abc35fdd2b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/globfree64.c
@@ -0,0 +1 @@
+/* glob64 is in globfree64.c */
diff --git a/sysdeps/unix/sysv/linux/oldglob.c b/sysdeps/unix/sysv/linux/oldglob.c
new file mode 100644
index 0000000000..8233e57ce9
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/oldglob.c
@@ -0,0 +1,42 @@
+#include <shlib-compat.h>
+
+#if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2)
+
+#include <dirent.h>
+#include <glob.h>
+#include <sys/stat.h>
+
+#include <sysdeps/unix/sysv/linux/i386/olddirent.h>
+
+int __old_glob64 (const char *__pattern, int __flags,
+ int (*__errfunc) (const char *, int),
+ glob64_t *__pglob);
+libc_hidden_proto (__old_glob64);
+
+#define dirent __old_dirent64
+#define GL_READDIR(pglob, stream) \
+ ((struct __old_dirent64 *) (pglob)->gl_readdir (stream))
+#undef __readdir
+#define __readdir(dirp) __old_readdir64 (dirp)
+
+#define glob_t glob64_t
+#define glob(pattern, flags, errfunc, pglob) \
+ __old_glob64 (pattern, flags, errfunc, pglob)
+#define globfree(pglob) globfree64(pglob)
+
+#define convert_dirent __old_convert_dirent
+#define glob_in_dir __old_glob_in_dir
+
+#undef stat
+#define stat stat64
+#undef __stat
+#define __stat(file, buf) __xstat64 (_STAT_VER, file, buf)
+
+#define GLOB_ATTRIBUTE attribute_compat_text_section
+
+#include <posix/glob.c>
+
+libc_hidden_def (__old_glob64);
+
+compat_symbol (libc, __old_glob64, glob64, GLIBC_2_1);
+#endif
diff --git a/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym b/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym
index 17397c5511..25f914a93b 100644
--- a/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym
+++ b/sysdeps/unix/sysv/linux/sh/sh3/ucontext_i.sym
@@ -13,22 +13,22 @@ SIG_SETMASK
oLINK ucontext (uc_link)
oSS_SP ucontext (uc_stack.ss_sp)
oSS_SIZE ucontext (uc_stack.ss_size)
-oR0 mcontext (gregs[R0])
-oR1 mcontext (gregs[R1])
-oR2 mcontext (gregs[R2])
-oR3 mcontext (gregs[R3])
-oR4 mcontext (gregs[R4])
-oR5 mcontext (gregs[R5])
-oR6 mcontext (gregs[R6])
-oR7 mcontext (gregs[R7])
-oR8 mcontext (gregs[R8])
-oR9 mcontext (gregs[R9])
-oR10 mcontext (gregs[R10])
-oR11 mcontext (gregs[R11])
-oR12 mcontext (gregs[R12])
-oR13 mcontext (gregs[R13])
-oR14 mcontext (gregs[R14])
-oR15 mcontext (gregs[R15])
+oR0 mcontext (gregs[REG_R0])
+oR1 mcontext (gregs[REG_R1])
+oR2 mcontext (gregs[REG_R2])
+oR3 mcontext (gregs[REG_R3])
+oR4 mcontext (gregs[REG_R4])
+oR5 mcontext (gregs[REG_R5])
+oR6 mcontext (gregs[REG_R6])
+oR7 mcontext (gregs[REG_R7])
+oR8 mcontext (gregs[REG_R8])
+oR9 mcontext (gregs[REG_R9])
+oR10 mcontext (gregs[REG_R10])
+oR11 mcontext (gregs[REG_R11])
+oR12 mcontext (gregs[REG_R12])
+oR13 mcontext (gregs[REG_R13])
+oR14 mcontext (gregs[REG_R14])
+oR15 mcontext (gregs[REG_R15])
oPC mcontext (pc)
oPR mcontext (pr)
oSR mcontext (sr)
diff --git a/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym b/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym
index 65633fbcf4..130f60cd96 100644
--- a/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym
+++ b/sysdeps/unix/sysv/linux/sh/sh4/ucontext_i.sym
@@ -13,22 +13,22 @@ SIG_SETMASK
oLINK ucontext (uc_link)
oSS_SP ucontext (uc_stack.ss_sp)
oSS_SIZE ucontext (uc_stack.ss_size)
-oR0 mcontext (gregs[R0])
-oR1 mcontext (gregs[R1])
-oR2 mcontext (gregs[R2])
-oR3 mcontext (gregs[R3])
-oR4 mcontext (gregs[R4])
-oR5 mcontext (gregs[R5])
-oR6 mcontext (gregs[R6])
-oR7 mcontext (gregs[R7])
-oR8 mcontext (gregs[R8])
-oR9 mcontext (gregs[R9])
-oR10 mcontext (gregs[R10])
-oR11 mcontext (gregs[R11])
-oR12 mcontext (gregs[R12])
-oR13 mcontext (gregs[R13])
-oR14 mcontext (gregs[R14])
-oR15 mcontext (gregs[R15])
+oR0 mcontext (gregs[REG_R0])
+oR1 mcontext (gregs[REG_R1])
+oR2 mcontext (gregs[REG_R2])
+oR3 mcontext (gregs[REG_R3])
+oR4 mcontext (gregs[REG_R4])
+oR5 mcontext (gregs[REG_R5])
+oR6 mcontext (gregs[REG_R6])
+oR7 mcontext (gregs[REG_R7])
+oR8 mcontext (gregs[REG_R8])
+oR9 mcontext (gregs[REG_R9])
+oR10 mcontext (gregs[REG_R10])
+oR11 mcontext (gregs[REG_R11])
+oR12 mcontext (gregs[REG_R12])
+oR13 mcontext (gregs[REG_R13])
+oR14 mcontext (gregs[REG_R14])
+oR15 mcontext (gregs[REG_R15])
oPC mcontext (pc)
oPR mcontext (pr)
oSR mcontext (sr)
diff --git a/sysdeps/unix/sysv/linux/sh/sys/ucontext.h b/sysdeps/unix/sysv/linux/sh/sys/ucontext.h
index ab9a7e66bf..037fbb73e8 100644
--- a/sysdeps/unix/sysv/linux/sh/sys/ucontext.h
+++ b/sysdeps/unix/sysv/linux/sh/sys/ucontext.h
@@ -31,49 +31,47 @@
typedef int greg_t;
/* Number of general registers. */
-#define NGPREG 16
+#define NGREG 16
/* Container for all general registers. */
-typedef greg_t gregset_t[NGPREG];
+typedef greg_t gregset_t[NGREG];
-#ifdef __USE_GNU
/* Number of each register is the `gregset_t' array. */
enum
{
- R0 = 0,
-#define R0 R0
- R1 = 1,
-#define R1 R1
- R2 = 2,
-#define R2 R2
- R3 = 3,
-#define R3 R3
- R4 = 4,
-#define R4 R4
- R5 = 5,
-#define R5 R5
- R6 = 6,
-#define R6 R6
- R7 = 7,
-#define R7 R7
- R8 = 8,
-#define R8 R8
- R9 = 9,
-#define R9 R9
- R10 = 10,
-#define R10 R10
- R11 = 11,
-#define R11 R11
- R12 = 12,
-#define R12 R12
- R13 = 13,
-#define R13 R13
- R14 = 14,
-#define R14 R14
- R15 = 15,
-#define R15 R15
+ REG_R0 = 0,
+#define REG_R0 REG_R0
+ REG_R1 = 1,
+#define REG_R1 REG_R1
+ REG_R2 = 2,
+#define REG_R2 REG_R2
+ REG_R3 = 3,
+#define REG_R3 REG_R3
+ REG_R4 = 4,
+#define REG_R4 REG_R4
+ REG_R5 = 5,
+#define REG_R5 REG_R5
+ REG_R6 = 6,
+#define REG_R6 REG_R6
+ REG_R7 = 7,
+#define REG_R7 REG_R7
+ REG_R8 = 8,
+#define REG_R8 REG_R8
+ REG_R9 = 9,
+#define REG_R9 REG_R9
+ REG_R10 = 10,
+#define REG_R10 REG_R10
+ REG_R11 = 11,
+#define REG_R11 REG_R11
+ REG_R12 = 12,
+#define REG_R12 REG_R12
+ REG_R13 = 13,
+#define REG_R13 REG_R13
+ REG_R14 = 14,
+#define REG_R14 REG_R14
+ REG_R15 = 15,
+#define REG_R15 REG_R15
};
-#endif
#if (defined(__SH4__) || defined(__SH4A__))
typedef int freg_t;
diff --git a/sysdeps/unix/sysv/linux/wordsize-64/globfree64.c b/sysdeps/unix/sysv/linux/wordsize-64/globfree64.c
new file mode 100644
index 0000000000..af035e1514
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/wordsize-64/globfree64.c
@@ -0,0 +1,2 @@
+/* This file is here so sysdeps/gnu/glob64.c doesn't take precedence. */
+#include <sysdeps/wordsize-64/globfree64.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/globfree.c b/sysdeps/unix/sysv/linux/x86_64/x32/globfree.c
new file mode 100644
index 0000000000..b76a761c17
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/globfree.c
@@ -0,0 +1 @@
+#include <sysdeps/wordsize-64/globfree.c>
diff --git a/sysdeps/wordsize-64/glob.c b/sysdeps/wordsize-64/glob.c
index 082faf1c70..954e8d37e2 100644
--- a/sysdeps/wordsize-64/glob.c
+++ b/sysdeps/wordsize-64/glob.c
@@ -4,5 +4,3 @@
#undef glob64
#undef globfree64
weak_alias (glob, glob64)
-weak_alias (globfree, globfree64)
-libc_hidden_ver (globfree, globfree64)
diff --git a/sysdeps/wordsize-64/globfree.c b/sysdeps/wordsize-64/globfree.c
new file mode 100644
index 0000000000..ec8c35b489
--- /dev/null
+++ b/sysdeps/wordsize-64/globfree.c
@@ -0,0 +1,5 @@
+#define globfree64 __no_globfree64_decl
+#include <posix/globfree.c>
+#undef globfree64
+weak_alias (globfree, globfree64)
+libc_hidden_ver (globfree, globfree64)
diff --git a/sysdeps/wordsize-64/globfree64.c b/sysdeps/wordsize-64/globfree64.c
new file mode 100644
index 0000000000..a0f57ff4b3
--- /dev/null
+++ b/sysdeps/wordsize-64/globfree64.c
@@ -0,0 +1 @@
+/* globfree64 is in globfree.c */
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
index f6739fae81..33dd094e37 100644
--- a/sysdeps/x86/cpu-features-offsets.sym
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -15,6 +15,7 @@ CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx)
CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx)
FAMILY_OFFSET offsetof (struct cpu_features, family)
MODEL_OFFSET offsetof (struct cpu_features, model)
+XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
FEATURE_OFFSET offsetof (struct cpu_features, feature)
FEATURE_SIZE sizeof (unsigned int)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index d1ee922290..9eca98817d 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -18,6 +18,7 @@
#include <cpuid.h>
#include <cpu-features.h>
+#include <libc-internal.h>
static void
get_common_indeces (struct cpu_features *cpu_features,
@@ -88,6 +89,71 @@ get_common_indeces (struct cpu_features *cpu_features,
cpu_features->feature[index_arch_FMA_Usable]
|= bit_arch_FMA_Usable;
}
+
+ /* For _dl_runtime_resolve, set xsave_state_size to xsave area
+ size + integer register save size and align it to 64 bytes. */
+ if (cpu_features->max_cpuid >= 0xd)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
+ if (ebx != 0)
+ {
+ cpu_features->xsave_state_size
+ = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+
+ __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
+
+ /* Check if XSAVEC is available. */
+ if ((eax & (1 << 1)) != 0)
+ {
+ unsigned int xstate_comp_offsets[32];
+ unsigned int xstate_comp_sizes[32];
+ unsigned int i;
+
+ xstate_comp_offsets[0] = 0;
+ xstate_comp_offsets[1] = 160;
+ xstate_comp_offsets[2] = 576;
+ xstate_comp_sizes[0] = 160;
+ xstate_comp_sizes[1] = 256;
+
+ for (i = 2; i < 32; i++)
+ {
+ if ((STATE_SAVE_MASK & (1 << i)) != 0)
+ {
+ __cpuid_count (0xd, i, eax, ebx, ecx, edx);
+ xstate_comp_sizes[i] = eax;
+ }
+ else
+ {
+ ecx = 0;
+ xstate_comp_sizes[i] = 0;
+ }
+
+ if (i > 2)
+ {
+ xstate_comp_offsets[i]
+ = (xstate_comp_offsets[i - 1]
+ + xstate_comp_sizes[i -1]);
+ if ((ecx & (1 << 1)) != 0)
+ xstate_comp_offsets[i]
+ = ALIGN_UP (xstate_comp_offsets[i], 64);
+ }
+ }
+
+ /* Use XSAVEC. */
+ unsigned int size
+ = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+ if (size)
+ {
+ cpu_features->xsave_state_size
+ = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+ cpu_features->feature[index_arch_XSAVEC_Usable]
+ |= bit_arch_XSAVEC_Usable;
+ }
+ }
+ }
+ }
}
}
@@ -213,20 +279,6 @@ init_cpu_features (struct cpu_features *cpu_features)
else
cpu_features->feature[index_arch_Prefer_No_AVX512]
|= bit_arch_Prefer_No_AVX512;
-
- /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
- If XGETBV suports ECX == 1, use _dl_runtime_resolve_opt. */
- cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
- |= bit_arch_Use_dl_runtime_resolve_slow;
- if (cpu_features->max_cpuid >= 0xd)
- {
- unsigned int eax;
-
- __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
- if ((eax & (1 << 2)) != 0)
- cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt]
- |= bit_arch_Use_dl_runtime_resolve_opt;
- }
}
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 2609ac0999..507a141414 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -37,9 +37,8 @@
#define bit_arch_Prefer_No_VZEROUPPER (1 << 17)
#define bit_arch_Fast_Unaligned_Copy (1 << 18)
#define bit_arch_Prefer_ERMS (1 << 19)
-#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20)
-#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21)
-#define bit_arch_Prefer_No_AVX512 (1 << 22)
+#define bit_arch_Prefer_No_AVX512 (1 << 20)
+#define bit_arch_XSAVEC_Usable (1 << 21)
/* CPUID Feature flags. */
@@ -82,6 +81,15 @@
/* The current maximum size of the feature integer bit array. */
#define FEATURE_INDEX_MAX 1
+/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
+ space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
+ aligned to 16 bytes for fxsave and 64 bytes for xsave. */
+#define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask and bound registers. */
+#define STATE_SAVE_MASK \
+ ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+
#ifdef __ASSEMBLER__
# include <cpu-features-offsets.h>
@@ -206,6 +214,12 @@ struct cpu_features
} cpuid[COMMON_CPUID_INDEX_MAX];
unsigned int family;
unsigned int model;
+ /* The type must be unsigned long int so that we use
+
+ sub xsave_state_size_offset(%rip) %RSP_LP
+
+ in _dl_runtime_resolve. */
+ unsigned long int xsave_state_size;
unsigned int feature[FEATURE_INDEX_MAX];
};
@@ -298,9 +312,8 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1
# define index_arch_Prefer_ERMS FEATURE_INDEX_1
-# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
-# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1
+# define index_arch_XSAVEC_Usable FEATURE_INDEX_1
#endif /* !__ASSEMBLER__ */
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 6d99284cd0..cc990a9685 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -27,7 +27,7 @@ ifeq ($(subdir),elf)
CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
-mno-mmx)
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
tests += ifuncmain8
modules-names += ifuncmod8
@@ -49,9 +49,12 @@ extra-test-objs += tst-quadmod1pie.o tst-quadmod2pie.o
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
-tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 tst-audit10
-test-extras += tst-audit4-aux tst-audit10-aux
-extra-test-objs += tst-audit4-aux.o tst-audit10-aux.o
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 \
+ tst-audit10 tst-sse tst-avx tst-avx512
+test-extras += tst-audit4-aux tst-audit10-aux \
+ tst-avx-aux tst-avx512-aux
+extra-test-objs += tst-audit4-aux.o tst-audit10-aux.o \
+ tst-avx-aux.o tst-avx512-aux.o
tests += tst-split-dynreloc
LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds
@@ -62,7 +65,8 @@ modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod5a tst-auditmod5b \
tst-auditmod6a tst-auditmod6b tst-auditmod6c \
tst-auditmod7a tst-auditmod7b \
- tst-auditmod10a tst-auditmod10b
+ tst-auditmod10a tst-auditmod10b \
+ tst-ssemod tst-avxmod tst-avx512mod
$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
@@ -89,6 +93,10 @@ $(objpfx)tst-audit10: $(objpfx)tst-audit10-aux.o $(objpfx)tst-auditmod10a.so
$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
+$(objpfx)tst-sse: $(objpfx)tst-ssemod.so
+$(objpfx)tst-avx: $(objpfx)tst-avx-aux.o $(objpfx)tst-avxmod.so
+$(objpfx)tst-avx512: $(objpfx)tst-avx512-aux.o $(objpfx)tst-avx512mod.so
+
AVX-CFLAGS=-mavx -mno-vzeroupper
CFLAGS-tst-audit4-aux.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS)
@@ -96,14 +104,18 @@ CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
+CFLAGS-tst-avx-aux.c += $(AVX-CFLAGS)
+CFLAGS-tst-avxmod.c += $(AVX-CFLAGS)
ifeq (yes,$(config-cflags-avx512))
AVX512-CFLAGS = -mavx512f
CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
+CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS)
+CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS)
endif
endif
ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
+gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
endif
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index c0f0fa16a2..8355432dfc 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -66,12 +66,9 @@ static inline int __attribute__ ((unused, always_inline))
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{
Elf64_Addr *got;
- extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx_slow (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx_opt (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden;
- extern void _dl_runtime_resolve_avx512_opt (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
+ extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
@@ -120,29 +117,14 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
- if (HAS_ARCH_FEATURE (AVX512F_Usable))
- {
- if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt))
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_avx512_opt;
- else
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_avx512;
- }
- else if (HAS_ARCH_FEATURE (AVX_Usable))
- {
- if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_opt))
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_avx_opt;
- else if (HAS_ARCH_FEATURE (Use_dl_runtime_resolve_slow))
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_avx_slow;
- else
- *(ElfW(Addr) *) (got + 2)
- = (ElfW(Addr)) &_dl_runtime_resolve_avx;
- }
+ if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+ *(ElfW(Addr) *) (got + 2)
+ = (HAS_ARCH_FEATURE (XSAVEC_Usable)
+ ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
+ : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
else
- *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse;
+ *(ElfW(Addr) *) (got + 2)
+ = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
}
}
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
new file mode 100644
index 0000000000..3584805c8e
--- /dev/null
+++ b/sysdeps/x86_64/dl-tls.c
@@ -0,0 +1,53 @@
+/* Thread-local storage handling in the ELF dynamic linker. x86-64 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+/* Work around GCC PR58066, due to which __tls_get_addr may be called
+ with an unaligned stack. The compat implementation is in
+ tls_get_addr-compat.S. */
+
+# include <dl-tls.h>
+
+/* Define __tls_get_addr within elf/dl-tls.c under a different
+ name. */
+extern __typeof__ (__tls_get_addr) ___tls_get_addr;
+
+# define __tls_get_addr ___tls_get_addr
+# include <elf/dl-tls.c>
+# undef __tls_get_addr
+
+hidden_ver (___tls_get_addr, __tls_get_addr)
+
+/* Only handle slow paths for __tls_get_addr. */
+attribute_hidden
+void *
+__tls_get_addr_slow (GET_ADDR_ARGS)
+{
+ dtv_t *dtv = THREAD_DTV ();
+
+ if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+ return update_get_addr (GET_ADDR_PARAM);
+
+ return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
+}
+#else
+
+/* No compatibility symbol needed. */
+# include <elf/dl-tls.c>
+
+#endif
diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h
index cf6c107f54..fa5bf6cd93 100644
--- a/sysdeps/x86_64/dl-tls.h
+++ b/sysdeps/x86_64/dl-tls.h
@@ -16,6 +16,9 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifndef _X86_64_DL_TLS_H
+#define _X86_64_DL_TLS_H
+
#include <stdint.h>
/* Type used for the representation of TLS information in the GOT. */
@@ -27,3 +30,5 @@ typedef struct dl_tls_index
extern void *__tls_get_addr (tls_index *ti);
+
+#endif /* _X86_64_DL_TLS_H */
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 50b23633e3..b4cda0f535 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -34,41 +34,24 @@
# define DL_STACK_ALIGNMENT 8
#endif
-#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE
-/* The maximum size in bytes of unaligned vector load and store in the
- dynamic linker. Since SSE optimized memory/string functions with
- aligned SSE register load and store are used in the dynamic linker,
- we must set this to 8 so that _dl_runtime_resolve_sse will align the
- stack before calling _dl_fixup. */
-# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8
-#endif
-
-/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+ stack to 16 bytes before calling _dl_fixup. */
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
- (VEC_SIZE > DL_STACK_ALIGNMENT \
- && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE)
-
-/* Align vector register save area to 16 bytes. */
-#define REGISTER_SAVE_VEC_OFF 0
+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+ || 16 > DL_STACK_ALIGNMENT)
/* Area on stack to save and restore registers used for parameter
passing when calling _dl_fixup. */
#ifdef __ILP32__
-# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
# define PRESERVE_BND_REGS_PREFIX
#else
-/* Align bound register save area to 16 bytes. */
-# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
-# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
-# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
-# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
-# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
# ifdef HAVE_MPX_SUPPORT
# define PRESERVE_BND_REGS_PREFIX bnd
# else
# define PRESERVE_BND_REGS_PREFIX .byte 0xf2
# endif
#endif
+#define REGISTER_SAVE_RAX 0
#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
@@ -80,68 +63,56 @@
#define VEC_SIZE 64
#define VMOVA vmovdqa64
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV vmovdqa64
-#else
-# define VMOV vmovdqu64
-#endif
#define VEC(i) zmm##i
-#define _dl_runtime_resolve _dl_runtime_resolve_avx512
#define _dl_runtime_profile _dl_runtime_profile_avx512
#include "dl-trampoline.h"
-#undef _dl_runtime_resolve
#undef _dl_runtime_profile
#undef VEC
-#undef VMOV
#undef VMOVA
#undef VEC_SIZE
#define VEC_SIZE 32
#define VMOVA vmovdqa
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV vmovdqa
-#else
-# define VMOV vmovdqu
-#endif
#define VEC(i) ymm##i
-#define _dl_runtime_resolve _dl_runtime_resolve_avx
-#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx_opt
#define _dl_runtime_profile _dl_runtime_profile_avx
#include "dl-trampoline.h"
-#undef _dl_runtime_resolve
-#undef _dl_runtime_resolve_opt
#undef _dl_runtime_profile
#undef VEC
-#undef VMOV
#undef VMOVA
#undef VEC_SIZE
/* movaps/movups is 1-byte shorter. */
#define VEC_SIZE 16
#define VMOVA movaps
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV movaps
-#else
-# define VMOV movups
-#endif
#define VEC(i) xmm##i
-#define _dl_runtime_resolve _dl_runtime_resolve_sse
#define _dl_runtime_profile _dl_runtime_profile_sse
#undef RESTORE_AVX
#include "dl-trampoline.h"
-#undef _dl_runtime_resolve
#undef _dl_runtime_profile
-#undef VMOV
+#undef VEC
#undef VMOVA
+#undef VEC_SIZE
-/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt
- to preserve the full vector registers with zero upper bits. */
-#define VMOVA vmovdqa
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV vmovdqa
-#else
-# define VMOV vmovdqu
-#endif
-#define _dl_runtime_resolve _dl_runtime_resolve_sse_vex
-#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx512_opt
+#define USE_FXSAVE
+#define STATE_SAVE_ALIGNMENT 16
+#define _dl_runtime_resolve _dl_runtime_resolve_fxsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_FXSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVE
+#define STATE_SAVE_ALIGNMENT 64
+#define _dl_runtime_resolve _dl_runtime_resolve_xsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVEC
+#define STATE_SAVE_ALIGNMENT 64
+#define _dl_runtime_resolve _dl_runtime_resolve_xsavec
#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVEC
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index 32ad3af202..b9c2f1796f 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -16,140 +16,47 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#undef REGISTER_SAVE_AREA_RAW
-#ifdef __ILP32__
-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
- VEC7. */
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8)
-#else
-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
- BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8)
-#endif
+ .text
+#ifdef _dl_runtime_resolve
-#undef REGISTER_SAVE_AREA
-#undef LOCAL_STORAGE_AREA
-#undef BASE
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8)
-/* Local stack area before jumping to function address: RBX. */
-# define LOCAL_STORAGE_AREA 8
-# define BASE rbx
-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
-# error REGISTER_SAVE_AREA must be multples of VEC_SIZE
-# endif
-#else
-# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW
-/* Local stack area before jumping to function address: All saved
- registers. */
-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
-# define BASE rsp
-# if (REGISTER_SAVE_AREA % 16) != 8
-# error REGISTER_SAVE_AREA must be odd multples of 8
-# endif
-#endif
+# undef REGISTER_SAVE_AREA
+# undef LOCAL_STORAGE_AREA
+# undef BASE
- .text
-#ifdef _dl_runtime_resolve_opt
-/* Use the smallest vector registers to preserve the full YMM/ZMM
- registers to avoid SSE transition penalty. */
+# if (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multples of 16
+# endif
-# if VEC_SIZE == 32
-/* Check if the upper 128 bits in %ymm0 - %ymm7 registers are non-zero
- and preserve %xmm0 - %xmm7 registers with the zero upper bits. Since
- there is no SSE transition penalty on AVX512 processors which don't
- support XGETBV with ECX == 1, _dl_runtime_resolve_avx512_slow isn't
- provided. */
- .globl _dl_runtime_resolve_avx_slow
- .hidden _dl_runtime_resolve_avx_slow
- .type _dl_runtime_resolve_avx_slow, @function
- .align 16
-_dl_runtime_resolve_avx_slow:
- cfi_startproc
- cfi_adjust_cfa_offset(16) # Incorporate PLT
- vorpd %ymm0, %ymm1, %ymm8
- vorpd %ymm2, %ymm3, %ymm9
- vorpd %ymm4, %ymm5, %ymm10
- vorpd %ymm6, %ymm7, %ymm11
- vorpd %ymm8, %ymm9, %ymm9
- vorpd %ymm10, %ymm11, %ymm10
- vpcmpeqd %xmm8, %xmm8, %xmm8
- vorpd %ymm9, %ymm10, %ymm10
- vptest %ymm10, %ymm8
- # Preserve %ymm0 - %ymm7 registers if the upper 128 bits of any
- # %ymm0 - %ymm7 registers aren't zero.
- PRESERVE_BND_REGS_PREFIX
- jnc _dl_runtime_resolve_avx
- # Use vzeroupper to avoid SSE transition penalty.
- vzeroupper
- # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits
- # when the upper 128 bits of %ymm0 - %ymm7 registers are zero.
- PRESERVE_BND_REGS_PREFIX
- jmp _dl_runtime_resolve_sse_vex
- cfi_adjust_cfa_offset(-16) # Restore PLT adjustment
- cfi_endproc
- .size _dl_runtime_resolve_avx_slow, .-_dl_runtime_resolve_avx_slow
+# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT
# endif
-/* Use XGETBV with ECX == 1 to check which bits in vector registers are
- non-zero and only preserve the non-zero lower bits with zero upper
- bits. */
- .globl _dl_runtime_resolve_opt
- .hidden _dl_runtime_resolve_opt
- .type _dl_runtime_resolve_opt, @function
- .align 16
-_dl_runtime_resolve_opt:
- cfi_startproc
- cfi_adjust_cfa_offset(16) # Incorporate PLT
- pushq %rax
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%rax, 0)
- pushq %rcx
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%rcx, 0)
- pushq %rdx
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%rdx, 0)
- movl $1, %ecx
- xgetbv
- movl %eax, %r11d
- popq %rdx
- cfi_adjust_cfa_offset(-8)
- cfi_restore (%rdx)
- popq %rcx
- cfi_adjust_cfa_offset(-8)
- cfi_restore (%rcx)
- popq %rax
- cfi_adjust_cfa_offset(-8)
- cfi_restore (%rax)
-# if VEC_SIZE == 32
- # For YMM registers, check if YMM state is in use.
- andl $bit_YMM_state, %r11d
- # Preserve %xmm0 - %xmm7 registers with the zero upper 128 bits if
- # YMM state isn't in use.
- PRESERVE_BND_REGS_PREFIX
- jz _dl_runtime_resolve_sse_vex
-# elif VEC_SIZE == 16
- # For ZMM registers, check if YMM state and ZMM state are in
- # use.
- andl $(bit_YMM_state | bit_ZMM0_15_state), %r11d
- cmpl $bit_YMM_state, %r11d
- # Preserve %zmm0 - %zmm7 registers if ZMM state is in use.
- PRESERVE_BND_REGS_PREFIX
- jg _dl_runtime_resolve_avx512
- # Preserve %ymm0 - %ymm7 registers with the zero upper 256 bits if
- # ZMM state isn't in use.
- PRESERVE_BND_REGS_PREFIX
- je _dl_runtime_resolve_avx
- # Preserve %xmm0 - %xmm7 registers with the zero upper 384 bits if
- # neither YMM state nor ZMM state are in use.
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX. */
+# define LOCAL_STORAGE_AREA 8
+# define BASE rbx
+# ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
+# if (REGISTER_SAVE_AREA % 16) != 0
+# error REGISTER_SAVE_AREA must be multples of 16
+# endif
+# endif
# else
-# error Unsupported VEC_SIZE!
+# ifndef USE_FXSAVE
+# error USE_FXSAVE must be defined
+# endif
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
+/* Local stack area before jumping to function address: All saved
+ registers. */
+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
+# define BASE rsp
+# if (REGISTER_SAVE_AREA % 16) != 8
+# error REGISTER_SAVE_AREA must be odd multples of 8
+# endif
# endif
- cfi_adjust_cfa_offset(-16) # Restore PLT adjustment
- cfi_endproc
- .size _dl_runtime_resolve_opt, .-_dl_runtime_resolve_opt
-#endif
+
.globl _dl_runtime_resolve
.hidden _dl_runtime_resolve
.type _dl_runtime_resolve, @function
@@ -157,19 +64,30 @@ _dl_runtime_resolve_opt:
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-# if LOCAL_STORAGE_AREA != 8
-# error LOCAL_STORAGE_AREA must be 8
-# endif
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# if LOCAL_STORAGE_AREA != 8
+# error LOCAL_STORAGE_AREA must be 8
+# endif
pushq %rbx # push subtracts stack by 8.
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%rbx, 0)
mov %RSP_LP, %RBX_LP
cfi_def_cfa_register(%rbx)
- and $-VEC_SIZE, %RSP_LP
-#endif
+ and $-STATE_SAVE_ALIGNMENT, %RSP_LP
+# endif
+# ifdef REGISTER_SAVE_AREA
sub $REGISTER_SAVE_AREA, %RSP_LP
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+# else
+ # Allocate stack space of the required size to save the state.
+# if IS_IN (rtld)
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+# else
+ sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+# endif
+# endif
# Preserve registers otherwise clobbered.
movq %rax, REGISTER_SAVE_RAX(%rsp)
movq %rcx, REGISTER_SAVE_RCX(%rsp)
@@ -178,59 +96,42 @@ _dl_runtime_resolve:
movq %rdi, REGISTER_SAVE_RDI(%rsp)
movq %r8, REGISTER_SAVE_R8(%rsp)
movq %r9, REGISTER_SAVE_R9(%rsp)
- VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
- VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
- VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
- VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
- VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
- VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
- VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
- VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
-#ifndef __ILP32__
- # We also have to preserve bound registers. These are nops if
- # Intel MPX isn't available or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# ifdef USE_FXSAVE
+ fxsave STATE_SAVE_OFFSET(%rsp)
# else
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1b,0x04,0x24
+ movl $STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ # Clear the XSAVE Header.
+# ifdef USE_XSAVE
+ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+# endif
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+# ifdef USE_XSAVE
+ xsave STATE_SAVE_OFFSET(%rsp)
# else
- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+ xsavec STATE_SAVE_OFFSET(%rsp)
# endif
- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
-#endif
# Copy args pushed by PLT in register.
# %rdi: link_map, %rsi: reloc_index
mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
call _dl_fixup # Call resolver.
mov %RAX_LP, %R11_LP # Save return value
-#ifndef __ILP32__
- # Restore bound registers. These are nops if Intel MPX isn't
- # avaiable or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+ # Get register content back.
+# ifdef USE_FXSAVE
+ fxrstor STATE_SAVE_OFFSET(%rsp)
# else
- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1a,0x04,0x24
-# else
- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
-# endif
+ movl $STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ xrstor STATE_SAVE_OFFSET(%rsp)
# endif
-#endif
- # Get register content back.
movq REGISTER_SAVE_R9(%rsp), %r9
movq REGISTER_SAVE_R8(%rsp), %r8
movq REGISTER_SAVE_RDI(%rsp), %rdi
@@ -238,20 +139,12 @@ _dl_runtime_resolve:
movq REGISTER_SAVE_RDX(%rsp), %rdx
movq REGISTER_SAVE_RCX(%rsp), %rcx
movq REGISTER_SAVE_RAX(%rsp), %rax
- VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
mov %RBX_LP, %RSP_LP
cfi_def_cfa_register(%rsp)
movq (%rsp), %rbx
cfi_restore(%rbx)
-#endif
+# endif
# Adjust stack(PLT did 2 pushes)
add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
@@ -260,11 +153,9 @@ _dl_runtime_resolve:
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
+#endif
-/* To preserve %xmm0 - %xmm7 registers, dl-trampoline.h is included
- twice, for _dl_runtime_resolve_sse and _dl_runtime_resolve_sse_vex.
- But we don't need another _dl_runtime_profile for XMM registers. */
#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
# error LR_VECTOR_OFFSET must be multples of VEC_SIZE
diff --git a/sysdeps/x86_64/rtld-offsets.sym b/sysdeps/x86_64/rtld-offsets.sym
new file mode 100644
index 0000000000..fd41b51521
--- /dev/null
+++ b/sysdeps/x86_64/rtld-offsets.sym
@@ -0,0 +1,6 @@
+#define SHARED
+#include <ldsodefs.h>
+
+--
+
+GL_TLS_GENERATION_OFFSET offsetof (struct rtld_global, _dl_tls_generation)
diff --git a/sysdeps/x86_64/tls_get_addr.S b/sysdeps/x86_64/tls_get_addr.S
new file mode 100644
index 0000000000..9d38fb3be5
--- /dev/null
+++ b/sysdeps/x86_64/tls_get_addr.S
@@ -0,0 +1,61 @@
+/* Stack-aligning implementation of __tls_get_addr. x86-64 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+
+# include <sysdep.h>
+# include "tlsdesc.h"
+# include "rtld-offsets.h"
+
+/* See __tls_get_addr and __tls_get_addr_slow in dl-tls.c. This function
+ call __tls_get_addr_slow on both slow paths. It realigns the stack
+ before the call to work around GCC PR58066. */
+
+ENTRY (__tls_get_addr)
+ mov %fs:DTV_OFFSET, %RDX_LP
+ mov GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %RAX_LP
+ /* GL(dl_tls_generation) == dtv[0].counter */
+ cmp %RAX_LP, (%rdx)
+ jne 1f
+ mov TI_MODULE_OFFSET(%rdi), %RAX_LP
+ /* dtv[ti->ti_module] */
+# ifdef __LP64__
+ salq $4, %rax
+ movq (%rdx,%rax), %rax
+# else
+ movl (%rdx,%rax, 8), %eax
+# endif
+ cmp $-1, %RAX_LP
+ je 1f
+ add TI_OFFSET_OFFSET(%rdi), %RAX_LP
+ ret
+1:
+ /* On the slow path, align the stack. */
+ pushq %rbp
+ cfi_def_cfa_offset (16)
+ cfi_offset (%rbp, -16)
+ mov %RSP_LP, %RBP_LP
+ cfi_def_cfa_register (%rbp)
+ and $-16, %RSP_LP
+ call __tls_get_addr_slow
+ mov %RBP_LP, %RSP_LP
+ popq %rbp
+ cfi_def_cfa (%rsp, 8)
+ ret
+END (__tls_get_addr)
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/tlsdesc.sym b/sysdeps/x86_64/tlsdesc.sym
index 33854975d0..fc897ab4b5 100644
--- a/sysdeps/x86_64/tlsdesc.sym
+++ b/sysdeps/x86_64/tlsdesc.sym
@@ -15,3 +15,6 @@ TLSDESC_ARG offsetof(struct tlsdesc, arg)
TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+
+TI_MODULE_OFFSET offsetof(tls_index, ti_module)
+TI_OFFSET_OFFSET offsetof(tls_index, ti_offset)
diff --git a/sysdeps/x86_64/tst-avx-aux.c b/sysdeps/x86_64/tst-avx-aux.c
new file mode 100644
index 0000000000..e3807de7bb
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx-aux.c
@@ -0,0 +1,47 @@
+/* Test case for preserved AVX registers in dynamic linker, -mavx part.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_avx_aux (void)
+{
+#ifdef __AVX__
+ extern __m256i avx_test (__m256i, __m256i, __m256i, __m256i,
+ __m256i, __m256i, __m256i, __m256i);
+
+ __m256i ymm0 = _mm256_set1_epi32 (0);
+ __m256i ymm1 = _mm256_set1_epi32 (1);
+ __m256i ymm2 = _mm256_set1_epi32 (2);
+ __m256i ymm3 = _mm256_set1_epi32 (3);
+ __m256i ymm4 = _mm256_set1_epi32 (4);
+ __m256i ymm5 = _mm256_set1_epi32 (5);
+ __m256i ymm6 = _mm256_set1_epi32 (6);
+ __m256i ymm7 = _mm256_set1_epi32 (7);
+ __m256i ret = avx_test (ymm0, ymm1, ymm2, ymm3,
+ ymm4, ymm5, ymm6, ymm7);
+ ymm0 = _mm256_set1_epi32 (0x12349876);
+ if (memcmp (&ymm0, &ret, sizeof (ret)))
+ abort ();
+ return 0;
+#else /* __AVX__ */
+ return 77;
+#endif /* __AVX__ */
+}
diff --git a/sysdeps/x86_64/tst-avx.c b/sysdeps/x86_64/tst-avx.c
new file mode 100644
index 0000000000..ec2e3a79ff
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx.c
@@ -0,0 +1,49 @@
+/* Test case for preserved AVX registers in dynamic linker.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <cpuid.h>
+
+int tst_avx_aux (void);
+
+static int
+avx_enabled (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ /* Check the OS has AVX and SSE saving enabled. */
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ return (eax & 6) == 6;
+}
+
+static int
+do_test (void)
+{
+ /* Run AVX test only if AVX is supported. */
+ if (avx_enabled ())
+ return tst_avx_aux ();
+ else
+ return 77;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-avx512-aux.c b/sysdeps/x86_64/tst-avx512-aux.c
new file mode 100644
index 0000000000..6cebc523f2
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512-aux.c
@@ -0,0 +1,48 @@
+/* Test case for preserved AVX512 registers in dynamic linker,
+ -mavx512 part.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_avx512_aux (void)
+{
+#ifdef __AVX512F__
+ extern __m512i avx512_test (__m512i, __m512i, __m512i, __m512i,
+ __m512i, __m512i, __m512i, __m512i);
+
+ __m512i zmm0 = _mm512_set1_epi32 (0);
+ __m512i zmm1 = _mm512_set1_epi32 (1);
+ __m512i zmm2 = _mm512_set1_epi32 (2);
+ __m512i zmm3 = _mm512_set1_epi32 (3);
+ __m512i zmm4 = _mm512_set1_epi32 (4);
+ __m512i zmm5 = _mm512_set1_epi32 (5);
+ __m512i zmm6 = _mm512_set1_epi32 (6);
+ __m512i zmm7 = _mm512_set1_epi32 (7);
+ __m512i ret = avx512_test (zmm0, zmm1, zmm2, zmm3,
+ zmm4, zmm5, zmm6, zmm7);
+ zmm0 = _mm512_set1_epi32 (0x12349876);
+ if (memcmp (&zmm0, &ret, sizeof (ret)))
+ abort ();
+ return 0;
+#else /* __AVX512F__ */
+ return 77;
+#endif /* __AVX512F__ */
+}
diff --git a/sysdeps/x86_64/tst-avx512.c b/sysdeps/x86_64/tst-avx512.c
new file mode 100644
index 0000000000..a8e42ef553
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512.c
@@ -0,0 +1,57 @@
+/* Test case for preserved AVX512 registers in dynamic linker.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <cpuid.h>
+
+int tst_avx512_aux (void);
+
+static int
+avx512_enabled (void)
+{
+#ifdef bit_AVX512F
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+ if (!(ebx & bit_AVX512F))
+ return 0;
+
+ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+ /* Verify that ZMM, YMM and XMM states are enabled. */
+ return (eax & 0xe6) == 0xe6;
+#else
+ return 0;
+#endif
+}
+
+static int
+do_test (void)
+{
+ /* Run AVX512 test only if AVX512 is supported. */
+ if (avx512_enabled ())
+ return tst_avx512_aux ();
+ else
+ return 77;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-avx512mod.c b/sysdeps/x86_64/tst-avx512mod.c
new file mode 100644
index 0000000000..4cfb3a2c3d
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512mod.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved AVX512 registers in dynamic linker. */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m512i
+avx512_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
+ __m512i x4, __m512i x5, __m512i x6, __m512i x7)
+{
+ __m512i zmm;
+
+ zmm = _mm512_set1_epi32 (0);
+ if (memcmp (&zmm, &x0, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (1);
+ if (memcmp (&zmm, &x1, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (2);
+ if (memcmp (&zmm, &x2, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (3);
+ if (memcmp (&zmm, &x3, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (4);
+ if (memcmp (&zmm, &x4, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (5);
+ if (memcmp (&zmm, &x5, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (6);
+ if (memcmp (&zmm, &x6, sizeof (zmm)))
+ abort ();
+
+ zmm = _mm512_set1_epi32 (7);
+ if (memcmp (&zmm, &x7, sizeof (zmm)))
+ abort ();
+
+ return _mm512_set1_epi32 (0x12349876);
+}
+#endif
diff --git a/sysdeps/x86_64/tst-avxmod.c b/sysdeps/x86_64/tst-avxmod.c
new file mode 100644
index 0000000000..6e5b154997
--- /dev/null
+++ b/sysdeps/x86_64/tst-avxmod.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved AVX registers in dynamic linker. */
+
+#ifdef __AVX__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m256i
+avx_test (__m256i x0, __m256i x1, __m256i x2, __m256i x3,
+ __m256i x4, __m256i x5, __m256i x6, __m256i x7)
+{
+ __m256i ymm;
+
+ ymm = _mm256_set1_epi32 (0);
+ if (memcmp (&ymm, &x0, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (1);
+ if (memcmp (&ymm, &x1, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (2);
+ if (memcmp (&ymm, &x2, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (3);
+ if (memcmp (&ymm, &x3, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (4);
+ if (memcmp (&ymm, &x4, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (5);
+ if (memcmp (&ymm, &x5, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (6);
+ if (memcmp (&ymm, &x6, sizeof (ymm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (7);
+ if (memcmp (&ymm, &x7, sizeof (ymm)))
+ abort ();
+
+ return _mm256_set1_epi32 (0x12349876);
+}
+#endif
diff --git a/sysdeps/x86_64/tst-sse.c b/sysdeps/x86_64/tst-sse.c
new file mode 100644
index 0000000000..dd1537cf27
--- /dev/null
+++ b/sysdeps/x86_64/tst-sse.c
@@ -0,0 +1,46 @@
+/* Test case for preserved SSE registers in dynamic linker.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern __m128i sse_test (__m128i, __m128i, __m128i, __m128i,
+ __m128i, __m128i, __m128i, __m128i);
+
+static int
+do_test (void)
+{
+ __m128i xmm0 = _mm_set1_epi32 (0);
+ __m128i xmm1 = _mm_set1_epi32 (1);
+ __m128i xmm2 = _mm_set1_epi32 (2);
+ __m128i xmm3 = _mm_set1_epi32 (3);
+ __m128i xmm4 = _mm_set1_epi32 (4);
+ __m128i xmm5 = _mm_set1_epi32 (5);
+ __m128i xmm6 = _mm_set1_epi32 (6);
+ __m128i xmm7 = _mm_set1_epi32 (7);
+ __m128i ret = sse_test (xmm0, xmm1, xmm2, xmm3,
+ xmm4, xmm5, xmm6, xmm7);
+ xmm0 = _mm_set1_epi32 (0x12349876);
+ if (memcmp (&xmm0, &ret, sizeof (ret)))
+ abort ();
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-ssemod.c b/sysdeps/x86_64/tst-ssemod.c
new file mode 100644
index 0000000000..907a64c69e
--- /dev/null
+++ b/sysdeps/x86_64/tst-ssemod.c
@@ -0,0 +1,46 @@
+/* Test case for x86-64 preserved SSE registers in dynamic linker. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m128i
+sse_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3,
+ __m128i x4, __m128i x5, __m128i x6, __m128i x7)
+{
+ __m128i xmm;
+
+ xmm = _mm_set1_epi32 (0);
+ if (memcmp (&xmm, &x0, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (1);
+ if (memcmp (&xmm, &x1, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (2);
+ if (memcmp (&xmm, &x2, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (3);
+ if (memcmp (&xmm, &x3, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (4);
+ if (memcmp (&xmm, &x4, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (5);
+ if (memcmp (&xmm, &x5, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (6);
+ if (memcmp (&xmm, &x6, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (7);
+ if (memcmp (&xmm, &x7, sizeof (xmm)))
+ abort ();
+
+ return _mm_set1_epi32 (0x12349876);
+}