diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2009-08-08 10:54:42 -0700 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2009-08-08 10:54:42 -0700 |
commit | 4e1e2f42472744569f1540dd8410d23180e24bf9 (patch) | |
tree | 420047379cb0d341d37510158d4ca1a88ec57606 | |
parent | fc1870e6a484ad3211648c9ae51bc076913518aa (diff) | |
download | glibc-4e1e2f42472744569f1540dd8410d23180e24bf9.tar glibc-4e1e2f42472744569f1540dd8410d23180e24bf9.tar.gz glibc-4e1e2f42472744569f1540dd8410d23180e24bf9.tar.bz2 glibc-4e1e2f42472744569f1540dd8410d23180e24bf9.zip |
Support mixed SSE/AVX audit and check AVX only once.
This patch fixes mixed SSE/AVX audit and checks AVX only once in
_dl_runtime_profile. When an AVX or SSE register value in pltenter is
modified, we have to make sure that the SSE part value is the same in both
lr_xmm and lr_vector fields so that pltexit will get the correct value
from either lr_xmm or lr_vector fields. AVX-enabled pltenter should
update both lr_xmm and lr_vector fields to support stacked AVX/SSE
pltenter functions.
-rw-r--r-- | ChangeLog | 28 | ||||
-rw-r--r-- | elf/Makefile | 21 | ||||
-rw-r--r-- | elf/tst-audit6.c | 28 | ||||
-rw-r--r-- | elf/tst-audit7.c | 1 | ||||
-rw-r--r-- | elf/tst-auditmod6a.c | 46 | ||||
-rw-r--r-- | elf/tst-auditmod6b.c | 220 | ||||
-rw-r--r-- | elf/tst-auditmod6c.c | 225 | ||||
-rw-r--r-- | elf/tst-auditmod7a.c | 1 | ||||
-rw-r--r-- | elf/tst-auditmod7b.c | 218 | ||||
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 244 | ||||
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.h | 269 |
11 files changed, 1062 insertions, 239 deletions
@@ -1,3 +1,31 @@ +2009-08-01 H.J. Lu <hongjiu.lu@intel.com> + + * elf/Makefile (distribute): Add tst-audit6.c tst-auditmod6a.c + tst-auditmod6b.c tst-auditmod6c.c tst-audit7.c tst-auditmod7a.c + tst-auditmod7b.c. + (tests): Add tst-audit6 tst-audit7. + (modules-names): Add st-auditmod6a tst-auditmod6b tst-auditmod6c + tst-auditmod7a tst-auditmod7b. + ($(objpfx)tst-audit6): New. + ($(objpfx)tst-audit6.out): Likewise. + ($(objpfx)tst-audit7): Likewise. + ($(objpfx)tst-audit7.out): Likewise. + (tst-audit6-ENV): Likewise. + (tst-audit7-ENV): Likewise. + (CFLAGS-tst-auditmod6b.c): Likewise. + (CFLAGS-tst-auditmod6c.c): Likewise. + (CFLAGS-tst-auditmod7b.c): Likewise. + * elf/tst-audit6.c: New file. + * elf/tst-audit7.c: New file. + * elf/tst-auditmod6a.c: New file. + * elf/tst-auditmod6b.c: New file. + * elf/tst-auditmod6c.c: New file. + * elf/tst-auditmod7a.c: New file. + * elf/tst-auditmod7b.c: New file. + * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Move + saving and restoring SSE/AVX registers to ... + * sysdeps/x86_64/dl-trampoline.h: This. New file. + 2009-08-07 H.J. Lu <hongjiu.lu@intel.com> * sysdeps/i386/i686/multiarch/strcspn.S (STRCSPN): Use PIC diff --git a/elf/Makefile b/elf/Makefile index 3baad9621d..d57c7fe7ed 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -93,6 +93,9 @@ distribute := rtld-Rules \ tst-auditmod1.c tst-auditmod3a.c tst-auditmod3b.c \ tst-auditmod4a.c tst-auditmod4b.c \ tst-audit5.c tst-auditmod5a.c tst-auditmod5b.c \ + tst-audit6.c tst-auditmod6a.c tst-auditmod6b.c \ + tst-auditmod6c.c \ + tst-audit7.c tst-auditmod7a.c tst-auditmod7b.c \ order2mod1.c order2mod2.c order2mod3.c order2mod4.c \ tst-stackguard1.c tst-stackguard1-static.c \ tst-array5.c tst-array5-static.c tst-array5dep.c \ @@ -200,7 +203,7 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \ test-srcs = tst-pathopt tests-execstack-yes = tst-execstack tst-execstack-needed tst-execstack-prog ifeq (x86_64,$(config-machine)) -tests += tst-audit3 tst-audit4 tst-audit5 +tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 endif endif ifeq (yesyes,$(have-fpie)$(build-shared)) @@ -255,7 +258,9 @@ endif ifeq (x86_64,$(config-machine)) modules-names += tst-auditmod3a tst-auditmod3b \ tst-auditmod4a tst-auditmod4b \ - tst-auditmod5a tst-auditmod5b + tst-auditmod5a tst-auditmod5b \ + tst-auditmod6a tst-auditmod6b tst-auditmod6c \ + tst-auditmod7a tst-auditmod7b endif modules-execstack-yes = tst-execstack-mod extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) @@ -987,6 +992,15 @@ $(objpfx)tst-audit5: $(objpfx)tst-auditmod5a.so $(objpfx)tst-audit5.out: $(objpfx)tst-auditmod5b.so tst-audit5-ENV = LD_AUDIT=$(objpfx)tst-auditmod5b.so +$(objpfx)tst-audit6: $(objpfx)tst-auditmod6a.so +$(objpfx)tst-audit6.out: $(objpfx)tst-auditmod6b.so \ + $(objpfx)tst-auditmod6c.so +tst-audit6-ENV = LD_AUDIT=$(objpfx)tst-auditmod6b.so:$(objpfx)tst-auditmod6c.so + +$(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so +$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so +tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so + $(objpfx)tst-global1: $(libdl) $(objpfx)tst-global1.out: $(objpfx)testobj6.so $(objpfx)testobj2.so @@ -1134,4 +1148,7 @@ ifeq (yes,$(config-cflags-avx)) CFLAGS-tst-audit4.c += -mavx CFLAGS-tst-auditmod4a.c += -mavx CFLAGS-tst-auditmod4b.c += -mavx +CFLAGS-tst-auditmod6b.c += -mavx +CFLAGS-tst-auditmod6c.c += -mavx +CFLAGS-tst-auditmod7b.c += -mavx endif diff --git a/elf/tst-audit6.c b/elf/tst-audit6.c new file mode 100644 index 0000000000..1f6dcb16e9 --- /dev/null +++ b/elf/tst-audit6.c @@ -0,0 +1,28 @@ +/* Test case for x86-64 preserved registers in dynamic linker. */ + +#include <stdlib.h> +#include <string.h> +#include <cpuid.h> +#include <emmintrin.h> + +extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i, + __m128i, __m128i, __m128i, __m128i); + +int +main (void) +{ + unsigned int eax, ebx, ecx, edx; + + /* Run AVX test only if AVX is supported. */ + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) + && (ecx & bit_AVX)) + { + __m128i xmm = _mm_setzero_si128 (); + __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm); + + xmm = _mm_set1_epi32 (0x98abcdef); + if (memcmp (&xmm, &ret, sizeof (ret))) + abort (); + } + return 0; +} diff --git a/elf/tst-audit7.c b/elf/tst-audit7.c new file mode 100644 index 0000000000..1d2a7de439 --- /dev/null +++ b/elf/tst-audit7.c @@ -0,0 +1 @@ +#include "tst-audit6.c" diff --git a/elf/tst-auditmod6a.c b/elf/tst-auditmod6a.c new file mode 100644 index 0000000000..c3a850ce98 --- /dev/null +++ b/elf/tst-auditmod6a.c @@ -0,0 +1,46 @@ +/* Test case for x86-64 preserved registers in dynamic linker. */ + +#include <stdlib.h> +#include <string.h> +#include <emmintrin.h> + +__m128i +audit_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3, + __m128i x4, __m128i x5, __m128i x6, __m128i x7) +{ + __m128i xmm; + + xmm = _mm_set1_epi32 (0x100); + if (memcmp (&xmm, &x0, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x101); + if (memcmp (&xmm, &x1, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x102); + if (memcmp (&xmm, &x2, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x103); + if (memcmp (&xmm, &x3, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x104); + if (memcmp (&xmm, &x4, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x105); + if (memcmp (&xmm, &x5, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x106); + if (memcmp (&xmm, &x6, sizeof (xmm))) + abort (); + + xmm = _mm_set1_epi32 (0x107); + if (memcmp (&xmm, &x7, sizeof (xmm))) + abort (); + + return _mm_setzero_si128 (); +} diff --git a/elf/tst-auditmod6b.c b/elf/tst-auditmod6b.c new file mode 100644 index 0000000000..f756b50227 --- /dev/null +++ b/elf/tst-auditmod6b.c @@ -0,0 +1,220 @@ +/* Verify that changing AVX registers in audit library won't affect + function parameter passing/return. */ + +#include <dlfcn.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bits/wordsize.h> +#include <gnu/lib-names.h> + +unsigned int +la_version (unsigned int v) +{ + setlinebuf (stdout); + + printf ("version: %u\n", v); + + char buf[20]; + sprintf (buf, "%u", v); + + return v; +} + +void +la_activity (uintptr_t *cookie, unsigned int flag) +{ + if (flag == LA_ACT_CONSISTENT) + printf ("activity: consistent\n"); + else if (flag == LA_ACT_ADD) + printf ("activity: add\n"); + else if (flag == LA_ACT_DELETE) + printf ("activity: delete\n"); + else + printf ("activity: unknown activity %u\n", flag); +} + +char * +la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) +{ + char buf[100]; + const char *flagstr; + if (flag == LA_SER_ORIG) + flagstr = "LA_SET_ORIG"; + else if (flag == LA_SER_LIBPATH) + flagstr = "LA_SER_LIBPATH"; + else if (flag == LA_SER_RUNPATH) + flagstr = "LA_SER_RUNPATH"; + else if (flag == LA_SER_CONFIG) + flagstr = "LA_SER_CONFIG"; + else if (flag == LA_SER_DEFAULT) + flagstr = "LA_SER_DEFAULT"; + else if (flag == LA_SER_SECURE) + flagstr = "LA_SER_SECURE"; + else + { + sprintf (buf, "unknown flag %d", flag); + flagstr = buf; + } + printf ("objsearch: %s, %s\n", name, flagstr); + + return (char *) name; +} + +unsigned int +la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie) +{ + printf ("objopen: %ld, %s\n", lmid, l->l_name); + + return 3; +} + +void +la_preinit (uintptr_t *cookie) +{ + printf ("preinit\n"); +} + +unsigned int +la_objclose (uintptr_t *cookie) +{ + printf ("objclose\n"); + return 0; +} + +uintptr_t +la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, unsigned int *flags, const char *symname) +{ + printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + + return sym->st_value; +} + +#define pltenter la_x86_64_gnu_pltenter +#define pltexit la_x86_64_gnu_pltexit +#define La_regs La_x86_64_regs +#define La_retval La_x86_64_retval +#define int_retval lrv_rax + +#include <tst-audit.h> + +#ifdef __AVX__ +#include <immintrin.h> +#include <cpuid.h> + +static int avx = -1; + +static int +__attribute ((always_inline)) +check_avx (void) +{ + if (avx == -1) + { + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) + && (ecx & bit_AVX)) + avx = 1; + else + avx = 0; + } + return avx; +} +#else +#include <emmintrin.h> +#endif + +ElfW(Addr) +pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, La_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + +#ifdef __AVX__ + if (check_avx () && strcmp (symname, "audit_test") == 0) + { + int i; + + __m128i xmm = _mm_setzero_si128 (); + for (i = 0; i < 8; i++) + if (memcmp (®s->lr_xmm[i], &xmm, sizeof (xmm)) + || memcmp (®s->lr_vector[i], &xmm, sizeof (xmm))) + abort (); + + for (i = 0; i < 8; i += 2) + { + regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 1); + regs->lr_vector[i].xmm[0] = regs->lr_xmm[i]; + regs->lr_vector[i + 1].ymm[0] + = (La_x86_64_ymm) _mm256_set1_epi32 (i + 2); + regs->lr_xmm[i + 1] = regs->lr_vector[i + 1].xmm[0]; + } + + __m256i ymm = _mm256_set1_epi32 (-1); + asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" ); + asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" ); + asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" ); + asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" ); + asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" ); + asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" ); + asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" ); + asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" ); + + *framesizep = 1024; + } +#endif + + return sym->st_value; +} + +unsigned int +pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, const La_regs *inregs, La_retval *outregs, + const char *symname) +{ + printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n", + symname, (long int) sym->st_value, ndx, outregs->int_retval); + +#ifdef __AVX__ + if (check_avx () && strcmp (symname, "audit_test") == 0) + { + int i; + + __m128i xmm = _mm_setzero_si128 (); + if (memcmp (&outregs->lrv_xmm0, &xmm, sizeof (xmm)) + || memcmp (&outregs->lrv_vector0, &xmm, sizeof (xmm))) + abort (); + + __m256i ymm; + + for (i = 0; i < 8; i += 2) + { + xmm = _mm_set1_epi32 (i + 0x100); + if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm)) + || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm))) + abort (); + + ymm = _mm256_set1_epi32 (i + 0x101); + if (memcmp (&inregs->lr_xmm[i + 1], + &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm)) + || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm))) + abort (); + } + + outregs->lrv_vector0.ymm[0] + = (La_x86_64_ymm) _mm256_set1_epi32 (0x12349876); + + ymm = _mm256_set1_epi32 (-1); + asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" ); + asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" ); + } +#endif + + return 0; +} diff --git a/elf/tst-auditmod6c.c b/elf/tst-auditmod6c.c new file mode 100644 index 0000000000..49cbf05492 --- /dev/null +++ b/elf/tst-auditmod6c.c @@ -0,0 +1,225 @@ +/* Verify that changing AVX registers in audit library won't affect + function parameter passing/return. */ + +#include <dlfcn.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bits/wordsize.h> +#include <gnu/lib-names.h> + +unsigned int +la_version (unsigned int v) +{ + setlinebuf (stdout); + + printf ("version: %u\n", v); + + char buf[20]; + sprintf (buf, "%u", v); + + return v; +} + +void +la_activity (uintptr_t *cookie, unsigned int flag) +{ + if (flag == LA_ACT_CONSISTENT) + printf ("activity: consistent\n"); + else if (flag == LA_ACT_ADD) + printf ("activity: add\n"); + else if (flag == LA_ACT_DELETE) + printf ("activity: delete\n"); + else + printf ("activity: unknown activity %u\n", flag); +} + +char * +la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) +{ + char buf[100]; + const char *flagstr; + if (flag == LA_SER_ORIG) + flagstr = "LA_SET_ORIG"; + else if (flag == LA_SER_LIBPATH) + flagstr = "LA_SER_LIBPATH"; + else if (flag == LA_SER_RUNPATH) + flagstr = "LA_SER_RUNPATH"; + else if (flag == LA_SER_CONFIG) + flagstr = "LA_SER_CONFIG"; + else if (flag == LA_SER_DEFAULT) + flagstr = "LA_SER_DEFAULT"; + else if (flag == LA_SER_SECURE) + flagstr = "LA_SER_SECURE"; + else + { + sprintf (buf, "unknown flag %d", flag); + flagstr = buf; + } + printf ("objsearch: %s, %s\n", name, flagstr); + + return (char *) name; +} + +unsigned int +la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie) +{ + printf ("objopen: %ld, %s\n", lmid, l->l_name); + + return 3; +} + +void +la_preinit (uintptr_t *cookie) +{ + printf ("preinit\n"); +} + +unsigned int +la_objclose (uintptr_t *cookie) +{ + printf ("objclose\n"); + return 0; +} + +uintptr_t +la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, unsigned int *flags, const char *symname) +{ + printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + + return sym->st_value; +} + +#define pltenter la_x86_64_gnu_pltenter +#define pltexit la_x86_64_gnu_pltexit +#define La_regs La_x86_64_regs +#define La_retval La_x86_64_retval +#define int_retval lrv_rax + +#include <tst-audit.h> + +#ifdef __AVX__ +#include <immintrin.h> +#include <cpuid.h> + +static int avx = -1; + +static int +__attribute ((always_inline)) +check_avx (void) +{ + if (avx == -1) + { + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) + && (ecx & bit_AVX)) + avx = 1; + else + avx = 0; + } + return avx; +} +#else +#include <emmintrin.h> +#endif + +ElfW(Addr) +pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, La_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + +#ifdef __AVX__ + if (check_avx () && strcmp (symname, "audit_test") == 0) + { + int i; + __m128i xmm; + __m256i ymm; + + for (i = 0; i < 8; i += 2) + { + xmm = _mm_set1_epi32 (i + 1); + if (memcmp (®s->lr_xmm[i], &xmm, sizeof (xmm)) + || memcmp (®s->lr_vector[i], &xmm, sizeof (xmm))) + abort (); + regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 0x100); + regs->lr_vector[i].xmm[0] = regs->lr_xmm[i]; + + ymm = _mm256_set1_epi32 (i + 2); + if (memcmp (®s->lr_xmm[i + 1], + ®s->lr_vector[i + 1].xmm[0], sizeof (xmm)) + || memcmp (®s->lr_vector[i + 1], &ymm, sizeof (ymm))) + abort (); + regs->lr_vector[i + 1].ymm[0] + = (La_x86_64_ymm) _mm256_set1_epi32 (i + 0x101); + regs->lr_xmm[i + 1] = regs->lr_vector[i + 1].xmm[0]; + } + + ymm = _mm256_set1_epi32 (-1); + asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" ); + asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" ); + asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" ); + asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" ); + asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" ); + asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" ); + asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" ); + asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" ); + + *framesizep = 1024; + } +#endif + + return sym->st_value; +} + +unsigned int +pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, const La_regs *inregs, La_retval *outregs, + const char *symname) +{ + printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n", + symname, (long int) sym->st_value, ndx, outregs->int_retval); + +#ifdef __AVX__ + if (check_avx () && strcmp (symname, "audit_test") == 0) + { + int i; + + __m256i ymm = _mm256_set1_epi32 (0x12349876);; + if (memcmp (&outregs->lrv_vector0, &ymm, sizeof (ymm))) + abort (); + + __m128i xmm; + + for (i = 0; i < 8; i += 2) + { + xmm = _mm_set1_epi32 (i + 0x100); + if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm)) + || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm))) + abort (); + + ymm = _mm256_set1_epi32 (i + 0x101); + if (memcmp (&inregs->lr_xmm[i + 1], + &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm)) + || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm))) + abort (); + } + + outregs->lrv_vector0.ymm[0] + = (La_x86_64_ymm) _mm256_set1_epi32 (0x98abcdef); + + ymm = _mm256_set1_epi32 (-1); + asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" ); + asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" ); + } +#endif + + return 0; +} diff --git a/elf/tst-auditmod7a.c b/elf/tst-auditmod7a.c new file mode 100644 index 0000000000..b379df75d6 --- /dev/null +++ b/elf/tst-auditmod7a.c @@ -0,0 +1 @@ +#include "tst-auditmod6a.c" diff --git a/elf/tst-auditmod7b.c b/elf/tst-auditmod7b.c new file mode 100644 index 0000000000..eb237586fe --- /dev/null +++ b/elf/tst-auditmod7b.c @@ -0,0 +1,218 @@ +/* Verify that changing AVX registers in audit library won't affect + function parameter passing/return. */ + +#include <dlfcn.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bits/wordsize.h> +#include <gnu/lib-names.h> + +unsigned int +la_version (unsigned int v) +{ + setlinebuf (stdout); + + printf ("version: %u\n", v); + + char buf[20]; + sprintf (buf, "%u", v); + + return v; +} + +void +la_activity (uintptr_t *cookie, unsigned int flag) +{ + if (flag == LA_ACT_CONSISTENT) + printf ("activity: consistent\n"); + else if (flag == LA_ACT_ADD) + printf ("activity: add\n"); + else if (flag == LA_ACT_DELETE) + printf ("activity: delete\n"); + else + printf ("activity: unknown activity %u\n", flag); +} + +char * +la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) +{ + char buf[100]; + const char *flagstr; + if (flag == LA_SER_ORIG) + flagstr = "LA_SET_ORIG"; + else if (flag == LA_SER_LIBPATH) + flagstr = "LA_SER_LIBPATH"; + else if (flag == LA_SER_RUNPATH) + flagstr = "LA_SER_RUNPATH"; + else if (flag == LA_SER_CONFIG) + flagstr = "LA_SER_CONFIG"; + else if (flag == LA_SER_DEFAULT) + flagstr = "LA_SER_DEFAULT"; + else if (flag == LA_SER_SECURE) + flagstr = "LA_SER_SECURE"; + else + { + sprintf (buf, "unknown flag %d", flag); + flagstr = buf; + } + printf ("objsearch: %s, %s\n", name, flagstr); + + return (char *) name; +} + +unsigned int +la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie) +{ + printf ("objopen: %ld, %s\n", lmid, l->l_name); + + return 3; +} + +void +la_preinit (uintptr_t *cookie) +{ + printf ("preinit\n"); +} + +unsigned int +la_objclose (uintptr_t *cookie) +{ + printf ("objclose\n"); + return 0; +} + +uintptr_t +la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, unsigned int *flags, const char *symname) +{ + printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + + return sym->st_value; +} + +#define pltenter la_x86_64_gnu_pltenter +#define pltexit la_x86_64_gnu_pltexit +#define La_regs La_x86_64_regs +#define La_retval La_x86_64_retval +#define int_retval lrv_rax + +#include <tst-audit.h> + +#ifdef __AVX__ +#include <immintrin.h> +#include <cpuid.h> + +static int avx = -1; + +static int +__attribute ((always_inline)) +check_avx (void) +{ + if (avx == -1) + { + unsigned int eax, ebx, ecx, edx; + + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) + && (ecx & bit_AVX)) + avx = 1; + else + avx = 0; + } + return avx; +} +#else +#include <emmintrin.h> +#endif + +ElfW(Addr) +pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, La_regs *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", + symname, (long int) sym->st_value, ndx, *flags); + +#ifdef __AVX__ + if (check_avx () && strcmp (symname, "audit_test") == 0) + { + int i; + + __m128i xmm = _mm_setzero_si128 (); + for (i = 0; i < 8; i++) + if (memcmp (®s->lr_xmm[i], &xmm, sizeof (xmm)) + || memcmp (®s->lr_vector[i], &xmm, sizeof (xmm))) + abort (); + + for (i = 0; i < 8; i += 2) + { + regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 0x100); + regs->lr_vector[i + 1].ymm[0] + = (La_x86_64_ymm) _mm256_set1_epi32 (i + 0x101); + } + + __m256i ymm = _mm256_set1_epi32 (-1); + asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" ); + asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" ); + asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" ); + asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" ); + asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" ); + asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" ); + asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" ); + asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" ); + + *framesizep = 1024; + } +#endif + + return sym->st_value; +} + +unsigned int +pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, const La_regs *inregs, La_retval *outregs, + const char *symname) +{ + printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n", + symname, (long int) sym->st_value, ndx, outregs->int_retval); + +#ifdef __AVX__ + if (check_avx () && strcmp (symname, "audit_test") == 0) + { + int i; + + __m128i xmm = _mm_setzero_si128 (); + if (memcmp (&outregs->lrv_xmm0, &xmm, sizeof (xmm)) + || memcmp (&outregs->lrv_vector0, &xmm, sizeof (xmm))) + abort (); + + __m256i ymm; + + for (i = 0; i < 8; i += 2) + { + xmm = _mm_set1_epi32 (i + 0x100); + if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm)) + || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm))) + abort (); + + ymm = _mm256_set1_epi32 (i + 0x101); + if (memcmp (&inregs->lr_xmm[i + 1], + &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm)) + || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm))) + abort (); + } + + outregs->lrv_vector0.ymm[0] + = (La_x86_64_ymm) _mm256_set1_epi32 (0x98abcdef); + + ymm = _mm256_set1_epi32 (-1); + asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" ); + asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" ); + } +#endif + + return 0; +} diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 20da6956f1..f9c60ad5cf 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -146,247 +146,17 @@ L(have_avx): 2: movl %eax, L(have_avx)(%rip) cmpl $0, %eax -1: js L(no_avx1) +1: js L(no_avx) - /* This is to support AVX audit modules. */ - vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp) - vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) - vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) - vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) - vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) - vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) - vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) - vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) +# define RESTORE_AVX +# include "dl-trampoline.h" - /* Save xmm0-xmm7 registers to detect if any of them are - changed by audit module. */ - vmovdqa %xmm0, (LR_SIZE)(%rsp) - vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp) - vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp) - vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp) - vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp) - vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp) - vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp) - vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp) - -L(no_avx1): -# endif - - movq %rsp, %rcx # La_x86_64_regs pointer to %rcx. - movq 48(%rbx), %rdx # Load return address if needed. - movq 40(%rbx), %rsi # Copy args pushed by PLT in register. - movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index - leaq 16(%rbx), %r8 - call _dl_profile_fixup # Call resolver. - - movq %rax, %r11 # Save return value. - - movq 8(%rbx), %rax # Get back register content. - movq LR_RDX_OFFSET(%rsp), %rdx - movq LR_R8_OFFSET(%rsp), %r8 - movq LR_R9_OFFSET(%rsp), %r9 - - movaps (LR_XMM_OFFSET)(%rsp), %xmm0 - movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1 - movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2 - movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3 - movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4 - movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5 - movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6 - movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7 - -# ifdef HAVE_AVX_SUPPORT - cmpl $0, L(have_avx)(%rip) - js L(no_avx2) - - /* Check if any xmm0-xmm7 registers are changed by audit - module. */ - vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 - -1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 - vpmovmskb %xmm8, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 - -L(no_avx2): -1: -# endif - movq 16(%rbx), %r10 # Anything in framesize? - testq %r10, %r10 - jns 3f - - /* There's nothing in the frame size, so there - will be no call to the _dl_call_pltexit. */ - - /* Get back registers content. */ - movq LR_RCX_OFFSET(%rsp), %rcx - movq LR_RSI_OFFSET(%rsp), %rsi - movq LR_RDI_OFFSET(%rsp), %rdi - - movq %rbx, %rsp - movq (%rsp), %rbx - cfi_restore(rbx) - cfi_def_cfa_register(%rsp) - - addq $48, %rsp # Adjust the stack to the return value - # (eats the reloc index and link_map) - cfi_adjust_cfa_offset(-48) - jmp *%r11 # Jump to function address. - -3: - cfi_adjust_cfa_offset(48) - cfi_rel_offset(%rbx, 0) - cfi_def_cfa_register(%rbx) - - /* At this point we need to prepare new stack for the function - which has to be called. We copy the original stack to a - temporary buffer of the size specified by the 'framesize' - returned from _dl_profile_fixup */ - - leaq LR_RSP_OFFSET(%rbx), %rsi # stack - addq $8, %r10 - andq $0xfffffffffffffff0, %r10 - movq %r10, %rcx - subq %r10, %rsp - movq %rsp, %rdi - shrq $3, %rcx - rep - movsq - - movq 24(%rdi), %rcx # Get back register content. - movq 32(%rdi), %rsi - movq 40(%rdi), %rdi - - call *%r11 - - mov 24(%rbx), %rsp # Drop the copied stack content - - /* Now we have to prepare the La_x86_64_retval structure for the - _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, - so we just need to allocate the sizeof(La_x86_64_retval) space on - the stack, since the alignment has already been taken care of. */ -# ifdef HAVE_AVX_SUPPORT - /* sizeof(La_x86_64_retval). Need extra space for 2 SSE - registers to detect if xmm0/xmm1 registers are changed - by audit module. */ - subq $(LRV_SIZE + XMM_SIZE*2), %rsp -# else - subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval) -# endif - movq %rsp, %rcx # La_x86_64_retval argument to %rcx. - - /* Fill in the La_x86_64_retval structure. */ - movq %rax, LRV_RAX_OFFSET(%rcx) - movq %rdx, LRV_RDX_OFFSET(%rcx) - - movaps %xmm0, LRV_XMM0_OFFSET(%rcx) - movaps %xmm1, LRV_XMM1_OFFSET(%rcx) - -# ifdef HAVE_AVX_SUPPORT - cmpl $0, L(have_avx)(%rip) - js L(no_avx3) - - /* This is to support AVX audit modules. */ - vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) - vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) - - /* Save xmm0/xmm1 registers to detect if they are changed - by audit module. */ - vmovdqa %xmm0, (LRV_SIZE)(%rcx) - vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx) - -L(no_avx3): -# endif - - fstpt LRV_ST0_OFFSET(%rcx) - fstpt LRV_ST1_OFFSET(%rcx) - - movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. - movq 40(%rbx), %rsi # Copy args pushed by PLT in register. - movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index - call _dl_call_pltexit - - /* Restore return registers. */ - movq LRV_RAX_OFFSET(%rsp), %rax - movq LRV_RDX_OFFSET(%rsp), %rdx - - movaps LRV_XMM0_OFFSET(%rsp), %xmm0 - movaps LRV_XMM1_OFFSET(%rsp), %xmm1 - -# ifdef HAVE_AVX_SUPPORT - cmpl $0, L(have_avx)(%rip) - js L(no_avx4) - - /* Check if xmm0/xmm1 registers are changed by audit module. */ - vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 - vpmovmskb %xmm2, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 - -1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 - vpmovmskb %xmm2, %esi - cmpl $0xffff, %esi - jne 1f - vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 - -L(no_avx4): -1: + .align 16 +L(no_avx): # endif - fldt LRV_ST1_OFFSET(%rsp) - fldt LRV_ST0_OFFSET(%rsp) - - movq %rbx, %rsp - movq (%rsp), %rbx - cfi_restore(rbx) - cfi_def_cfa_register(%rsp) - - addq $48, %rsp # Adjust the stack to the return value - # (eats the reloc index and link_map) - cfi_adjust_cfa_offset(-48) - retq +# undef RESTORE_AVX +# include "dl-trampoline.h" cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h new file mode 100644 index 0000000000..5d49ed4408 --- /dev/null +++ b/sysdeps/x86_64/dl-trampoline.h @@ -0,0 +1,269 @@ +/* Partial PLT profile trampoline to save and restore x86-64 vector + registers. + Copyright (C) 2009 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifdef RESTORE_AVX + /* This is to support AVX audit modules. */ + vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp) + vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + + /* Save xmm0-xmm7 registers to detect if any of them are + changed by audit module. */ + vmovdqa %xmm0, (LR_SIZE)(%rsp) + vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp) + vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp) + vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp) + vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp) + vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp) + vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp) + vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp) +#endif + + movq %rsp, %rcx # La_x86_64_regs pointer to %rcx. + movq 48(%rbx), %rdx # Load return address if needed. + movq 40(%rbx), %rsi # Copy args pushed by PLT in register. + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index + leaq 16(%rbx), %r8 + call _dl_profile_fixup # Call resolver. + + movq %rax, %r11 # Save return value. + + movq 8(%rbx), %rax # Get back register content. + movq LR_RDX_OFFSET(%rsp), %rdx + movq LR_R8_OFFSET(%rsp), %r8 + movq LR_R9_OFFSET(%rsp), %r9 + + movaps (LR_XMM_OFFSET)(%rsp), %xmm0 + movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1 + movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2 + movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3 + movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4 + movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5 + movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6 + movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7 + +#ifdef RESTORE_AVX + /* Check if any xmm0-xmm7 registers are changed by audit + module. */ + vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 + vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 + vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 + vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 + vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 + vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 + vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 + vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp) + +1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 + vpmovmskb %xmm8, %esi + cmpl $0xffff, %esi + je 2f + vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp) + jmp 1f +2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 + vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp) + +1: +#endif + movq 16(%rbx), %r10 # Anything in framesize? + testq %r10, %r10 + jns 3f + + /* There's nothing in the frame size, so there + will be no call to the _dl_call_pltexit. */ + + /* Get back registers content. */ + movq LR_RCX_OFFSET(%rsp), %rcx + movq LR_RSI_OFFSET(%rsp), %rsi + movq LR_RDI_OFFSET(%rsp), %rdi + + movq %rbx, %rsp + movq (%rsp), %rbx + cfi_restore(rbx) + cfi_def_cfa_register(%rsp) + + addq $48, %rsp # Adjust the stack to the return value + # (eats the reloc index and link_map) + cfi_adjust_cfa_offset(-48) + jmp *%r11 # Jump to function address. + +3: + cfi_adjust_cfa_offset(48) + cfi_rel_offset(%rbx, 0) + cfi_def_cfa_register(%rbx) + + /* At this point we need to prepare new stack for the function + which has to be called. We copy the original stack to a + temporary buffer of the size specified by the 'framesize' + returned from _dl_profile_fixup */ + + leaq LR_RSP_OFFSET(%rbx), %rsi # stack + addq $8, %r10 + andq $0xfffffffffffffff0, %r10 + movq %r10, %rcx + subq %r10, %rsp + movq %rsp, %rdi + shrq $3, %rcx + rep + movsq + + movq 24(%rdi), %rcx # Get back register content. + movq 32(%rdi), %rsi + movq 40(%rdi), %rdi + + call *%r11 + + mov 24(%rbx), %rsp # Drop the copied stack content + + /* Now we have to prepare the La_x86_64_retval structure for the + _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, + so we just need to allocate the sizeof(La_x86_64_retval) space on + the stack, since the alignment has already been taken care of. */ +# ifdef RESTORE_AVX + /* sizeof(La_x86_64_retval). Need extra space for 2 SSE + registers to detect if xmm0/xmm1 registers are changed + by audit module. */ + subq $(LRV_SIZE + XMM_SIZE*2), %rsp +# else + subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval) +# endif + movq %rsp, %rcx # La_x86_64_retval argument to %rcx. + + /* Fill in the La_x86_64_retval structure. */ + movq %rax, LRV_RAX_OFFSET(%rcx) + movq %rdx, LRV_RDX_OFFSET(%rcx) + + movaps %xmm0, LRV_XMM0_OFFSET(%rcx) + movaps %xmm1, LRV_XMM1_OFFSET(%rcx) + +# ifdef RESTORE_AVX + /* This is to support AVX audit modules. */ + vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx) + vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx) + + /* Save xmm0/xmm1 registers to detect if they are changed + by audit module. */ + vmovdqa %xmm0, (LRV_SIZE)(%rcx) + vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx) +# endif + + fstpt LRV_ST0_OFFSET(%rcx) + fstpt LRV_ST1_OFFSET(%rcx) + + movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. + movq 40(%rbx), %rsi # Copy args pushed by PLT in register. + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index + call _dl_call_pltexit + + /* Restore return registers. */ + movq LRV_RAX_OFFSET(%rsp), %rax + movq LRV_RDX_OFFSET(%rsp), %rdx + + movaps LRV_XMM0_OFFSET(%rsp), %xmm0 + movaps LRV_XMM1_OFFSET(%rsp), %xmm1 + +# ifdef RESTORE_AVX + /* Check if xmm0/xmm1 registers are changed by audit module. */ + vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 + vpmovmskb %xmm2, %esi + cmpl $0xffff, %esi + jne 1f + vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 + +1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 + vpmovmskb %xmm2, %esi + cmpl $0xffff, %esi + jne 1f + vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 + +1: +# endif + + fldt LRV_ST1_OFFSET(%rsp) + fldt LRV_ST0_OFFSET(%rsp) + + movq %rbx, %rsp + movq (%rsp), %rbx + cfi_restore(rbx) + cfi_def_cfa_register(%rsp) + + addq $48, %rsp # Adjust the stack to the return value + # (eats the reloc index and link_map) + cfi_adjust_cfa_offset(-48) + retq |