-rw-r--r--  ChangeLog                            |  15
-rw-r--r--  elf/dl-lookup.c                      |  13
-rw-r--r--  elf/dl-runtime.c                     |   8
-rw-r--r--  nptl/ChangeLog                       |   8
-rw-r--r--  nptl/sysdeps/x86_64/tcb-offsets.sym  |   1
-rw-r--r--  nptl/sysdeps/x86_64/tls.h            |  73
-rw-r--r--  stdio-common/scanf15.c               |   1
-rw-r--r--  stdio-common/scanf17.c               |   1
-rw-r--r--  sysdeps/x86_64/dl-trampoline.S       |  82
-rwxr-xr-x  sysdeps/x86_64/tst-xmmymm.sh         |   7
10 files changed, 188 insertions(+), 21 deletions(-)
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2009-07-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-runtime.c (_dl_fixup): Indicate before _dl_lookup_symbol_x
+	call that registers used in calling conventions need to be preserved.
+	* elf/dl-lookup.c (do_lookup_x): Use RTLD_*_FOREIGN_CALL macros
+	to preserve register content if necessary.
+	* sysdeps/x86_64/dl-trampoline.S (_dl_x86_64_save_sse): New function.
+	(_dl_x86_64_restore_sse): New function.
+	* sysdeps/x86_64/tst-xmmymm.sh: There is now one more function that
+	is allowed to modify xmm/ymm registers.
+
+	* stdio-common/scanf15.c: Undefine _LIBC.  We want to test from an
+	application's perspective.
+	* stdio-common/scanf17.c: Likewise.
+
 2009-07-28  Ulrich Drepper  <drepper@redhat.com>
 
 	* csu/libc-tls.c (__libc_setup_tls) [TLS_TCB_AT_TP]: Don't add TCB
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 1d68d67a35..56724c9b4d 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -380,6 +380,10 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 	      if (size * 3 <= tab->n_elements * 4)
 		{
 		  /* Expand the table.  */
+#ifdef RTLD_CHECK_FOREIGN_CALL
+		  /* This must not happen during runtime relocations.  */
+		  assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
 		  size_t newsize = _dl_higher_prime_number (size + 1);
 		  struct unique_sym *newentries
 		    = calloc (sizeof (struct unique_sym), newsize);
@@ -405,6 +409,11 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
 	    }
 	  else
 	    {
+#ifdef RTLD_CHECK_FOREIGN_CALL
+	      /* This must not happen during runtime relocations.  */
+	      assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
+
 #define INITIAL_NUNIQUE_SYM_TABLE 31
 	      size = INITIAL_NUNIQUE_SYM_TABLE;
 	      entries = calloc (sizeof (struct unique_sym), size);
@@ -600,6 +609,10 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
       unsigned int max
 	= undef_map->l_reldepsmax ? undef_map->l_reldepsmax * 2 : 10;
 
+#ifdef RTLD_PREPARE_FOREIGN_CALL
+      RTLD_PREPARE_FOREIGN_CALL;
+#endif
+
       newp = malloc (sizeof (*newp) + max * sizeof (struct link_map *));
       if (newp == NULL)
 	{
diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
index 0eb7d4e3b9..a52120d121 100644
--- a/elf/dl-runtime.c
+++ b/elf/dl-runtime.c
@@ -111,6 +111,10 @@ _dl_fixup (
 	  flags |= DL_LOOKUP_GSCOPE_LOCK;
 	}
 
+#ifdef RTLD_ENABLE_FOREIGN_CALL
+      RTLD_ENABLE_FOREIGN_CALL;
+#endif
+
       result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, l->l_scope,
 				    version, ELF_RTYPE_CLASS_PLT, flags, NULL);
 
@@ -118,6 +122,10 @@ _dl_fixup (
       if (!RTLD_SINGLE_THREAD_P)
 	THREAD_GSCOPE_RESET_FLAG ();
 
+#ifdef RTLD_FINALIZE_FOREIGN_CALL
+      RTLD_FINALIZE_FOREIGN_CALL;
+#endif
+
       /* Currently result contains the base load address (or link map)
 	 of the object that defines sym.  Now add in the symbol
 	 offset.  */
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 8f37da7936..24fd28a0dc 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,11 @@
+2009-07-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/tls.h (tcbhead_t): Add room for SSE registers the
+	dynamic linker might have to save.  Define RTLD_CHECK_FOREIGN_CALL,
+	RTLD_ENABLE_FOREIGN_CALL, RTLD_PREPARE_FOREIGN_CALL, and
+	RTLD_FINALIZE_FOREIGN_CALL.  Pretty printing.
+	* sysdeps/x86_64/tcb-offsets.sym: Add RTLD_SAVESPACE_SSE.
+
 2009-07-28  Ulrich Drepper  <drepper@redhat.com>
 
 	* pthread_mutex_lock.c [NO_INCR] (__pthread_mutex_cond_lock_adjust):
diff --git a/nptl/sysdeps/x86_64/tcb-offsets.sym b/nptl/sysdeps/x86_64/tcb-offsets.sym
index 1c70c6bde7..51f35c61cf 100644
--- a/nptl/sysdeps/x86_64/tcb-offsets.sym
+++ b/nptl/sysdeps/x86_64/tcb-offsets.sym
@@ -15,3 +15,4 @@ VGETCPU_CACHE_OFFSET	offsetof (tcbhead_t, vgetcpu_cache)
 #ifndef __ASSUME_PRIVATE_FUTEX
 PRIVATE_FUTEX		offsetof (tcbhead_t, private_futex)
 #endif
+RTLD_SAVESPACE_SSE	offsetof (tcbhead_t, rtld_savespace_sse)
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index ea89f3b1a2..a51b77052a 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -29,6 +29,7 @@
 # include <sysdep.h>
 # include <kernel-features.h>
 # include <bits/wordsize.h>
+# include <xmmintrin.h>
 
 
 /* Type for the dtv.  */
@@ -55,16 +56,23 @@ typedef struct
   uintptr_t stack_guard;
   uintptr_t pointer_guard;
   unsigned long int vgetcpu_cache[2];
-#ifndef __ASSUME_PRIVATE_FUTEX
+# ifndef __ASSUME_PRIVATE_FUTEX
   int private_futex;
-#else
+# else
   int __unused1;
-#endif
-#if __WORDSIZE == 64
-  int __pad1;
-#endif
+# endif
+# if __WORDSIZE == 64
+  int rtld_must_xmm_save;
+# endif
   /* Reservation of some values for the TM ABI.  */
   void *__private_tm[5];
+# if __WORDSIZE == 64
+  long int __unused2;
+  /* Have space for the post-AVX register size.  */
+  __m128 rtld_savespace_sse[8][4];
+
+  void *__padding[8];
+# endif
 } tcbhead_t;
 
 #else /* __ASSEMBLER__ */
@@ -298,7 +306,7 @@ typedef struct
 
 
 /* Atomic compare and exchange on TLS, returning old value.  */
-#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
   ({ __typeof (descr->member) __ret; \
      __typeof (oldval) __old = (oldval); \
      if (sizeof (descr->member) == 4) \
@@ -313,7 +321,7 @@ typedef struct
 
 
 /* Atomic logical and.  */
-#define THREAD_ATOMIC_AND(descr, member, val) \
+# define THREAD_ATOMIC_AND(descr, member, val) \
   (void) ({ if (sizeof ((descr)->member) == 4) \
               asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0" \
                             :: "i" (offsetof (struct pthread, member)), \
@@ -324,7 +332,7 @@ typedef struct
 
 
 /* Atomic set bit.  */
-#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
   (void) ({ if (sizeof ((descr)->member) == 4) \
               asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0" \
                             :: "i" (offsetof (struct pthread, member)), \
@@ -334,7 +342,7 @@ typedef struct
                abort (); })
 
 
-#define CALL_THREAD_FCT(descr) \
+# define CALL_THREAD_FCT(descr) \
   ({ void *__res; \
      asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
                    "callq *%%fs:%P1" \
@@ -355,18 +363,18 @@ typedef struct
 
 
 /* Set the pointer guard field in the TCB head.  */
-#define THREAD_SET_POINTER_GUARD(value) \
+# define THREAD_SET_POINTER_GUARD(value) \
   THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
-#define THREAD_COPY_POINTER_GUARD(descr) \
+# define THREAD_COPY_POINTER_GUARD(descr) \
   ((descr)->header.pointer_guard \
    = THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
 
 
 /* Get and set the global scope generation counter in the TCB head.  */
-#define THREAD_GSCOPE_FLAG_UNUSED 0
-#define THREAD_GSCOPE_FLAG_USED   1
-#define THREAD_GSCOPE_FLAG_WAIT   2
-#define THREAD_GSCOPE_RESET_FLAG() \
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED   1
+# define THREAD_GSCOPE_FLAG_WAIT   2
+# define THREAD_GSCOPE_RESET_FLAG() \
   do \
     { int __res; \
       asm volatile ("xchgl %0, %%fs:%P1" \
@@ -377,11 +385,40 @@ typedef struct
         lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
     } \
   while (0)
-#define THREAD_GSCOPE_SET_FLAG() \
+# define THREAD_GSCOPE_SET_FLAG() \
   THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
+# define THREAD_GSCOPE_WAIT() \
   GL(dl_wait_lookup_done) ()
 
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S.  */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+# define RTLD_CHECK_FOREIGN_CALL \
+  (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+# define RTLD_ENABLE_FOREIGN_CALL \
+  THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+# define RTLD_PREPARE_FOREIGN_CALL \
+  do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \
+    { \
+      _dl_x86_64_save_sse (); \
+      THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+    } \
+  while (0)
+
+# define RTLD_FINALIZE_FOREIGN_CALL \
+  do { \
+    if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \
+      _dl_x86_64_restore_sse (); \
+    THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+  } while (0)
+# endif
+
+
 #endif /* __ASSEMBLER__ */
 
 #endif	/* tls.h */
diff --git a/stdio-common/scanf15.c b/stdio-common/scanf15.c
index c56715c486..851466b3a9 100644
--- a/stdio-common/scanf15.c
+++ b/stdio-common/scanf15.c
@@ -1,5 +1,6 @@
 #undef _GNU_SOURCE
 #define _XOPEN_SOURCE 600
+#undef _LIBC
 /* The following macro definitions are a hack.  They word around disabling
    the GNU extension while still using a few internal headers.  */
 #define u_char unsigned char
diff --git a/stdio-common/scanf17.c b/stdio-common/scanf17.c
index ee9024f9b7..4478a7022f 100644
--- a/stdio-common/scanf17.c
+++ b/stdio-common/scanf17.c
@@ -1,5 +1,6 @@
 #undef _GNU_SOURCE
 #define _XOPEN_SOURCE 600
+#undef _LIBC
 /* The following macro definitions are a hack.  They word around disabling
    the GNU extension while still using a few internal headers.  */
 #define u_char unsigned char
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 49d239f075..7ecf1b0c64 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -390,3 +390,85 @@ L(no_avx4):
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
+
+
+#ifdef SHARED
+	.globl _dl_x86_64_save_sse
+	.type _dl_x86_64_save_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	jne	1f
+	movq	%rbx, %r11		# Save rbx
+	movl	$1, %eax
+	cpuid
+	movq	%r11,%rbx		# Restore rbx
+	movl	$1, %eax
+	testl	$(1 << 28), %ecx
+	jne	2f
+	negl	%eax
+2:	movl	%eax, L(have_avx)(%rip)
+	cmpl	$0, %eax
+
+1:	js	L(no_avx5)
+
+# define YMM_SIZE 32
+	vmovdqa	%ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+	vmovdqa	%ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+	vmovdqa	%ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+	vmovdqa	%ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+	vmovdqa	%ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+	vmovdqa	%ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+	vmovdqa	%ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+	vmovdqa	%ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+	ret
+L(no_avx5):
+# endif
+# define XMM_SIZE 16
+	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+	ret
+	cfi_endproc
+	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+
+
+	.globl _dl_x86_64_restore_sse
+	.type _dl_x86_64_restore_sse, @function
+	.align 16
+	cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+	cmpl	$0, L(have_avx)(%rip)
+	js	L(no_avx6)
+
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+	vmovdqa	%fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+	ret
+L(no_avx6):
+# endif
+	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
+	movdqa	%fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+	movdqa	%fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+	movdqa	%fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+	movdqa	%fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+	movdqa	%fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+	movdqa	%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+	movdqa	%fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+	ret
+	cfi_endproc
+	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif
diff --git a/sysdeps/x86_64/tst-xmmymm.sh b/sysdeps/x86_64/tst-xmmymm.sh
index a576e7da0d..da8af7e686 100755
--- a/sysdeps/x86_64/tst-xmmymm.sh
+++ b/sysdeps/x86_64/tst-xmmymm.sh
@@ -59,10 +59,11 @@
 for f in $tocheck; do
   objdump -d "$objpfx"../*/"$f" |
   awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
   while read fct; do
-    if test "$fct" != "_dl_runtime_profile"; then
-      echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
-      result=1
+    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+      continue;
     fi
+    echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
+    result=1
   done
 done
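
Note on the control flow the patch introduces: _dl_fixup sets header.rtld_must_xmm_save before the symbol lookup (RTLD_ENABLE_FOREIGN_CALL), the allocation paths in dl-lookup.c run RTLD_PREPARE_FOREIGN_CALL right before calling malloc/calloc so the xmm/ymm registers are spilled into rtld_savespace_sse at most once, and RTLD_FINALIZE_FOREIGN_CALL restores them only when the flag was cleared, that is, only when a foreign call actually happened. The standalone C sketch below models just that flag protocol; the names fake_tcb, save_sse, restore_sse, resolver_malloc and fixup are illustrative stand-ins, not glibc interfaces. The real work is done by the tls.h macros and the new assembly routines in dl-trampoline.S.

/* Minimal sketch of the rtld_must_xmm_save protocol added by this patch.
   All identifiers here are hypothetical stand-ins for the real glibc code.  */
#include <stdio.h>
#include <stdbool.h>

static struct { int rtld_must_xmm_save; } fake_tcb;

static void save_sse (void)    { puts ("save xmm/ymm into TCB save area"); }
static void restore_sse (void) { puts ("restore xmm/ymm from TCB save area"); }

/* Stand-in for RTLD_PREPARE_FOREIGN_CALL: executed right before malloc etc.
   Save the vector registers once, then clear the flag so later foreign
   calls in the same fixup do not overwrite the saved copy.  */
static void prepare_foreign_call (void)
{
  if (fake_tcb.rtld_must_xmm_save)
    {
      save_sse ();
      fake_tcb.rtld_must_xmm_save = 0;
    }
}

/* Stand-in for malloc/calloc as called from the lookup code.  */
static void resolver_malloc (void)
{
  prepare_foreign_call ();
  puts ("foreign call may clobber xmm/ymm");
}

/* Stand-in for _dl_fixup: enable the flag before the lookup, and restore
   afterwards only if some foreign call cleared it (i.e. actually saved).  */
static void fixup (bool lookup_needs_malloc)
{
  fake_tcb.rtld_must_xmm_save = 1;        /* RTLD_ENABLE_FOREIGN_CALL */

  if (lookup_needs_malloc)                /* inside _dl_lookup_symbol_x */
    resolver_malloc ();

  if (fake_tcb.rtld_must_xmm_save == 0)   /* RTLD_FINALIZE_FOREIGN_CALL */
    restore_sse ();
  fake_tcb.rtld_must_xmm_save = 0;
}

int main (void)
{
  fixup (false);   /* common case: no allocation, no save/restore work */
  fixup (true);    /* allocation path: save once, restore once */
  return 0;
}

The point of the flag is that the common lazy-relocation path, which never calls malloc, pays only for two flag stores; the full register spill and reload happen only when the lookup really has to call out of the dynamic linker.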