author    | Jakub Jelinek <jakub@redhat.com> | 2009-03-20 20:00:20 +0000
committer | Jakub Jelinek <jakub@redhat.com> | 2009-03-20 20:00:20 +0000
commit    | ef860a878bf532436a469fc1f89fe3366862a949 (patch)
tree      | c4a2666ac77cdaa00f41bd8f66a828b39e2642ff /sysdeps/x86_64
parent    | d4c583b4466962a9d9d4ca54ab6108dc7b42cdcc (diff)
Updated to fedora-glibc-20090320T1944cvs/fedora-glibc-2_9_90-11
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- | sysdeps/x86_64/cacheinfo.c                |  32
-rw-r--r-- | sysdeps/x86_64/dl-machine.h               | 114
-rw-r--r-- | sysdeps/x86_64/dl-runtime.c               |   9
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.S            | 249
-rw-r--r-- | sysdeps/x86_64/multiarch/Makefile         |   3
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.c      |  65
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.h      |  70
-rw-r--r-- | sysdeps/x86_64/multiarch/sched_cpucount.c |  42
8 files changed, 413 insertions, 171 deletions
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index a7e3fc7633..8769e9c966 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -23,6 +23,10 @@ #include <stdlib.h> #include <unistd.h> +#ifdef USE_MULTIARCH +# include "multiarch/init-arch.h" +#endif + static const struct intel_02_cache_info { unsigned int idx; @@ -443,19 +447,32 @@ init_cacheinfo (void) unsigned int ebx; unsigned int ecx; unsigned int edx; - int max_cpuid; int max_cpuid_ex; long int data = -1; long int shared = -1; unsigned int level; unsigned int threads = 0; +#ifdef USE_MULTIARCH + if (__cpu_features.kind == arch_kind_unknown) + __init_cpu_features (); +# define is_intel __cpu_features.kind == arch_kind_intel +# define is_amd __cpu_features.kind == arch_kind_amd +# define max_cpuid __cpu_features.max_cpuid +#else + int max_cpuid; asm volatile ("cpuid" : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (0)); - /* This spells out "GenuineIntel". */ - if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) +# define is_intel \ + ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69 + /* This spells out "AuthenticAMD". */ +# define is_amd \ + ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65 +#endif + + if (is_intel) { data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); @@ -470,9 +487,16 @@ init_cacheinfo (void) shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); } +#ifdef USE_MULTIARCH + eax = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax; + ebx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx; + ecx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx; + edx = __cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx; +#else asm volatile ("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (1)); +#endif /* Intel prefers SSSE3 instructions for memory/string routines if they are avaiable. */ @@ -519,7 +543,7 @@ init_cacheinfo (void) shared /= threads; } /* This spells out "AuthenticAMD". */ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + else if (is_amd) { data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 959b1328d7..8c67b5b5f9 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -1,5 +1,5 @@ /* Machine-dependent ELF dynamic relocation inline functions. x86-64 version. - Copyright (C) 2001-2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2001-2005, 2006, 2008, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>. @@ -266,40 +266,45 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, Elf64_Addr *const reloc_addr = reloc_addr_arg; const unsigned long int r_type = ELF64_R_TYPE (reloc->r_info); -#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC +# if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC if (__builtin_expect (r_type == R_X86_64_RELATIVE, 0)) { -# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC +# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC /* This is defined in rtld.c, but nowhere in the static libc.a; make the reference weak so static programs can still link. This declaration cannot be done when compiling rtld.c (i.e. #ifdef RTLD_BOOTSTRAP) because rtld.c contains the common defn for _dl_rtld_map, which is incompatible with a weak decl in the same file. 
*/ -# ifndef SHARED +# ifndef SHARED weak_extern (GL(dl_rtld_map)); -# endif +# endif if (map != &GL(dl_rtld_map)) /* Already done in rtld itself. */ -# endif +# endif *reloc_addr = map->l_addr + reloc->r_addend; } else -#endif +# endif if (__builtin_expect (r_type == R_X86_64_NONE, 0)) return; else { -#ifndef RTLD_BOOTSTRAP +# ifndef RTLD_BOOTSTRAP const Elf64_Sym *const refsym = sym; -#endif +# endif struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); Elf64_Addr value = (sym == NULL ? 0 : (Elf64_Addr) sym_map->l_addr + sym->st_value); -#if defined RTLD_BOOTSTRAP && !USE___THREAD + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, + 0)) + value = ((Elf64_Addr (*) (void)) value) (); + +# if defined RTLD_BOOTSTRAP && !USE___THREAD assert (r_type == R_X86_64_GLOB_DAT || r_type == R_X86_64_JUMP_SLOT); *reloc_addr = value + reloc->r_addend; -#else +# else switch (r_type) { case R_X86_64_GLOB_DAT: @@ -307,47 +312,47 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, *reloc_addr = value + reloc->r_addend; break; -#ifndef RESOLVE_CONFLICT_FIND_MAP +# ifndef RESOLVE_CONFLICT_FIND_MAP case R_X86_64_DTPMOD64: -# ifdef RTLD_BOOTSTRAP +# ifdef RTLD_BOOTSTRAP /* During startup the dynamic linker is always the module with index 1. XXX If this relocation is necessary move before RESOLVE call. */ *reloc_addr = 1; -# else +# else /* Get the information from the link map returned by the resolve function. */ if (sym_map != NULL) *reloc_addr = sym_map->l_tls_modid; -# endif +# endif break; case R_X86_64_DTPOFF64: -# ifndef RTLD_BOOTSTRAP +# ifndef RTLD_BOOTSTRAP /* During relocation all TLS symbols are defined and used. Therefore the offset is already correct. */ if (sym != NULL) *reloc_addr = sym->st_value + reloc->r_addend; -# endif +# endif break; case R_X86_64_TLSDESC: { struct tlsdesc volatile *td = (struct tlsdesc volatile *)reloc_addr; -# ifndef RTLD_BOOTSTRAP +# ifndef RTLD_BOOTSTRAP if (! sym) { td->arg = (void*)reloc->r_addend; td->entry = _dl_tlsdesc_undefweak; } else -# endif +# endif { -# ifndef RTLD_BOOTSTRAP -# ifndef SHARED +# ifndef RTLD_BOOTSTRAP +# ifndef SHARED CHECK_STATIC_TLS (map, sym_map); -# else +# else if (!TRY_STATIC_TLS (map, sym_map)) { td->arg = _dl_make_tlsdesc_dynamic @@ -355,8 +360,8 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, td->entry = _dl_tlsdesc_dynamic; } else +# endif # endif -# endif { td->arg = (void*)(sym->st_value - sym_map->l_tls_offset + reloc->r_addend); @@ -367,13 +372,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, } case R_X86_64_TPOFF64: /* The offset is negative, forward from the thread pointer. */ -# ifndef RTLD_BOOTSTRAP +# ifndef RTLD_BOOTSTRAP if (sym != NULL) -# endif +# endif { -# ifndef RTLD_BOOTSTRAP +# ifndef RTLD_BOOTSTRAP CHECK_STATIC_TLS (map, sym_map); -# endif +# endif /* We know the offset of the object the symbol is contained in. It is a negative value which will be added to the thread pointer. 
*/ @@ -381,42 +386,41 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, - sym_map->l_tls_offset); } break; -#endif +# endif -#ifndef RTLD_BOOTSTRAP +# ifndef RTLD_BOOTSTRAP case R_X86_64_64: *reloc_addr = value + reloc->r_addend; break; case R_X86_64_32: - *(unsigned int *) reloc_addr = value + reloc->r_addend; - if (value + reloc->r_addend > UINT_MAX) + value += reloc->r_addend; + *(unsigned int *) reloc_addr = value; + + const char *fmt; + if (__builtin_expect (value > UINT_MAX, 0)) { const char *strtab; + fmt = "\ +%s: Symbol `%s' causes overflow in R_X86_64_32 relocation\n"; + print_err: strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ -%s: Symbol `%s' causes overflow in R_X86_64_32 relocation\n", + _dl_error_printf (fmt, rtld_progname ?: "<program name unknown>", strtab + refsym->st_name); } break; -# ifndef RESOLVE_CONFLICT_FIND_MAP +# ifndef RESOLVE_CONFLICT_FIND_MAP /* Not needed for dl-conflict.c. */ case R_X86_64_PC32: - *(unsigned int *) reloc_addr = value + reloc->r_addend - - (Elf64_Addr) reloc_addr; - if (value + reloc->r_addend - (Elf64_Addr) reloc_addr - != (int)(value + reloc->r_addend - (Elf64_Addr) reloc_addr)) + value += reloc->r_addend - (Elf64_Addr) reloc_addr; + *(unsigned int *) reloc_addr = value; + if (__builtin_expect (value != (unsigned int) value, 0)) { - const char *strtab; - - strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - - _dl_error_printf ("\ -%s: Symbol `%s' causes overflow in R_X86_64_PC32 relocation\n", - rtld_progname ?: "<program name unknown>", - strtab + refsym->st_name); + fmt = "\ +%s: Symbol `%s' causes overflow in R_X86_64_PC32 relocation\n"; + goto print_err; } break; case R_X86_64_COPY: @@ -424,26 +428,22 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, /* This can happen in trace mode if an object could not be found. */ break; + memcpy (reloc_addr_arg, (void *) value, + MIN (sym->st_size, refsym->st_size)); if (__builtin_expect (sym->st_size > refsym->st_size, 0) || (__builtin_expect (sym->st_size < refsym->st_size, 0) && GLRO(dl_verbose))) { - const char *strtab; - - strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ -%s: Symbol `%s' has different size in shared object, consider re-linking\n", - rtld_progname ?: "<program name unknown>", - strtab + refsym->st_name); + fmt = "\ +%s: Symbol `%s' has different size in shared object, consider re-linking\n"; + goto print_err; } - memcpy (reloc_addr_arg, (void *) value, - MIN (sym->st_size, refsym->st_size)); break; -# endif +# endif default: _dl_reloc_bad_type (map, r_type, 0); break; -#endif +# endif } #endif } diff --git a/sysdeps/x86_64/dl-runtime.c b/sysdeps/x86_64/dl-runtime.c new file mode 100644 index 0000000000..b625d1e882 --- /dev/null +++ b/sysdeps/x86_64/dl-runtime.c @@ -0,0 +1,9 @@ +/* The ABI calls for the PLT stubs to pass the index of the relocation + and not its offset. In _dl_profile_fixup and _dl_call_pltexit we + also use the index. Therefore it is wasteful to compute the offset + in the trampoline just to reverse the operation immediately + afterwards. */ +#define reloc_offset reloc_arg * sizeof (PLTREL) +#define reloc_index reloc_arg + +#include <elf/dl-runtime.c> diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 3e2d182758..d8d9bc12a4 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -1,5 +1,5 @@ /* PLT trampolines. x86-64 version. - Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. 
+ Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -35,11 +35,7 @@ _dl_runtime_resolve: movq %r8, 40(%rsp) movq %r9, 48(%rsp) movq 64(%rsp), %rsi # Copy args pushed by PLT in register. - movq %rsi, %r11 # Multiply by 24 - addq %r11, %rsi - addq %r11, %rsi - shlq $3, %rsi - movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_offset + movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index call _dl_fixup # Call resolver. movq %rax, %r11 # Save return value movq 48(%rsp), %r9 # Get register content back. @@ -61,132 +57,165 @@ _dl_runtime_resolve: .type _dl_runtime_profile, @function .align 16 cfi_startproc + _dl_runtime_profile: - subq $88, %rsp - cfi_adjust_cfa_offset(104) # Incorporate PLT - movq %rax, (%rsp) # Preserve registers otherwise clobbered. - movq %rdx, 8(%rsp) - movq %r8, 16(%rsp) - movq %r9, 24(%rsp) - movq %rcx, 32(%rsp) - movq %rsi, 40(%rsp) - movq %rdi, 48(%rsp) - movq %rbp, 56(%rsp) # Information for auditors. - leaq 104(%rsp), %rax - movq %rax, 64(%rsp) - leaq 8(%rsp), %rcx - movq 104(%rsp), %rdx # Load return address if needed - movq 96(%rsp), %rsi # Copy args pushed by PLT in register. - movq %rsi,%r11 # Multiply by 24 - addq %r11,%rsi - addq %r11,%rsi - shlq $3, %rsi - movq 88(%rsp), %rdi # %rdi: link_map, %rsi: reloc_offset - leaq 72(%rsp), %r8 + /* The La_x86_64_regs data structure pointed to by the + fourth paramater must be 16-byte aligned. This must + be explicitly enforced. We have the set up a dynamically + sized stack frame. %rbx points to the top half which + has a fixed size and preserves the original stack pointer. */ + + subq $32, %rsp # Allocate the local storage. + cfi_adjust_cfa_offset(48) # Incorporate PLT + movq %rbx, (%rsp) + cfi_rel_offset(%rbx, 0) + + /* On the stack: + 56(%rbx) parameter #1 + 48(%rbx) return address + + 40(%rbx) reloc index + 32(%rbx) link_map + + 24(%rbx) La_x86_64_regs pointer + 16(%rbx) framesize + 8(%rbx) rax + (%rbx) rbx + */ + + movq %rax, 8(%rsp) + movq %rsp, %rbx + cfi_def_cfa_register(%rbx) + + /* Actively align the La_x86_64_regs structure. */ + andq $0xfffffffffffffff0, %rsp + subq $192, %rsp # sizeof(La_x86_64_regs) + movq %rsp, 24(%rbx) + + movq %rdx, (%rsp) # Fill the La_x86_64_regs structure. + movq %r8, 8(%rsp) + movq %r9, 16(%rsp) + movq %rcx, 24(%rsp) + movq %rsi, 32(%rsp) + movq %rdi, 40(%rsp) + movq %rbp, 48(%rsp) + leaq 48(%rbx), %rax + movq %rax, 56(%rsp) + movaps %xmm0, 64(%rsp) + movaps %xmm1, 80(%rsp) + movaps %xmm2, 96(%rsp) + movaps %xmm3, 112(%rsp) + movaps %xmm4, 128(%rsp) + movaps %xmm5, 144(%rsp) + movaps %xmm7, 160(%rsp) + + movq %rsp, %rcx # La_x86_64_regs pointer to %rcx. + movq 48(%rbx), %rdx # Load return address if needed. + movq 40(%rbx), %rsi # Copy args pushed by PLT in register. + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index + leaq 16(%rbx), %r8 call _dl_profile_fixup # Call resolver. - movq %rax, %r11 # Save return value - movq 8(%rsp), %rdx # Get back register content. - movq 16(%rsp), %r8 - movq 24(%rsp), %r9 - movq (%rsp),%rax - movq 72(%rsp), %r10 + + movq %rax, %r11 # Save return value. + + movq 8(%rbx), %rax # Get back register content. + movq (%rsp), %rdx + movq 8(%rsp), %r8 + movq 16(%rsp), %r9 + movaps 64(%rsp), %xmm0 + movaps 80(%rsp), %xmm1 + movaps 96(%rsp), %xmm2 + movaps 112(%rsp), %xmm3 + movaps 128(%rsp), %xmm4 + movaps 144(%rsp), %xmm5 + movaps 160(%rsp), %xmm7 + + movq 16(%rbx), %r10 # Anything in framesize? 
testq %r10, %r10 jns 1f - movq 32(%rsp), %rcx - movq 40(%rsp), %rsi - movq 48(%rsp), %rdi - addq $104,%rsp # Adjust stack - cfi_adjust_cfa_offset (-104) + + /* There's nothing in the frame size, so there + will be no call to the _dl_call_pltexit. */ + + movq 24(%rsp), %rcx # Get back registers content. + movq 32(%rsp), %rsi + movq 40(%rsp), %rdi + + movq %rbx, %rsp + movq (%rsp), %rbx + cfi_restore(rbx) + cfi_def_cfa_register(%rsp) + + addq $48, %rsp # Adjust the stack to the return value + # (eats the reloc index and link_map) + cfi_adjust_cfa_offset(-48) jmp *%r11 # Jump to function address. - /* - +104 return address - +96 PLT2 - +88 PLT1 - +80 free - +72 free - +64 %rsp - +56 %rbp - +48 %rdi - +40 %rsi - +32 %rcx - +24 %r9 - +16 %r8 - +8 %rdx - %rsp %rax - */ - cfi_adjust_cfa_offset (104) -1: movq %rbx, 72(%rsp) - cfi_rel_offset (rbx, 72) - leaq 112(%rsp), %rsi - movq %rsp, %rbx - cfi_def_cfa_register (%rbx) - movq %r10, %rcx +1: + cfi_adjust_cfa_offset(48) + cfi_rel_offset(%rbx, 0) + cfi_def_cfa_register(%rbx) + + /* At this point we need to prepare new stack for the function + which has to be called. We copy the original stack to a + temporary buffer of the size specified by the 'framesize' + returned from _dl_profile_fixup */ + + leaq 56(%rbx), %rsi # stack addq $8, %r10 andq $0xfffffffffffffff0, %r10 + movq %r10, %rcx subq %r10, %rsp movq %rsp, %rdi shrq $3, %rcx rep movsq - movq 32(%rbx), %rcx - movq 40(%rbx), %rsi - movq 48(%rbx), %rdi + + movq 24(%rdi), %rcx # Get back register content. + movq 32(%rdi), %rsi + movq 40(%rdi), %rdi + call *%r11 - movq %rbx, %rsp - cfi_def_cfa_register (%rsp) - subq $72, %rsp - cfi_adjust_cfa_offset (72) - movq %rsp, %rcx - movq %rax, (%rcx) + + mov 24(%rbx), %rsp # Drop the copied stack content + + /* Now we have to prepare the La_x86_64_retval structure for the + _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, + so we just need to allocate the sizeof(La_x86_64_retval) space on + the stack, since the alignment has already been taken care of. */ + + subq $80, %rsp # sizeof(La_x86_64_retval) + movq %rsp, %rcx # La_x86_64_retval argument to %rcx. + + movq %rax, (%rcx) # Fill in the La_x86_64_retval structure. movq %rdx, 8(%rcx) - /* Even though the stack is correctly aligned to allow using movaps - we use movups. Some callers might provide an incorrectly aligned - stack and we do not want to have it blow up here. */ - movups %xmm0, 16(%rcx) - movups %xmm1, 32(%rcx) + movaps %xmm0, 16(%rcx) + movaps %xmm1, 32(%rcx) fstpt 48(%rcx) fstpt 64(%rcx) - /* - +176 return address - +168 PLT2 - +160 PLT1 - +152 free - +144 free - +136 %rsp - +128 %rbp - +120 %rdi - +112 %rsi - +104 %rcx - +96 %r9 - +88 %r8 - +80 %rdx - +64 %st1 result - +48 %st result - +32 %xmm1 result - +16 %xmm0 result - +8 %rdx result - %rsp %rax result - */ - leaq 80(%rsp), %rdx - movq 144(%rsp), %rbx - cfi_restore (rbx) - movq 168(%rsp), %rsi # Copy args pushed by PLT in register. - movq %rsi,%r11 # Multiply by 24 - addq %r11,%rsi - addq %r11,%rsi - shlq $3, %rsi - movq 160(%rsp), %rdi # %rdi: link_map, %rsi: reloc_offset + + movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. + movq 40(%rbx), %rsi # Copy args pushed by PLT in register. + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index call _dl_call_pltexit - movq (%rsp), %rax + + movq (%rsp), %rax # Restore return registers. 
movq 8(%rsp), %rdx - movups 16(%rsp), %xmm0 - movups 32(%rsp), %xmm1 + movaps 16(%rsp), %xmm0 + movaps 32(%rsp), %xmm1 fldt 64(%rsp) fldt 48(%rsp) - addq $176, %rsp - cfi_adjust_cfa_offset (-176) + + movq %rbx, %rsp + movq (%rsp), %rbx + cfi_restore(rbx) + cfi_def_cfa_register(%rsp) + + addq $48, %rsp # Adjust the stack to the return value + # (eats the reloc index and link_map) + cfi_adjust_cfa_offset(-48) retq + cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile #endif diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile new file mode 100644 index 0000000000..2a1e910e06 --- /dev/null +++ b/sysdeps/x86_64/multiarch/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),csu) +aux += init-arch +endif diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c new file mode 100644 index 0000000000..eb4365fe32 --- /dev/null +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -0,0 +1,65 @@ +/* Initialize CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2008 Free Software Foundation, Inc. + Contributed by Ulrich Drepper <drepper@redhat.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "init-arch.h" + + +struct cpu_features __cpu_features attribute_hidden; + + +void +__init_cpu_features (void) +{ + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + asm volatile ("cpuid" + : "=a" (__cpu_features.max_cpuid), "=b" (ebx), "=c" (ecx), + "=d" (edx) + : "0" (0)); + + /* This spells out "GenuineIntel". */ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { + __cpu_features.kind = arch_kind_intel; + + asm volatile ("cpuid" + : "=a" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].eax), + "=b" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ebx), + "=c" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx), + "=d" (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].edx) + : "0" (1)); + } + /* This spells out "AuthenticAMD". */ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + { + __cpu_features.kind = arch_kind_amd; + + asm volatile ("cpuid" + : "=a" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].eax), + "=b" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ebx), + "=c" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ecx), + "=d" (__cpu_features.cpuid[AMD_CPUID_INDEX_1].edx) + : "0" (1)); + } + else + __cpu_features.kind = arch_kind_other; +} diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h new file mode 100644 index 0000000000..86cd83dc4c --- /dev/null +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -0,0 +1,70 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008 Free Software Foundation, Inc. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sys/param.h>
+
+enum
+  {
+    INTEL_CPUID_INDEX_1 = 0,
+    /* Keep the following line at the end.  */
+    INTEL_CPUID_INDEX_MAX
+  };
+
+enum
+  {
+    AMD_CPUID_INDEX_1 = 0,
+    /* Keep the following line at the end.  */
+    AMD_CPUID_INDEX_MAX
+  };
+
+extern struct cpu_features
+{
+  enum
+    {
+      arch_kind_unknown = 0,
+      arch_kind_intel,
+      arch_kind_amd,
+      arch_kind_other
+    } kind;
+  int max_cpuid;
+  struct
+  {
+    unsigned int eax;
+    unsigned int ebx;
+    unsigned int ecx;
+    unsigned int edx;
+  } cpuid[MAX (INTEL_CPUID_INDEX_MAX, AMD_CPUID_INDEX_MAX)];
+} __cpu_features attribute_hidden;
+
+
+extern void __init_cpu_features (void) attribute_hidden;
+#define INIT_ARCH()\
+  do \
+    if (__cpu_features.kind == arch_kind_unknown) \
+      __init_cpu_features (); \
+  while (0)
+
+/* Following are the feature tests used throughout libc.  */
+
+#define INTEL_HAS_POPCOUNT \
+  (__cpu_features.kind == arch_kind_intel \
+   && (__cpu_features.cpuid[INTEL_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
+
+#define AMD_HAS_POPCOUNT \
+  (__cpu_features.kind == arch_kind_amd \
+   && (__cpu_features.cpuid[AMD_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
new file mode 100644
index 0000000000..dc20182df4
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -0,0 +1,42 @@
+/* Count bits in CPU set.  x86-64 multi-arch version.
+   This file is part of the GNU C Library.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+   Contributed by Ulrich Drepper <drepper@redhat.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifdef SHARED
+# include <sched.h>
+# include "init-arch.h"
+
+# define __sched_cpucount static generic_cpucount
+# include <posix/sched_cpucount.c>
+# undef __sched_cpucount
+
+# define POPCNT(l) \
+  ({ __cpu_mask r; \
+     asm ("popcntq %1, %0" : "=r" (r) : "0" (l));\
+     r; })
+# define __sched_cpucount static popcount_cpucount
+# include <posix/sched_cpucount.c>
+# undef __sched_cpucount
+
+libc_ifunc (__sched_cpucount,
+	    INTEL_HAS_POPCOUNT || AMD_HAS_POPCOUNT
+	    ? popcount_cpucount : generic_cpucount);
+#else
+# include_next <sched_cpucount.c>
+#endif
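
The new multiarch/init-arch.c caches the CPUID results that cacheinfo.c and later string/memory routines consult through __cpu_features. The following standalone sketch mirrors that probing logic (it is not glibc code and assumes an x86-64 target with GCC-style inline asm): CPUID leaf 0 returns the vendor signature in EBX/EDX/ECX, and leaf 1 reports POPCNT support in ECX bit 23, the bit tested by INTEL_HAS_POPCOUNT and AMD_HAS_POPCOUNT.

```c
/* Standalone sketch of the CPUID probing done by the new init-arch.c.
   Assumes an x86-64 compiler with GCC-style inline asm; not glibc code.  */
#include <stdio.h>

static void
cpuid (unsigned int leaf, unsigned int *eax, unsigned int *ebx,
       unsigned int *ecx, unsigned int *edx)
{
  __asm__ volatile ("cpuid"
                    : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                    : "0" (leaf));
}

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Leaf 0: EBX/EDX/ECX spell out the vendor string.  */
  cpuid (0, &eax, &ebx, &ecx, &edx);
  if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
    puts ("GenuineIntel");
  else if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163)
    puts ("AuthenticAMD");
  else
    puts ("other vendor");

  /* Leaf 1: ECX bit 23 advertises POPCNT, the bit tested by
     INTEL_HAS_POPCOUNT and AMD_HAS_POPCOUNT.  */
  cpuid (1, &eax, &ebx, &ecx, &edx);
  printf ("POPCNT available: %s\n", (ecx & (1 << 23)) ? "yes" : "no");
  return 0;
}
```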
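The simplification in _dl_runtime_resolve and the new dl-runtime.c wrapper rest on a small piece of arithmetic: the PLT pushes a relocation index, each Elf64_Rela entry is 24 bytes, and the removed "Multiply by 24" sequence only turned that index into a byte offset which elf/dl-runtime.c then divided away again. A hedged sketch of that equivalence (illustration only, assuming glibc's <elf.h>):

```c
/* Illustration only: the index -> offset conversion that the old
   trampoline performed in assembly and that the new
   "#define reloc_offset reloc_arg * sizeof (PLTREL)" expresses in C.  */
#include <assert.h>
#include <elf.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t reloc_index = 5;     /* arbitrary example value */

  /* Old asm: %rsi = index; %r11 = index; two adds and a shift by 3
     give index * 3 * 8 = index * 24.  */
  uint64_t rsi = reloc_index, r11 = reloc_index;
  rsi += r11;
  rsi += r11;
  rsi <<= 3;

  assert (sizeof (Elf64_Rela) == 24);
  assert (rsi == reloc_index * sizeof (Elf64_Rela));
  printf ("index %llu -> offset %llu bytes\n",
          (unsigned long long) reloc_index, (unsigned long long) rsi);
  return 0;
}
```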
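Most of the _dl_runtime_profile rewrite serves a single constraint spelled out in its new comment: the La_x86_64_regs block handed to audit hooks must be 16-byte aligned so that movaps can spill the XMM argument registers into it. The masking trick the trampoline uses is shown below on a made-up address value (a sketch, not the real stack-frame code):

```c
/* Sketch of the "andq $0xfffffffffffffff0, %rsp" style alignment used
   before storing XMM registers with movaps; illustrative only.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* A deliberately misaligned example address.  */
  uintptr_t rsp = 0x7fffffffdc0bu;

  /* Clear the low four bits: round the pointer down to the next
     16-byte boundary so aligned SSE stores cannot fault.  */
  uintptr_t aligned = rsp & ~(uintptr_t) 0xf;

  printf ("%#lx -> %#lx (16-byte aligned: %s)\n",
          (unsigned long) rsp, (unsigned long) aligned,
          aligned % 16 == 0 ? "yes" : "no");
  return 0;
}
```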
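Finally, the new multiarch/sched_cpucount.c compiles a generic bit-counting loop and a popcntq-based variant side by side and lets libc_ifunc pick one when the symbol is first resolved. A rough user-level analogue is sketched below: a plain function pointer stands in for the IFUNC relocation and __builtin_popcountll for the inline popcntq; the function names mirror the patch, but the program itself is hypothetical.

```c
/* Hypothetical stand-in for the generic vs. POPCNT selection in
   sysdeps/x86_64/multiarch/sched_cpucount.c; not the glibc sources.  */
#include <stdio.h>

/* Generic fallback: clear the lowest set bit until none remain.  */
static int
generic_cpucount (unsigned long long mask)
{
  int n = 0;
  while (mask != 0)
    {
      mask &= mask - 1;
      ++n;
    }
  return n;
}

/* Hardware variant; in the patch this is popcntq via inline asm.  */
static int
popcount_cpucount (unsigned long long mask)
{
  return __builtin_popcountll (mask);
}

/* Runtime selection, analogous to what libc_ifunc arranges at
   symbol-resolution time.  */
static int
has_popcnt (void)
{
  unsigned int eax, ebx, ecx, edx;
  __asm__ volatile ("cpuid"
                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                    : "0" (1));
  return (ecx & (1 << 23)) != 0;
}

int
main (void)
{
  int (*cpucount) (unsigned long long)
    = has_popcnt () ? popcount_cpucount : generic_cpucount;
  printf ("%d bits set\n", cpucount (0xf0f0f0f0ULL));
  return 0;
}
```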