Diffstat (limited to 'REORG.TODO/sysdeps/x86')
47 files changed, 7077 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/x86/Makefile b/REORG.TODO/sysdeps/x86/Makefile new file mode 100644 index 0000000000..0d0326c21a --- /dev/null +++ b/REORG.TODO/sysdeps/x86/Makefile @@ -0,0 +1,10 @@ +ifeq ($(subdir),csu) +gen-as-const-headers += cpu-features-offsets.sym +endif + +ifeq ($(subdir),elf) +sysdep-dl-routines += dl-get-cpu-features + +tests += tst-get-cpu-features +tests-static += tst-get-cpu-features-static +endif diff --git a/REORG.TODO/sysdeps/x86/Versions b/REORG.TODO/sysdeps/x86/Versions new file mode 100644 index 0000000000..e02923708e --- /dev/null +++ b/REORG.TODO/sysdeps/x86/Versions @@ -0,0 +1,5 @@ +ld { + GLIBC_PRIVATE { + __get_cpu_features; + } +} diff --git a/REORG.TODO/sysdeps/x86/bits/byteswap-16.h b/REORG.TODO/sysdeps/x86/bits/byteswap-16.h new file mode 100644 index 0000000000..bcc768709d --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/byteswap-16.h @@ -0,0 +1,49 @@ +/* Macros to swap the order of bytes in 16-bit integer values. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_BYTESWAP_H +# error "Never use <bits/byteswap-16.h> directly; include <byteswap.h> instead." +#endif + +#ifdef __GNUC__ +# if __GNUC__ >= 2 +# define __bswap_16(x) \ + (__extension__ \ + ({ unsigned short int __v, __x = (unsigned short int) (x); \ + if (__builtin_constant_p (__x)) \ + __v = __bswap_constant_16 (__x); \ + else \ + __asm__ ("rorw $8, %w0" \ + : "=r" (__v) \ + : "0" (__x) \ + : "cc"); \ + __v; })) +# else +/* This is better than nothing. */ +# define __bswap_16(x) \ + (__extension__ \ + ({ unsigned short int __x = (unsigned short int) (x); \ + __bswap_constant_16 (__x); })) +# endif +#else +static __inline unsigned short int +__bswap_16 (unsigned short int __bsx) +{ + return __bswap_constant_16 (__bsx); +} +#endif diff --git a/REORG.TODO/sysdeps/x86/bits/byteswap.h b/REORG.TODO/sysdeps/x86/bits/byteswap.h new file mode 100644 index 0000000000..5dfc5331ec --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/byteswap.h @@ -0,0 +1,155 @@ +/* Macros to swap the order of bytes in integer values. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if !defined _BYTESWAP_H && !defined _NETINET_IN_H && !defined _ENDIAN_H +# error "Never use <bits/byteswap.h> directly; include <byteswap.h> instead." +#endif + +#ifndef _BITS_BYTESWAP_H +#define _BITS_BYTESWAP_H 1 + +#include <features.h> +#include <bits/types.h> +#include <bits/wordsize.h> + +/* Swap bytes in 16 bit value. */ +#define __bswap_constant_16(x) \ + ((unsigned short int) ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))) + +/* Get __bswap_16. */ +#include <bits/byteswap-16.h> + +/* Swap bytes in 32 bit value. */ +#define __bswap_constant_32(x) \ + ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) + +#ifdef __GNUC__ +# if __GNUC_PREREQ (4, 3) +static __inline unsigned int +__bswap_32 (unsigned int __bsx) +{ + return __builtin_bswap32 (__bsx); +} +# elif __GNUC__ >= 2 +# if __WORDSIZE == 64 || (defined __i486__ || defined __pentium__ \ + || defined __pentiumpro__ || defined __pentium4__ \ + || defined __k8__ || defined __athlon__ \ + || defined __k6__ || defined __nocona__ \ + || defined __core2__ || defined __geode__ \ + || defined __amdfam10__) +/* To swap the bytes in a word the i486 processors and up provide the + `bswap' opcode. On i386 we have to use three instructions. */ +# define __bswap_32(x) \ + (__extension__ \ + ({ unsigned int __v, __x = (x); \ + if (__builtin_constant_p (__x)) \ + __v = __bswap_constant_32 (__x); \ + else \ + __asm__ ("bswap %0" : "=r" (__v) : "0" (__x)); \ + __v; })) +# else +# define __bswap_32(x) \ + (__extension__ \ + ({ unsigned int __v, __x = (x); \ + if (__builtin_constant_p (__x)) \ + __v = __bswap_constant_32 (__x); \ + else \ + __asm__ ("rorw $8, %w0;" \ + "rorl $16, %0;" \ + "rorw $8, %w0" \ + : "=r" (__v) \ + : "0" (__x) \ + : "cc"); \ + __v; })) +# endif +# else +# define __bswap_32(x) \ + (__extension__ \ + ({ unsigned int __x = (x); __bswap_constant_32 (__x); })) +# endif +#else +static __inline unsigned int +__bswap_32 (unsigned int __bsx) +{ + return __bswap_constant_32 (__bsx); +} +#endif + + +#if __GNUC_PREREQ (2, 0) +/* Swap bytes in 64 bit value. 
*/ +# define __bswap_constant_64(x) \ + (__extension__ ((((x) & 0xff00000000000000ull) >> 56) \ + | (((x) & 0x00ff000000000000ull) >> 40) \ + | (((x) & 0x0000ff0000000000ull) >> 24) \ + | (((x) & 0x000000ff00000000ull) >> 8) \ + | (((x) & 0x00000000ff000000ull) << 8) \ + | (((x) & 0x0000000000ff0000ull) << 24) \ + | (((x) & 0x000000000000ff00ull) << 40) \ + | (((x) & 0x00000000000000ffull) << 56))) + +# if __GNUC_PREREQ (4, 3) +static __inline __uint64_t +__bswap_64 (__uint64_t __bsx) +{ + return __builtin_bswap64 (__bsx); +} +# elif __WORDSIZE == 64 +# define __bswap_64(x) \ + (__extension__ \ + ({ __uint64_t __v, __x = (x); \ + if (__builtin_constant_p (__x)) \ + __v = __bswap_constant_64 (__x); \ + else \ + __asm__ ("bswap %q0" : "=r" (__v) : "0" (__x)); \ + __v; })) +# else +# define __bswap_64(x) \ + (__extension__ \ + ({ union { __extension__ __uint64_t __ll; \ + unsigned int __l[2]; } __w, __r; \ + if (__builtin_constant_p (x)) \ + __r.__ll = __bswap_constant_64 (x); \ + else \ + { \ + __w.__ll = (x); \ + __r.__l[0] = __bswap_32 (__w.__l[1]); \ + __r.__l[1] = __bswap_32 (__w.__l[0]); \ + } \ + __r.__ll; })) +# endif +#else +# define __bswap_constant_64(x) \ + ((((x) & 0xff00000000000000ull) >> 56) \ + | (((x) & 0x00ff000000000000ull) >> 40) \ + | (((x) & 0x0000ff0000000000ull) >> 24) \ + | (((x) & 0x000000ff00000000ull) >> 8) \ + | (((x) & 0x00000000ff000000ull) << 8) \ + | (((x) & 0x0000000000ff0000ull) << 24) \ + | (((x) & 0x000000000000ff00ull) << 40) \ + | (((x) & 0x00000000000000ffull) << 56)) + +static __inline __uint64_t +__bswap_64 (__uint64_t __bsx) +{ + return __bswap_constant_64 (__bsx); +} +#endif + +#endif /* _BITS_BYTESWAP_H */ diff --git a/REORG.TODO/sysdeps/x86/bits/endian.h b/REORG.TODO/sysdeps/x86/bits/endian.h new file mode 100644 index 0000000000..5a56c726f7 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/endian.h @@ -0,0 +1,7 @@ +/* i386/x86_64 are little-endian. */ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +#define __BYTE_ORDER __LITTLE_ENDIAN diff --git a/REORG.TODO/sysdeps/x86/bits/flt-eval-method.h b/REORG.TODO/sysdeps/x86/bits/flt-eval-method.h new file mode 100644 index 0000000000..24da08eaa3 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/flt-eval-method.h @@ -0,0 +1,33 @@ +/* Define __GLIBC_FLT_EVAL_METHOD. x86 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/flt-eval-method.h> directly; include <math.h> instead." 
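As a quick illustration of the byte-swap macros above (a minimal sketch; the public bswap_16/bswap_32/bswap_64 names from <byteswap.h> map onto the __bswap_* definitions shown in this header, and the constant-folded and inline-asm paths return the same values):

/* byteswap-demo.c: exercise the macros defined by bits/byteswap.h.  */
#include <byteswap.h>
#include <stdio.h>

int
main (void)
{
  unsigned short int s = bswap_16 (0x1234);            /* 0x3412 */
  unsigned int u = bswap_32 (0x12345678);              /* 0x78563412 */
  unsigned long long int l = bswap_64 (0x1122334455667788ULL);
  printf ("%hx %x %llx\n", s, u, l);                   /* 3412 78563412 8877665544332211 */
  return 0;
}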
+#endif + +#ifdef __FLT_EVAL_METHOD__ +# if __FLT_EVAL_METHOD__ == -1 +# define __GLIBC_FLT_EVAL_METHOD 2 +# else +# define __GLIBC_FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +# endif +#elif defined __x86_64__ +# define __GLIBC_FLT_EVAL_METHOD 0 +#else +# define __GLIBC_FLT_EVAL_METHOD 2 +#endif diff --git a/REORG.TODO/sysdeps/x86/bits/fp-logb.h b/REORG.TODO/sysdeps/x86/bits/fp-logb.h new file mode 100644 index 0000000000..8ee0bbcc3b --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/fp-logb.h @@ -0,0 +1,24 @@ +/* Define __FP_LOGB0_IS_MIN and __FP_LOGBNAN_IS_MIN. x86 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/fp-logb.h> directly; include <math.h> instead." +#endif + +#define __FP_LOGB0_IS_MIN 1 +#define __FP_LOGBNAN_IS_MIN 1 diff --git a/REORG.TODO/sysdeps/x86/bits/huge_vall.h b/REORG.TODO/sysdeps/x86/bits/huge_vall.h new file mode 100644 index 0000000000..9749bacc4b --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/huge_vall.h @@ -0,0 +1,41 @@ +/* `HUGE_VALL' constant for ix86 (where it is infinity). + Used by <stdlib.h> and <math.h> functions for overflow. + Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/huge_vall.h> directly; include <math.h> instead." +#endif + +#if __GNUC_PREREQ(3,3) +# define HUGE_VALL (__builtin_huge_vall()) +#elif __GNUC_PREREQ(2,96) +# define HUGE_VALL (__extension__ 0x1.0p32767L) +#else + +# define __HUGE_VALL_bytes { 0, 0, 0, 0, 0, 0, 0, 0x80, 0xff, 0x7f, 0, 0 } + +# define __huge_vall_t union { unsigned char __c[12]; long double __ld; } +# ifdef __GNUC__ +# define HUGE_VALL (__extension__ \ + ((__huge_vall_t) { __c: __HUGE_VALL_bytes }).__ld) +# else /* Not GCC. */ +static __huge_vall_t __huge_vall = { __HUGE_VALL_bytes }; +# define HUGE_VALL (__huge_vall.__ld) +# endif /* GCC. 
*/ + +#endif /* GCC 2.95 */ diff --git a/REORG.TODO/sysdeps/x86/bits/link.h b/REORG.TODO/sysdeps/x86/bits/link.h new file mode 100644 index 0000000000..d41b350639 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/link.h @@ -0,0 +1,159 @@ +/* Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + + +#ifndef __x86_64__ +/* Registers for entry into PLT on IA-32. */ +typedef struct La_i86_regs +{ + uint32_t lr_edx; + uint32_t lr_ecx; + uint32_t lr_eax; + uint32_t lr_ebp; + uint32_t lr_esp; +} La_i86_regs; + +/* Return values for calls from PLT on IA-32. */ +typedef struct La_i86_retval +{ + uint32_t lrv_eax; + uint32_t lrv_edx; + long double lrv_st0; + long double lrv_st1; + uint64_t lrv_bnd0; + uint64_t lrv_bnd1; +} La_i86_retval; + + +__BEGIN_DECLS + +extern Elf32_Addr la_i86_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_i86_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_i86_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_i86_regs *__inregs, + La_i86_retval *__outregs, + const char *symname); + +__END_DECLS + +#else + +/* Registers for entry into PLT on x86-64. */ +# if __GNUC_PREREQ (4,0) +typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16))); +typedef float La_x86_64_ymm + __attribute__ ((__vector_size__ (32), __aligned__ (16))); +typedef double La_x86_64_zmm + __attribute__ ((__vector_size__ (64), __aligned__ (16))); +# else +typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__))); +# endif + +typedef union +{ +# if __GNUC_PREREQ (4,0) + La_x86_64_ymm ymm[2]; + La_x86_64_zmm zmm[1]; +# endif + La_x86_64_xmm xmm[4]; +} La_x86_64_vector __attribute__ ((__aligned__ (16))); + +typedef struct La_x86_64_regs +{ + uint64_t lr_rdx; + uint64_t lr_r8; + uint64_t lr_r9; + uint64_t lr_rcx; + uint64_t lr_rsi; + uint64_t lr_rdi; + uint64_t lr_rbp; + uint64_t lr_rsp; + La_x86_64_xmm lr_xmm[8]; + La_x86_64_vector lr_vector[8]; +#ifndef __ILP32__ + __int128_t lr_bnd[4]; +#endif +} La_x86_64_regs; + +/* Return values for calls from PLT on x86-64. 
*/ +typedef struct La_x86_64_retval +{ + uint64_t lrv_rax; + uint64_t lrv_rdx; + La_x86_64_xmm lrv_xmm0; + La_x86_64_xmm lrv_xmm1; + long double lrv_st0; + long double lrv_st1; + La_x86_64_vector lrv_vector0; + La_x86_64_vector lrv_vector1; +#ifndef __ILP32__ + __int128_t lrv_bnd0; + __int128_t lrv_bnd1; +#endif +} La_x86_64_retval; + +#define La_x32_regs La_x86_64_regs +#define La_x32_retval La_x86_64_retval + +__BEGIN_DECLS + +extern Elf64_Addr la_x86_64_gnu_pltenter (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_x86_64_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_x86_64_gnu_pltexit (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_x86_64_regs *__inregs, + La_x86_64_retval *__outregs, + const char *__symname); + +extern Elf32_Addr la_x32_gnu_pltenter (Elf32_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_x32_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_x32_gnu_pltexit (Elf32_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_x32_regs *__inregs, + La_x32_retval *__outregs, + const char *__symname); + +__END_DECLS + +#endif diff --git a/REORG.TODO/sysdeps/x86/bits/select.h b/REORG.TODO/sysdeps/x86/bits/select.h new file mode 100644 index 0000000000..129de9cf02 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/select.h @@ -0,0 +1,63 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SYS_SELECT_H +# error "Never use <bits/select.h> directly; include <sys/select.h> instead." +#endif + +#include <bits/wordsize.h> + + +#if defined __GNUC__ && __GNUC__ >= 2 + +# if __WORDSIZE == 64 +# define __FD_ZERO_STOS "stosq" +# else +# define __FD_ZERO_STOS "stosl" +# endif + +# define __FD_ZERO(fdsp) \ + do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("cld; rep; " __FD_ZERO_STOS \ + : "=c" (__d0), "=D" (__d1) \ + : "a" (0), "0" (sizeof (fd_set) \ + / sizeof (__fd_mask)), \ + "1" (&__FDS_BITS (fdsp)[0]) \ + : "memory"); \ + } while (0) + +#else /* ! GNU CC */ + +/* We don't use `memset' because this would require a prototype and + the array isn't too big. 
*/ +# define __FD_ZERO(set) \ + do { \ + unsigned int __i; \ + fd_set *__arr = (set); \ + for (__i = 0; __i < sizeof (fd_set) / sizeof (__fd_mask); ++__i) \ + __FDS_BITS (__arr)[__i] = 0; \ + } while (0) + +#endif /* GNU CC */ + +#define __FD_SET(d, set) \ + ((void) (__FDS_BITS (set)[__FD_ELT (d)] |= __FD_MASK (d))) +#define __FD_CLR(d, set) \ + ((void) (__FDS_BITS (set)[__FD_ELT (d)] &= ~__FD_MASK (d))) +#define __FD_ISSET(d, set) \ + ((__FDS_BITS (set)[__FD_ELT (d)] & __FD_MASK (d)) != 0) diff --git a/REORG.TODO/sysdeps/x86/bits/semaphore.h b/REORG.TODO/sysdeps/x86/bits/semaphore.h new file mode 100644 index 0000000000..a498ae46da --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/semaphore.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SEMAPHORE_H +# error "Never use <bits/semaphore.h> directly; include <semaphore.h> instead." +#endif + +#include <bits/wordsize.h> + +#if __WORDSIZE == 64 +# define __SIZEOF_SEM_T 32 +#else +# define __SIZEOF_SEM_T 16 +#endif + + +/* Value returned if `sem_open' failed. */ +#define SEM_FAILED ((sem_t *) 0) + + +typedef union +{ + char __size[__SIZEOF_SEM_T]; + long int __align; +} sem_t; diff --git a/REORG.TODO/sysdeps/x86/bits/setjmp.h b/REORG.TODO/sysdeps/x86/bits/setjmp.h new file mode 100644 index 0000000000..4ddf9e3c6d --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/setjmp.h @@ -0,0 +1,40 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define the machine-dependent type `jmp_buf'. x86-64 version. */ +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H 1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." 
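The __FD_ZERO/__FD_SET/__FD_CLR/__FD_ISSET macros above implement the public FD_* interface used with select(2); a minimal usage sketch (plain POSIX, nothing glibc-internal):

/* fdset-demo.c: wait for stdin to become readable.  */
#include <stdio.h>
#include <sys/select.h>

int
main (void)
{
  fd_set rfds;
  FD_ZERO (&rfds);                  /* the rep;stos loop above under GCC */
  FD_SET (0, &rfds);                /* watch fd 0 (stdin) */
  if (select (1, &rfds, NULL, NULL, NULL) > 0 && FD_ISSET (0, &rfds))
    puts ("stdin is readable");
  return 0;
}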
+#endif + +#include <bits/wordsize.h> + +#ifndef _ASM + +# if __WORDSIZE == 64 +typedef long int __jmp_buf[8]; +# elif defined __x86_64__ +__extension__ typedef long long int __jmp_buf[8]; +# else +typedef int __jmp_buf[6]; +# endif + +#endif + +#endif /* bits/setjmp.h */ diff --git a/REORG.TODO/sysdeps/x86/bits/string.h b/REORG.TODO/sysdeps/x86/bits/string.h new file mode 100644 index 0000000000..94cba8e76f --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/string.h @@ -0,0 +1,1996 @@ +/* Optimized, inlined string functions. i486/x86-64 version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STRING_H +# error "Never use <bits/string.h> directly; include <string.h> instead." +#endif + +/* Use the unaligned string inline ABI. */ +#define _STRING_INLINE_unaligned 1 + +/* Don't inline mempcpy into memcpy as x86 has an optimized mempcpy. */ +#define _HAVE_STRING_ARCH_mempcpy 1 + +/* Enable inline functions only for i486 or better when compiling for + ia32. */ +#if !defined __x86_64__ && (defined __i486__ || defined __pentium__ \ + || defined __pentiumpro__ || defined __pentium4__ \ + || defined __nocona__ || defined __atom__ \ + || defined __core2__ || defined __corei7__ \ + || defined __sandybridge__ || defined __haswell__ \ + || defined __bonnell__ || defined __silvermont__ \ + || defined __k6__ || defined __geode__ \ + || defined __k8__ || defined __athlon__ \ + || defined __amdfam10__ || defined __bdver1__ \ + || defined __bdver2__ || defined __bdver3__ \ + || defined __bdver4__ || defined __btver1__ \ + || defined __btver2__) + +/* We only provide optimizations if the user selects them and if + GNU CC is used. */ +# if !defined __NO_STRING_INLINES && defined __USE_STRING_INLINES \ + && defined __GNUC__ && __GNUC__ >= 2 + +# ifndef __STRING_INLINE +# ifndef __extern_inline +# define __STRING_INLINE inline +# else +# define __STRING_INLINE __extern_inline +# endif +# endif + +/* The macros are used in some of the optimized implementations below. */ +# define __STRING_SMALL_GET16(src, idx) \ + ((((const unsigned char *) (src))[idx + 1] << 8) \ + | ((const unsigned char *) (src))[idx]) +# define __STRING_SMALL_GET32(src, idx) \ + (((((const unsigned char *) (src))[idx + 3] << 8 \ + | ((const unsigned char *) (src))[idx + 2]) << 8 \ + | ((const unsigned char *) (src))[idx + 1]) << 8 \ + | ((const unsigned char *) (src))[idx]) + + +/* Copy N bytes of SRC to DEST. */ +# define _HAVE_STRING_ARCH_memcpy 1 +# define memcpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (n) \ + ? __memcpy_c ((dest), (src), (n)) \ + : __memcpy_g ((dest), (src), (n)))) +# define __memcpy_c(dest, src, n) \ + ((n) == 0 \ + ? (dest) \ + : (((n) % 4 == 0) \ + ? __memcpy_by4 (dest, src, n) \ + : (((n) % 2 == 0) \ + ? 
__memcpy_by2 (dest, src, n) \ + : __memcpy_g (dest, src, n)))) + +__STRING_INLINE void *__memcpy_by4 (void *__dest, const void *__src, + size_t __n); + +__STRING_INLINE void * +__memcpy_by4 (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __dest; + __asm__ __volatile__ + ("1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b" + : "=&r" (__d0), "=&r" (__tmp), "=&r" (__src), "=&r" (__d1) + : "1" (__tmp), "2" (__src), "3" (__n / 4) + : "memory", "cc"); + return __dest; +} + +__STRING_INLINE void *__memcpy_by2 (void *__dest, const void *__src, + size_t __n); + +__STRING_INLINE void * +__memcpy_by2 (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __dest; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\n\t" + "movw (%2),%w0\n\t" + "movw %w0,(%1)" + : "=&q" (__d0), "=&r" (__tmp), "=&r" (__src), "=&r" (__d1) + : "1" (__tmp), "2" (__src), "3" (__n / 2) + : "memory", "cc"); + return __dest; +} + +__STRING_INLINE void *__memcpy_g (void *__dest, const void *__src, size_t __n); + +__STRING_INLINE void * +__memcpy_g (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1, __d2; + register void *__tmp = __dest; + __asm__ __volatile__ + ("cld\n\t" + "shrl $1,%%ecx\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\n\t" + "shrl $1,%%ecx\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\n\t" + "rep; movsl" + : "=&c" (__d0), "=&D" (__d1), "=&S" (__d2), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) + : "0" (__n), "1" (__tmp), "2" (__src), + "m" ( *(struct { __extension__ char __x[__n]; } *)__src) + : "cc"); + return __dest; +} + +# define _HAVE_STRING_ARCH_memmove 1 +# ifndef _FORCE_INLINES +/* Copy N bytes of SRC to DEST, guaranteeing + correct behavior for overlapping strings. */ +# define memmove(dest, src, n) __memmove_g (dest, src, n) + +__STRING_INLINE void *__memmove_g (void *, const void *, size_t) + __asm__ ("memmove"); + +__STRING_INLINE void * +__memmove_g (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1, __d2; + register void *__tmp = __dest; + if (__dest < __src) + __asm__ __volatile__ + ("cld\n\t" + "rep; movsb" + : "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) + : "0" (__n), "1" (__src), "2" (__tmp), + "m" ( *(struct { __extension__ char __x[__n]; } *)__src)); + else + __asm__ __volatile__ + ("decl %1\n\t" + "decl %2\n\t" + "std\n\t" + "rep; movsb\n\t" + "cld" + : "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) + : "0" (__n), "1" (__n + (const char *) __src), + "2" (__n + (char *) __tmp), + "m" ( *(struct { __extension__ char __x[__n]; } *)__src)); + return __dest; +} +# endif + +/* Compare N bytes of S1 and S2. */ +# define _HAVE_STRING_ARCH_memcmp 1 +# ifndef _FORCE_INLINES +# ifndef __PIC__ +/* gcc has problems to spill registers when using PIC. 
*/ +__STRING_INLINE int +memcmp (const void *__s1, const void *__s2, size_t __n) +{ + register unsigned long int __d0, __d1, __d2; + register int __res; + __asm__ __volatile__ + ("cld\n\t" + "testl %3,%3\n\t" + "repe; cmpsb\n\t" + "je 1f\n\t" + "sbbl %0,%0\n\t" + "orl $1,%0\n" + "1:" + : "=&a" (__res), "=&S" (__d0), "=&D" (__d1), "=&c" (__d2) + : "0" (0), "1" (__s1), "2" (__s2), "3" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s2) + : "cc"); + return __res; +} +# endif +# endif + +/* Set N bytes of S to C. */ +# define _HAVE_STRING_ARCH_memset 1 +# define _USE_STRING_ARCH_memset 1 +# define memset(s, c, n) \ + (__extension__ (__builtin_constant_p (n) && (n) <= 16 \ + ? ((n) == 1 \ + ? __memset_c1 ((s), (c)) \ + : __memset_gc ((s), (c), (n))) \ + : (__builtin_constant_p (c) \ + ? (__builtin_constant_p (n) \ + ? __memset_ccn ((s), (c), (n)) \ + : memset ((s), (c), (n))) \ + : (__builtin_constant_p (n) \ + ? __memset_gcn ((s), (c), (n)) \ + : memset ((s), (c), (n)))))) + +# define __memset_c1(s, c) ({ void *__s = (s); \ + *((unsigned char *) __s) = (unsigned char) (c); \ + __s; }) + +# define __memset_gc(s, c, n) \ + ({ void *__s = (s); \ + union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + } *__u = __s; \ + unsigned int __c = ((unsigned int) ((unsigned char) (c))) * 0x01010101; \ + \ + /* We apply a trick here. `gcc' would implement the following \ + assignments using immediate operands. But this uses to much \ + memory (7, instead of 4 bytes). So we force the value in a \ + registers. */ \ + if ((n) == 3 || (n) >= 5) \ + __asm__ __volatile__ ("" : "=r" (__c) : "0" (__c)); \ + \ + /* This `switch' statement will be removed at compile-time. */ \ + switch (n) \ + { \ + case 15: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 11: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 7: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 3: \ + __u->__usi = (unsigned short int) __c; \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = (unsigned char) __c; \ + break; \ + \ + case 14: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 10: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 6: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 2: \ + __u->__usi = (unsigned short int) __c; \ + break; \ + \ + case 13: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 9: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 5: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 1: \ + __u->__uc = (unsigned char) __c; \ + break; \ + \ + case 16: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 12: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 8: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 4: \ + __u->__ui = __c; \ + case 0: \ + break; \ + } \ + \ + __s; }) + +# define __memset_ccn(s, c, n) \ + (((n) % 4 == 0) \ + ? __memset_ccn_by4 (s, ((unsigned int) ((unsigned char) (c))) * 0x01010101,\ + n) \ + : (((n) % 2 == 0) \ + ? 
__memset_ccn_by2 (s, \ + ((unsigned int) ((unsigned char) (c))) * 0x01010101,\ + n) \ + : memset (s, c, n))) + +__STRING_INLINE void *__memset_ccn_by4 (void *__s, unsigned int __c, + size_t __n); + +__STRING_INLINE void * +__memset_ccn_by4 (void *__s, unsigned int __c, size_t __n) +{ + register void *__tmp = __s; + register unsigned long int __d0; +# ifdef __i686__ + __asm__ __volatile__ + ("cld\n\t" + "rep; stosl" + : "=&a" (__c), "=&D" (__tmp), "=&c" (__d0), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +# else + __asm__ __volatile__ + ("1:\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + : "=&r" (__c), "=&r" (__tmp), "=&r" (__d0), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +# endif + return __s; +} + +__STRING_INLINE void *__memset_ccn_by2 (void *__s, unsigned int __c, + size_t __n); + +__STRING_INLINE void * +__memset_ccn_by2 (void *__s, unsigned int __c, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __s; +# ifdef __i686__ + __asm__ __volatile__ + ("cld\n\t" + "rep; stosl\n" + "stosw" + : "=&a" (__d0), "=&D" (__tmp), "=&c" (__d1), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +# else + __asm__ __volatile__ + ("1:\tmovl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + "movw %w0,(%1)" + : "=&q" (__d0), "=&r" (__tmp), "=&r" (__d1), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +#endif + return __s; +} + +# define __memset_gcn(s, c, n) \ + (((n) % 4 == 0) \ + ? __memset_gcn_by4 (s, c, n) \ + : (((n) % 2 == 0) \ + ? __memset_gcn_by2 (s, c, n) \ + : memset (s, c, n))) + +__STRING_INLINE void *__memset_gcn_by4 (void *__s, int __c, size_t __n); + +__STRING_INLINE void * +__memset_gcn_by4 (void *__s, int __c, size_t __n) +{ + register void *__tmp = __s; + register unsigned long int __d0; + __asm__ __volatile__ + ("movb %b0,%h0\n" + "pushw %w0\n\t" + "shll $16,%0\n\t" + "popw %w0\n" + "1:\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + : "=&q" (__c), "=&r" (__tmp), "=&r" (__d0), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); + return __s; +} + +__STRING_INLINE void *__memset_gcn_by2 (void *__s, int __c, size_t __n); + +__STRING_INLINE void * +__memset_gcn_by2 (void *__s, int __c, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __s; + __asm__ __volatile__ + ("movb %b0,%h0\n\t" + "pushw %w0\n\t" + "shll $16,%0\n\t" + "popw %w0\n" + "1:\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + "movw %w0,(%1)" + : "=&q" (__d0), "=&r" (__tmp), "=&r" (__d1), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); + return __s; +} + + +/* Search N bytes of S for C. 
*/ +# define _HAVE_STRING_ARCH_memchr 1 +# ifndef _FORCE_INLINES +__STRING_INLINE void * +memchr (const void *__s, int __c, size_t __n) +{ + register unsigned long int __d0; +# ifdef __i686__ + register unsigned long int __d1; +# endif + register unsigned char *__res; + if (__n == 0) + return NULL; +# ifdef __i686__ + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "cmovne %2,%0" + : "=D" (__res), "=&c" (__d0), "=&r" (__d1) + : "a" (__c), "0" (__s), "1" (__n), "2" (1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# else + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "je 1f\n\t" + "movl $1,%0\n" + "1:" + : "=D" (__res), "=&c" (__d0) + : "a" (__c), "0" (__s), "1" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# endif + return __res - 1; +} +# endif + +# define _HAVE_STRING_ARCH_memrchr 1 +# ifndef _FORCE_INLINES +__STRING_INLINE void *__memrchr (const void *__s, int __c, size_t __n); + +__STRING_INLINE void * +__memrchr (const void *__s, int __c, size_t __n) +{ + register unsigned long int __d0; +# ifdef __i686__ + register unsigned long int __d1; +# endif + register void *__res; + if (__n == 0) + return NULL; +# ifdef __i686__ + __asm__ __volatile__ + ("std\n\t" + "repne; scasb\n\t" + "cmovne %2,%0\n\t" + "cld\n\t" + "incl %0" + : "=D" (__res), "=&c" (__d0), "=&r" (__d1) + : "a" (__c), "0" (__s + __n - 1), "1" (__n), "2" (-1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# else + __asm__ __volatile__ + ("std\n\t" + "repne; scasb\n\t" + "je 1f\n\t" + "orl $-1,%0\n" + "1:\tcld\n\t" + "incl %0" + : "=D" (__res), "=&c" (__d0) + : "a" (__c), "0" (__s + __n - 1), "1" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# endif + return __res; +} +# ifdef __USE_GNU +# define memrchr(s, c, n) __memrchr ((s), (c), (n)) +# endif +# endif + +/* Return pointer to C in S. */ +# define _HAVE_STRING_ARCH_rawmemchr 1 +__STRING_INLINE void *__rawmemchr (const void *__s, int __c); + +# ifndef _FORCE_INLINES +__STRING_INLINE void * +__rawmemchr (const void *__s, int __c) +{ + register unsigned long int __d0; + register unsigned char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + : "=D" (__res), "=&c" (__d0) + : "a" (__c), "0" (__s), "1" (0xffffffff), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res - 1; +} +# ifdef __USE_GNU +__STRING_INLINE void * +rawmemchr (const void *__s, int __c) +{ + return __rawmemchr (__s, __c); +} +# endif /* use GNU */ +# endif + + +/* Return the length of S. */ +# define _HAVE_STRING_ARCH_strlen 1 +# define strlen(str) \ + (__extension__ (__builtin_constant_p (str) \ + ? __builtin_strlen (str) \ + : __strlen_g (str))) +__STRING_INLINE size_t __strlen_g (const char *__str); + +__STRING_INLINE size_t +__strlen_g (const char *__str) +{ + register char __dummy; + register const char *__tmp = __str; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%b1\n\t" + "leal 1(%0),%0\n\t" + "testb %b1,%b1\n\t" + "jne 1b" + : "=r" (__tmp), "=&q" (__dummy) + : "0" (__str), + "m" ( *(struct { char __x[0xfffffff]; } *)__str) + : "cc" ); + return __tmp - __str - 1; +} + + +/* Copy SRC to DEST. */ +# define _HAVE_STRING_ARCH_strcpy 1 +# define strcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (sizeof ((src)[0]) == 1 && strlen (src) + 1 <= 8 \ + ? 
__strcpy_a_small ((dest), (src), strlen (src) + 1) \ + : (char *) memcpy ((char *) (dest), \ + (const char *) (src), \ + strlen (src) + 1)) \ + : __strcpy_g ((dest), (src)))) + +# define __strcpy_a_small(dest, src, srclen) \ + (__extension__ ({ char *__dest = (dest); \ + union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + char __c; \ + } *__u = (void *) __dest; \ + switch (srclen) \ + { \ + case 1: \ + __u->__uc = '\0'; \ + break; \ + case 2: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + break; \ + case 3: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 4: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + break; \ + case 5: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__uc = '\0'; \ + break; \ + case 6: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + break; \ + case 7: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 8: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__ui = __STRING_SMALL_GET32 (src, 4); \ + break; \ + } \ + (char *) __dest; })) + +__STRING_INLINE char *__strcpy_g (char *__dest, const char *__src); + +__STRING_INLINE char * +__strcpy_g (char *__dest, const char *__src) +{ + register char *__tmp = __dest; + register char __dummy; + __asm__ __volatile__ + ( + "1:\n\t" + "movb (%0),%b2\n\t" + "leal 1(%0),%0\n\t" + "movb %b2,(%1)\n\t" + "leal 1(%1),%1\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=&r" (__src), "=&r" (__tmp), "=&q" (__dummy), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__src), "1" (__tmp), + "m" ( *(struct { char __x[0xfffffff]; } *)__src) + : "cc"); + return __dest; +} + + +# ifdef __USE_GNU +# define _HAVE_STRING_ARCH_stpcpy 1 +/* Copy SRC to DEST. */ +# define __stpcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (strlen (src) + 1 <= 8 \ + ? __stpcpy_a_small ((dest), (src), strlen (src) + 1) \ + : __stpcpy_c ((dest), (src), strlen (src) + 1)) \ + : __stpcpy_g ((dest), (src)))) +# define __stpcpy_c(dest, src, srclen) \ + ((srclen) % 4 == 0 \ + ? __mempcpy_by4 (dest, src, srclen) - 1 \ + : ((srclen) % 2 == 0 \ + ? __mempcpy_by2 (dest, src, srclen) - 1 \ + : __mempcpy_byn (dest, src, srclen) - 1)) + +/* In glibc itself we use this symbol for namespace reasons. 
*/ +# define stpcpy(dest, src) __stpcpy ((dest), (src)) + +# define __stpcpy_a_small(dest, src, srclen) \ + (__extension__ ({ union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + char __c; \ + } *__u = (void *) (dest); \ + switch (srclen) \ + { \ + case 1: \ + __u->__uc = '\0'; \ + break; \ + case 2: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + __u = __extension__ ((void *) __u + 1); \ + break; \ + case 3: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 4: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 3); \ + break; \ + case 5: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__uc = '\0'; \ + break; \ + case 6: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + __u = __extension__ ((void *) __u + 1); \ + break; \ + case 7: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 8: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__ui = __STRING_SMALL_GET32 (src, 4); \ + __u = __extension__ ((void *) __u + 3); \ + break; \ + } \ + (char *) __u; })) + +__STRING_INLINE char *__mempcpy_by4 (char *__dest, const char *__src, + size_t __srclen); + +__STRING_INLINE char * +__mempcpy_by4 (char *__dest, const char *__src, size_t __srclen) +{ + register char *__tmp = __dest; + register unsigned long int __d0, __d1; + __asm__ __volatile__ + ("1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b" + : "=&r" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1) + : "1" (__tmp), "2" (__src), "3" (__srclen / 4) + : "memory", "cc"); + return __tmp; +} + +__STRING_INLINE char *__mempcpy_by2 (char *__dest, const char *__src, + size_t __srclen); + +__STRING_INLINE char * +__mempcpy_by2 (char *__dest, const char *__src, size_t __srclen) +{ + register char *__tmp = __dest; + register unsigned long int __d0, __d1; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\n\t" + "movw (%2),%w0\n\t" + "movw %w0,(%1)" + : "=&q" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__tmp), "2" (__src), "3" (__srclen / 2), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + return __tmp + 2; +} + +__STRING_INLINE char *__mempcpy_byn (char *__dest, const char *__src, + size_t __srclen); + +__STRING_INLINE char * +__mempcpy_byn (char *__dest, const char *__src, size_t __srclen) +{ + register unsigned long __d0, __d1; + register char *__tmp = __dest; + __asm__ __volatile__ + ("cld\n\t" + "shrl $1,%%ecx\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\n\t" + "shrl $1,%%ecx\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\n\t" + "rep; movsl" + : "=D" (__tmp), "=&c" (__d0), "=&S" (__d1), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "0" (__tmp), "1" (__srclen), "2" (__src), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + return __tmp; +} + +__STRING_INLINE char 
*__stpcpy_g (char *__dest, const char *__src); + +__STRING_INLINE char * +__stpcpy_g (char *__dest, const char *__src) +{ + register char *__tmp = __dest; + register char __dummy; + __asm__ __volatile__ + ( + "1:\n\t" + "movb (%0),%b2\n\t" + "leal 1(%0),%0\n\t" + "movb %b2,(%1)\n\t" + "leal 1(%1),%1\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=&r" (__src), "=r" (__tmp), "=&q" (__dummy), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__src), "1" (__tmp), + "m" ( *(struct { char __x[0xfffffff]; } *)__src) + : "cc"); + return __tmp - 1; +} +# endif + + +/* Copy no more than N characters of SRC to DEST. */ +# define _HAVE_STRING_ARCH_strncpy 1 +# define strncpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) \ + ? ((strlen (src) + 1 >= ((size_t) (n)) \ + ? (char *) memcpy ((char *) (dest), \ + (const char *) (src), n) \ + : __strncpy_cg ((dest), (src), strlen (src) + 1, n))) \ + : __strncpy_gg ((dest), (src), n))) +# define __strncpy_cg(dest, src, srclen, n) \ + (((srclen) % 4 == 0) \ + ? __strncpy_by4 (dest, src, srclen, n) \ + : (((srclen) % 2 == 0) \ + ? __strncpy_by2 (dest, src, srclen, n) \ + : __strncpy_byn (dest, src, srclen, n))) + +__STRING_INLINE char *__strncpy_by4 (char *__dest, const char __src[], + size_t __srclen, size_t __n); + +__STRING_INLINE char * +__strncpy_by4 (char *__dest, const char __src[], size_t __srclen, size_t __n) +{ + register char *__tmp = __dest; + register int __dummy1, __dummy2; + __asm__ __volatile__ + ("1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b" + : "=&r" (__dummy1), "=r" (__tmp), "=&r" (__src), "=&r" (__dummy2), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__tmp), "2" (__src), "3" (__srclen / 4), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + (void) memset (__tmp, '\0', __n - __srclen); + return __dest; +} + +__STRING_INLINE char *__strncpy_by2 (char *__dest, const char __src[], + size_t __srclen, size_t __n); + +__STRING_INLINE char * +__strncpy_by2 (char *__dest, const char __src[], size_t __srclen, size_t __n) +{ + register char *__tmp = __dest; + register int __dummy1, __dummy2; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\n\t" + "movw (%2),%w0\n\t" + "movw %w0,(%1)\n\t" + : "=&q" (__dummy1), "=r" (__tmp), "=&r" (__src), "=&r" (__dummy2), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__tmp), "2" (__src), "3" (__srclen / 2), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + (void) memset (__tmp + 2, '\0', __n - __srclen); + return __dest; +} + +__STRING_INLINE char *__strncpy_byn (char *__dest, const char __src[], + size_t __srclen, size_t __n); + +__STRING_INLINE char * +__strncpy_byn (char *__dest, const char __src[], size_t __srclen, size_t __n) +{ + register unsigned long int __d0, __d1; + register char *__tmp = __dest; + __asm__ __volatile__ + ("cld\n\t" + "shrl $1,%1\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\n\t" + "shrl $1,%1\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\n\t" + "rep; movsl" + : "=D" (__tmp), "=&c" (__d0), "=&S" (__d1), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__srclen), "0" (__tmp),"2" (__src), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + (void) memset (__tmp, '\0', __n - __srclen); + return __dest; +} 
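The dispatch idiom this header relies on throughout, testing __builtin_constant_p so that calls with compile-time-constant sizes are routed to the specialized by-4/by-2 loops, is easiest to see in miniature; the sketch below uses a hypothetical my_zero helper and is not part of the header:

/* dispatch-demo.c: the compile-time dispatch pattern of the
   memcpy/memset/strncpy macros above, reduced to one case (GCC).  */
#include <stddef.h>
#include <string.h>

static inline void *
my_zero_by4 (void *__s, size_t __n)     /* the (n % 4 == 0) fast path */
{
  return memset (__s, 0, __n);
}

#define my_zero(s, n) \
  (__builtin_constant_p (n) && (n) % 4 == 0 \
   ? my_zero_by4 ((s), (n))  /* size known and a word multiple */ \
   : memset ((s), 0, (n)))   /* generic fallback */

int
main (void)
{
  char buf[16];
  my_zero (buf, sizeof buf);  /* constant 16: resolves to my_zero_by4 */
  return 0;
}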
+ +__STRING_INLINE char *__strncpy_gg (char *__dest, const char *__src, + size_t __n); + +__STRING_INLINE char * +__strncpy_gg (char *__dest, const char *__src, size_t __n) +{ + register char *__tmp = __dest; + register char __dummy; + if (__n > 0) + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%2\n\t" + "incl %0\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "je 3f\n\t" + "testb %2,%2\n\t" + "jne 1b\n\t" + "2:\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "jne 2b\n\t" + "3:" + : "=&r" (__src), "=&r" (__tmp), "=&q" (__dummy), "=&r" (__n) + : "0" (__src), "1" (__tmp), "3" (__n) + : "memory", "cc"); + + return __dest; +} + + +/* Append SRC onto DEST. */ +# define _HAVE_STRING_ARCH_strcat 1 +# define strcat(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? __strcat_c ((dest), (src), strlen (src) + 1) \ + : __strcat_g ((dest), (src)))) + +__STRING_INLINE char *__strcat_c (char *__dest, const char __src[], + size_t __srclen); + +__STRING_INLINE char * +__strcat_c (char *__dest, const char __src[], size_t __srclen) +{ +# ifdef __i686__ + register unsigned long int __d0; + register char *__tmp; + __asm__ __volatile__ + ("repne; scasb" + : "=D" (__tmp), "=&c" (__d0), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__dest), "1" (0xffffffff), "a" (0), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + --__tmp; +# else + register char *__tmp = __dest; + __asm__ __volatile__ + ("decl %0\n\t" + "1:\n\t" + "incl %0\n\t" + "cmpb $0,(%0)\n\t" + "jne 1b\n" + : "=r" (__tmp), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__tmp), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); +# endif + (void) memcpy (__tmp, __src, __srclen); + return __dest; +} + +__STRING_INLINE char *__strcat_g (char *__dest, const char *__src); + +__STRING_INLINE char * +__strcat_g (char *__dest, const char *__src) +{ + register char *__tmp = __dest; + register char __dummy; + __asm__ __volatile__ + ("decl %1\n\t" + "1:\n\t" + "incl %1\n\t" + "cmpb $0,(%1)\n\t" + "jne 1b\n" + "2:\n\t" + "movb (%2),%b0\n\t" + "incl %2\n\t" + "movb %b0,(%1)\n\t" + "incl %1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n" + : "=&q" (__dummy), "=&r" (__tmp), "=&r" (__src), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "1" (__tmp), "2" (__src), + "m" ( *(struct { char __x[0xfffffff]; } *)__src) + : "memory", "cc"); + return __dest; +} + + +/* Append no more than N characters from SRC onto DEST. */ +# define _HAVE_STRING_ARCH_strncat 1 +# define strncat(dest, src, n) \ + (__extension__ ({ char *__dest = (dest); \ + __builtin_constant_p (src) && __builtin_constant_p (n) \ + ? (strlen (src) < ((size_t) (n)) \ + ? 
strcat (__dest, (src)) \ + : (*(char *)__mempcpy (strchr (__dest, '\0'), \ + (const char *) (src), \ + (n)) = 0, __dest)) \ + : __strncat_g (__dest, (src), (n)); })) + +__STRING_INLINE char *__strncat_g (char *__dest, const char __src[], + size_t __n); + +__STRING_INLINE char * +__strncat_g (char *__dest, const char __src[], size_t __n) +{ + register char *__tmp = __dest; + register char __dummy; +# ifdef __i686__ + __asm__ __volatile__ + ("repne; scasb\n" + "movl %4, %3\n\t" + "decl %1\n\t" + "1:\n\t" + "subl $1,%3\n\t" + "jc 2f\n\t" + "movb (%2),%b0\n\t" + "movsb\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n\t" + "decl %1\n" + "2:\n\t" + "movb $0,(%1)" + : "=&a" (__dummy), "=&D" (__tmp), "=&S" (__src), "=&c" (__n) + : "g" (__n), "0" (0), "1" (__tmp), "2" (__src), "3" (0xffffffff) + : "memory", "cc"); +# else + __asm__ __volatile__ + ("1:\n\t" + "cmpb $0,1(%1)\n\t" + "leal 1(%1),%1\n\t" + "jne 1b\n" + "2:\n\t" + "subl $1,%3\n\t" + "jc 3f\n\t" + "movb (%2),%b0\n\t" + "leal 1(%2),%2\n\t" + "movb %b0,(%1)\n\t" + "leal 1(%1),%1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n\t" + "decl %1\n" + "3:\n\t" + "movb $0,(%1)" + : "=&q" (__dummy), "=&r" (__tmp), "=&r" (__src), "=&r" (__n) + : "1" ((unsigned long) __tmp - 1), "2" (__src), "3" (__n) + : "memory", "cc"); +#endif + return __dest; +} + + +/* Compare S1 and S2. */ +# define _HAVE_STRING_ARCH_strcmp 1 +# define strcmp(s1, s2) \ + (__extension__ (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ + && (sizeof ((s1)[0]) != 1 || strlen (s1) >= 4) \ + && (sizeof ((s2)[0]) != 1 || strlen (s2) >= 4) \ + ? memcmp ((const char *) (s1), (const char *) (s2), \ + (strlen (s1) < strlen (s2) \ + ? strlen (s1) : strlen (s2)) + 1) \ + : (__builtin_constant_p (s1) && sizeof ((s1)[0]) == 1 \ + && sizeof ((s2)[0]) == 1 && strlen (s1) < 4 \ + ? (__builtin_constant_p (s2) && sizeof ((s2)[0]) == 1 \ + ? __strcmp_cc ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s1)) \ + : __strcmp_cg ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s1))) \ + : (__builtin_constant_p (s2) && sizeof ((s1)[0]) == 1 \ + && sizeof ((s2)[0]) == 1 && strlen (s2) < 4 \ + ? (__builtin_constant_p (s1) \ + ? 
__strcmp_cc ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s2)) \ + : __strcmp_gc ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s2))) \ + : __strcmp_gg ((s1), (s2)))))) + +# define __strcmp_cc(s1, s2, l) \ + (__extension__ ({ register int __result = (s1)[0] - (s2)[0]; \ + if (l > 0 && __result == 0) \ + { \ + __result = (s1)[1] - (s2)[1]; \ + if (l > 1 && __result == 0) \ + { \ + __result = (s1)[2] - (s2)[2]; \ + if (l > 2 && __result == 0) \ + __result = (s1)[3] - (s2)[3]; \ + } \ + } \ + __result; })) + +# define __strcmp_cg(s1, s2, l1) \ + (__extension__ ({ const unsigned char *__s2 = (s2); \ + register int __result = (s1)[0] - __s2[0]; \ + if (l1 > 0 && __result == 0) \ + { \ + __result = (s1)[1] - __s2[1]; \ + if (l1 > 1 && __result == 0) \ + { \ + __result = (s1)[2] - __s2[2]; \ + if (l1 > 2 && __result == 0) \ + __result = (s1)[3] - __s2[3]; \ + } \ + } \ + __result; })) + +# define __strcmp_gc(s1, s2, l2) \ + (__extension__ ({ const unsigned char *__s1 = (s1); \ + register int __result = __s1[0] - (s2)[0]; \ + if (l2 > 0 && __result == 0) \ + { \ + __result = __s1[1] - (s2)[1]; \ + if (l2 > 1 && __result == 0) \ + { \ + __result = __s1[2] - (s2)[2]; \ + if (l2 > 2 && __result == 0) \ + __result = __s1[3] - (s2)[3]; \ + } \ + } \ + __result; })) + +__STRING_INLINE int __strcmp_gg (const char *__s1, const char *__s2); + +__STRING_INLINE int +__strcmp_gg (const char *__s1, const char *__s2) +{ + register int __res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%1),%b0\n\t" + "leal 1(%1),%1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 2f\n\t" + "leal 1(%2),%2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n\t" + "xorl %0,%0\n\t" + "jmp 3f\n" + "2:\n\t" + "movl $1,%0\n\t" + "jb 3f\n\t" + "negl %0\n" + "3:" + : "=q" (__res), "=&r" (__s1), "=&r" (__s2) + : "1" (__s1), "2" (__s2), + "m" ( *(struct { char __x[0xfffffff]; } *)__s1), + "m" ( *(struct { char __x[0xfffffff]; } *)__s2) + : "cc"); + return __res; +} + + +/* Compare N characters of S1 and S2. */ +# define _HAVE_STRING_ARCH_strncmp 1 +# define strncmp(s1, s2, n) \ + (__extension__ (__builtin_constant_p (s1) && strlen (s1) < ((size_t) (n)) \ + ? strcmp ((s1), (s2)) \ + : (__builtin_constant_p (s2) && strlen (s2) < ((size_t) (n))\ + ? strcmp ((s1), (s2)) \ + : __strncmp_g ((s1), (s2), (n))))) + +__STRING_INLINE int __strncmp_g (const char *__s1, const char *__s2, + size_t __n); + +__STRING_INLINE int +__strncmp_g (const char *__s1, const char *__s2, size_t __n) +{ + register int __res; + __asm__ __volatile__ + ("1:\n\t" + "subl $1,%3\n\t" + "jc 2f\n\t" + "movb (%1),%b0\n\t" + "incl %1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 3f\n\t" + "incl %2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n" + "2:\n\t" + "xorl %0,%0\n\t" + "jmp 4f\n" + "3:\n\t" + "movl $1,%0\n\t" + "jb 4f\n\t" + "negl %0\n" + "4:" + : "=q" (__res), "=&r" (__s1), "=&r" (__s2), "=&r" (__n) + : "1" (__s1), "2" (__s2), "3" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s2) + : "cc"); + return __res; +} + + +/* Find the first occurrence of C in S. */ +# define _HAVE_STRING_ARCH_strchr 1 +# define _USE_STRING_ARCH_strchr 1 +# define strchr(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? ((c) == '\0' \ + ? 
(char *) __rawmemchr ((s), (c)) \ + : __strchr_c ((s), ((c) & 0xff) << 8)) \ + : __strchr_g ((s), (c)))) + +__STRING_INLINE char *__strchr_c (const char *__s, int __c); + +__STRING_INLINE char * +__strchr_c (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %0,%0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + +__STRING_INLINE char *__strchr_g (const char *__s, int __c); + +__STRING_INLINE char * +__strchr_g (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %0,%0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + + +/* Find the first occurrence of C in S or the final NUL byte. */ +# define _HAVE_STRING_ARCH_strchrnul 1 +# define __strchrnul(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? ((c) == '\0' \ + ? (char *) __rawmemchr ((s), c) \ + : __strchrnul_c ((s), ((c) & 0xff) << 8)) \ + : __strchrnul_g ((s), c))) + +__STRING_INLINE char *__strchrnul_c (const char *__s, int __c); + +__STRING_INLINE char * +__strchrnul_c (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "decl %0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + +__STRING_INLINE char *__strchrnul_g (const char *__s, int __c); + +__STRING_INLINE char * +__strchrnul_g (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "decl %0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} +# ifdef __USE_GNU +# define strchrnul(s, c) __strchrnul ((s), (c)) +# endif + + +# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED +/* Find the first occurrence of C in S. This is the BSD name. */ +# define _HAVE_STRING_ARCH_index 1 +# define index(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? __strchr_c ((s), ((c) & 0xff) << 8) \ + : __strchr_g ((s), (c)))) +# endif + + +/* Find the last occurrence of C in S. */ +# define _HAVE_STRING_ARCH_strrchr 1 +# define strrchr(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? 
__strrchr_c ((s), ((c) & 0xff) << 8) \ + : __strrchr_g ((s), (c)))) + +# ifdef __i686__ +__STRING_INLINE char *__strrchr_c (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_c (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "cmpb %h2,%b2\n\t" + "cmove %1,%0\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=d" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (1), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res - 1; +} + +__STRING_INLINE char *__strrchr_g (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_g (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("movb %b2,%h2\n" + "cld\n\t" + "1:\n\t" + "lodsb\n\t" + "cmpb %h2,%b2\n\t" + "cmove %1,%0\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=d" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (1), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res - 1; +} +# else +__STRING_INLINE char *__strrchr_c (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_c (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=d" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (0), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + +__STRING_INLINE char *__strrchr_g (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_g (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "cld\n\t" + "1:\n\t" + "lodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=r" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (0), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} +# endif + + +# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED +/* Find the last occurrence of C in S. This is the BSD name. */ +# define _HAVE_STRING_ARCH_rindex 1 +# define rindex(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? __strrchr_c ((s), ((c) & 0xff) << 8) \ + : __strrchr_g ((s), (c)))) +# endif + + +/* Return the length of the initial segment of S which + consists entirely of characters not in REJECT. */ +# define _HAVE_STRING_ARCH_strcspn 1 +# define strcspn(s, reject) \ + (__extension__ (__builtin_constant_p (reject) && sizeof ((reject)[0]) == 1 \ + ? ((reject)[0] == '\0' \ + ? strlen (s) \ + : ((reject)[1] == '\0' \ + ? 
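[Editor's note] The i686 variants above keep the loop body branch-free: CMOVE copies the current scan position into the result register on every match, so a single forward pass leaves "last match + 1" behind, hence the final "return __res - 1". A portable sketch of the same single-pass idea (hypothetical name):

#include <stddef.h>

static char *
strrchr_sketch (const char *s, int c_in)
{
  char c = (char) c_in;
  const char *last = NULL;
  do
    if (*s == c)
      last = s;                 /* the CMOVE above performs this update */
  while (*s++ != '\0');
  return (char *) last;
}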
__strcspn_c1 ((s), (((reject)[0] << 8) & 0xff00)) \ + : __strcspn_cg ((s), (reject), strlen (reject)))) \ + : __strcspn_g ((s), (reject)))) + +__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); + +# ifndef _FORCE_INLINES +__STRING_INLINE size_t +__strcspn_c1 (const char *__s, int __reject) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%%al\n\t" + "leal 1(%0),%0\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__reject), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return (__res - 1) - __s; +} +# endif + +__STRING_INLINE size_t __strcspn_cg (const char *__s, const char __reject[], + size_t __reject_len); + +__STRING_INLINE size_t +__strcspn_cg (const char *__s, const char __reject[], size_t __reject_len) +{ + register unsigned long int __d0, __d1, __d2; + register const char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %5,%%edi\n\t" + "movl %6,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n" + "2:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "0" (__s), "d" (__reject), "g" (__reject_len) + : "memory", "cc"); + return (__res - 1) - __s; +} + +__STRING_INLINE size_t __strcspn_g (const char *__s, const char *__reject); +# ifdef __PIC__ + +__STRING_INLINE size_t +__strcspn_g (const char *__s, const char *__reject) +{ + register unsigned long int __d0, __d1, __d2; + register const char *__res; + __asm__ __volatile__ + ("pushl %%ebx\n\t" + "movl %4,%%edi\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%ebx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %4,%%edi\n\t" + "movl %%ebx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n" + "2:\n\t" + "popl %%ebx" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "r" (__reject), "0" (__s), "1" (0), "2" (0xffffffff) + : "memory", "cc"); + return (__res - 1) - __s; +} +# else +__STRING_INLINE size_t +__strcspn_g (const char *__s, const char *__reject) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register const char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%edx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%edx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n" + "2:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2), "=&d" (__d3) + : "0" (__s), "1" (0), "2" (0xffffffff), "3" (__reject), "b" (__reject) + /* Clobber memory, otherwise GCC cannot handle this. */ + : "memory", "cc"); + return (__res - 1) - __s; +} +# endif + + +/* Return the length of the initial segment of S which + consists entirely of characters in ACCEPT. */ +# define _HAVE_STRING_ARCH_strspn 1 +# define strspn(s, accept) \ + (__extension__ (__builtin_constant_p (accept) && sizeof ((accept)[0]) == 1 \ + ? ((accept)[0] == '\0' \ + ? ((void) (s), 0) \ + : ((accept)[1] == '\0' \ + ? __strspn_c1 ((s), (((accept)[0] << 8 ) & 0xff00)) \ + : __strspn_cg ((s), (accept), strlen (accept)))) \ + : __strspn_g ((s), (accept)))) + +# ifndef _FORCE_INLINES +__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); + +__STRING_INLINE size_t +__strspn_c1 (const char *__s, int __accept) +{ + register unsigned long int __d0; + register char *__res; + /* Please note that __accept never can be '\0'. 
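[Editor's note] All strcspn variants above share the same O(|s| * |reject|) strategy: obtain the length of REJECT once (at compile time, or at run time with the repne scasb / notl %%ecx idiom), then rescan REJECT for every byte of S. A portable sketch (hypothetical name) with memchr standing in for the inner repne scasb:

#include <stddef.h>
#include <string.h>

static size_t
strcspn_sketch (const char *s, const char *reject)
{
  size_t rlen = strlen (reject);  /* the notl %%ecx trick above */
  size_t n = 0;
  while (s[n] != '\0' && memchr (reject, s[n], rlen) == NULL)
    ++n;
  return n;
}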
*/
+  __asm__ __volatile__
+    ("1:\n\t"
+     "movb (%0),%b1\n\t"
+     "leal 1(%0),%0\n\t"
+     "cmpb %h1,%b1\n\t"
+     "je 1b"
+     : "=r" (__res), "=&q" (__d0)
+     : "0" (__s), "1" (__accept),
+       "m" ( *(struct { char __x[0xfffffff]; } *)__s)
+     : "cc");
+  return (__res - 1) - __s;
+}
+# endif
+
+__STRING_INLINE size_t __strspn_cg (const char *__s, const char __accept[],
+                                    size_t __accept_len);
+
+__STRING_INLINE size_t
+__strspn_cg (const char *__s, const char __accept[], size_t __accept_len)
+{
+  register unsigned long int __d0, __d1, __d2;
+  register const char *__res;
+  __asm__ __volatile__
+    ("cld\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb %%al,%%al\n\t"
+     "je 2f\n\t"
+     "movl %5,%%edi\n\t"
+     "movl %6,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je 1b\n"
+     "2:"
+     : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2)
+     : "0" (__s), "g" (__accept), "g" (__accept_len),
+       /* Since we do not know how large the memory we access is, use a
+          really large amount.  */
+       "m" ( *(struct { char __x[0xfffffff]; } *)__s),
+       "m" ( *(struct { __extension__ char __x[__accept_len]; } *)__accept)
+     : "cc");
+  return (__res - 1) - __s;
+}
+
+__STRING_INLINE size_t __strspn_g (const char *__s, const char *__accept);
+# ifdef __PIC__
+
+__STRING_INLINE size_t
+__strspn_g (const char *__s, const char *__accept)
+{
+  register unsigned long int __d0, __d1, __d2;
+  register const char *__res;
+  __asm__ __volatile__
+    ("pushl %%ebx\n\t"
+     "cld\n\t"
+     "repne; scasb\n\t"
+     "notl %%ecx\n\t"
+     "leal -1(%%ecx),%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb %%al,%%al\n\t"
+     "je 2f\n\t"
+     "movl %%edx,%%edi\n\t"
+     "movl %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je 1b\n"
+     "2:\n\t"
+     "popl %%ebx"
+     : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2)
+     : "d" (__accept), "0" (__s), "1" (0), "2" (0xffffffff), "3" (__accept)
+     : "memory", "cc");
+  return (__res - 1) - __s;
+}
+# else
+__STRING_INLINE size_t
+__strspn_g (const char *__s, const char *__accept)
+{
+  register unsigned long int __d0, __d1, __d2, __d3;
+  register const char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "repne; scasb\n\t"
+     "notl %%ecx\n\t"
+     "leal -1(%%ecx),%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb %%al,%%al\n\t"
+     "je 2f\n\t"
+     "movl %%ebx,%%edi\n\t"
+     "movl %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je 1b\n"
+     "2:"
+     : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2), "=&d" (__d3)
+     : "0" (__s), "1" (0), "2" (0xffffffff), "3" (__accept), "b" (__accept)
+     : "memory", "cc");
+  return (__res - 1) - __s;
+}
+# endif
+
+
+/* Find the first occurrence in S of any character in ACCEPT.  */
+# define _HAVE_STRING_ARCH_strpbrk 1
+# define strpbrk(s, accept) \
+  (__extension__ (__builtin_constant_p (accept) && sizeof ((accept)[0]) == 1 \
+                  ? ((accept)[0] == '\0'                                      \
+                     ? ((void) (s), (char *) 0)                               \
+                     : ((accept)[1] == '\0'                                   \
+                        ? 
strchr ((s), (accept)[0]) \ + : __strpbrk_cg ((s), (accept), strlen (accept)))) \ + : __strpbrk_g ((s), (accept)))) + +__STRING_INLINE char *__strpbrk_cg (const char *__s, const char __accept[], + size_t __accept_len); + +__STRING_INLINE char * +__strpbrk_cg (const char *__s, const char __accept[], size_t __accept_len) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %5,%%edi\n\t" + "movl %6,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n\t" + "decl %0\n\t" + "jmp 3f\n" + "2:\n\t" + "xorl %0,%0\n" + "3:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "0" (__s), "d" (__accept), "g" (__accept_len) + : "memory", "cc"); + return __res; +} + +__STRING_INLINE char *__strpbrk_g (const char *__s, const char *__accept); +# ifdef __PIC__ + +__STRING_INLINE char * +__strpbrk_g (const char *__s, const char *__accept) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("pushl %%ebx\n\t" + "movl %%edx,%%edi\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%ebx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%edx,%%edi\n\t" + "movl %%ebx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n\t" + "decl %0\n\t" + "jmp 3f\n" + "2:\n\t" + "xorl %0,%0\n" + "3:\n\t" + "popl %%ebx" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "d" (__accept), "0" (__s), "1" (0), "2" (0xffffffff) + : "memory", "cc"); + return __res; +} +# else +__STRING_INLINE char * +__strpbrk_g (const char *__s, const char *__accept) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register char *__res; + __asm__ __volatile__ + ("movl %%ebx,%%edi\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%edx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%edx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n\t" + "decl %0\n\t" + "jmp 3f\n" + "2:\n\t" + "xorl %0,%0\n" + "3:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&d" (__d2), "=&D" (__d3) + : "0" (__s), "1" (0), "2" (0xffffffff), "b" (__accept) + : "memory", "cc"); + return __res; +} +# endif + + +/* Find the first occurrence of NEEDLE in HAYSTACK. */ +# define _HAVE_STRING_ARCH_strstr 1 +# define strstr(haystack, needle) \ + (__extension__ (__builtin_constant_p (needle) && sizeof ((needle)[0]) == 1 \ + ? ((needle)[0] == '\0' \ + ? (haystack) \ + : ((needle)[1] == '\0' \ + ? strchr ((haystack), (needle)[0]) \ + : __strstr_cg ((haystack), (needle), \ + strlen (needle)))) \ + : __strstr_g ((haystack), (needle)))) + +/* Please note that this function need not handle NEEDLEs with a + length shorter than two. 
*/ +__STRING_INLINE char *__strstr_cg (const char *__haystack, + const char __needle[], + size_t __needle_len); + +__STRING_INLINE char * +__strstr_cg (const char *__haystack, const char __needle[], + size_t __needle_len) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("cld\n" \ + "1:\n\t" + "movl %6,%%edi\n\t" + "movl %5,%%eax\n\t" + "movl %4,%%ecx\n\t" + "repe; cmpsb\n\t" + "je 2f\n\t" + "cmpb $0,-1(%%esi)\n\t" + "leal 1(%%eax),%5\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n" + "2:" + : "=&a" (__res), "=&S" (__d0), "=&D" (__d1), "=&c" (__d2) + : "g" (__needle_len), "1" (__haystack), "d" (__needle) + : "memory", "cc"); + return __res; +} + +__STRING_INLINE char *__strstr_g (const char *__haystack, + const char *__needle); +# ifdef __PIC__ + +__STRING_INLINE char * +__strstr_g (const char *__haystack, const char *__needle) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "pushl %%ebx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%ebx\n" + "1:\n\t" + "movl %%edx,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%ebx,%%ecx\n\t" + "repe; cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "cmpb $0,-1(%%esi)\n\t" + "leal 1(%%eax),%%esi\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n" + "2:\n\t" + "popl %%ebx" + : "=&a" (__res), "=&c" (__d0), "=&S" (__d1), "=&D" (__d2) + : "0" (0), "1" (0xffffffff), "2" (__haystack), "3" (__needle), + "d" (__needle) + : "memory", "cc"); + return __res; +} +# else +__STRING_INLINE char * +__strstr_g (const char *__haystack, const char *__needle) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%edx\n" + "1:\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%edx,%%ecx\n\t" + "repe; cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "cmpb $0,-1(%%esi)\n\t" + "leal 1(%%eax),%%esi\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n" + "2:" + : "=&a" (__res), "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), "=&d" (__d3) + : "0" (0), "1" (0xffffffff), "2" (__haystack), "3" (__needle), + "b" (__needle) + : "memory", "cc"); + return __res; +} +# endif + + +/* Bit find functions. We define only the i686 version since for the other + processors gcc generates good code. */ +# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED +# ifdef __i686__ +# define _HAVE_STRING_ARCH_ffs 1 +# define ffs(word) (__builtin_constant_p (word) \ + ? __builtin_ffs (word) \ + : ({ int __cnt, __tmp; \ + __asm__ __volatile__ \ + ("bsfl %2,%0\n\t" \ + "cmovel %1,%0" \ + : "=&r" (__cnt), "=r" (__tmp) \ + : "rm" (word), "1" (-1)); \ + __cnt + 1; })) + +# ifndef ffsl +# define ffsl(word) ffs(word) +# endif +# endif /* i686 */ +# endif /* Misc || X/Open */ + +# ifndef _FORCE_INLINES +# undef __STRING_INLINE +# endif + +# endif /* use string inlines && GNU CC */ + +#endif diff --git a/REORG.TODO/sysdeps/x86/bits/wordsize.h b/REORG.TODO/sysdeps/x86/bits/wordsize.h new file mode 100644 index 0000000000..70f652bca1 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/wordsize.h @@ -0,0 +1,17 @@ +/* Determine the wordsize from the preprocessor defines. 
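[Editor's note] Before moving on to <bits/wordsize.h>: the strstr variants above implement the classic quadratic scan, where repe; cmpsb compares NEEDLE at the current position and, on mismatch, the search restarts one byte further into HAYSTACK. A portable sketch of the same strategy (hypothetical name); like the asm, it computes the needle length exactly once:

#include <stddef.h>
#include <string.h>

static char *
strstr_sketch (const char *haystack, const char *needle)
{
  size_t n = strlen (needle);   /* the repne scasb / notl %%ecx step above */
  for (;; ++haystack)
    {
      size_t i;
      for (i = 0; i < n && haystack[i] == needle[i]; ++i)
        ;
      if (i == n)
        return (char *) haystack;  /* an empty needle matches immediately */
      if (*haystack == '\0')
        return NULL;
    }
}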
*/ + +#if defined __x86_64__ && !defined __ILP32__ +# define __WORDSIZE 64 +#else +# define __WORDSIZE 32 +#define __WORDSIZE32_SIZE_ULONG 0 +#define __WORDSIZE32_PTRDIFF_LONG 0 +#endif + +#ifdef __x86_64__ +# define __WORDSIZE_TIME64_COMPAT32 1 +/* Both x86-64 and x32 use the 64-bit system call interface. */ +# define __SYSCALL_WORDSIZE 64 +#else +# define __WORDSIZE_TIME64_COMPAT32 0 +#endif diff --git a/REORG.TODO/sysdeps/x86/bits/xtitypes.h b/REORG.TODO/sysdeps/x86/bits/xtitypes.h new file mode 100644 index 0000000000..e153a18133 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/bits/xtitypes.h @@ -0,0 +1,33 @@ +/* bits/xtitypes.h -- Define some types used by <bits/stropts.h>. x86-64. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STROPTS_H +# error "Never include <bits/xtitypes.h> directly; use <stropts.h> instead." +#endif + +#ifndef _BITS_XTITYPES_H +#define _BITS_XTITYPES_H 1 + +#include <bits/types.h> + +/* This type is used by some structs in <bits/stropts.h>. */ +typedef __SLONG32_TYPE __t_scalar_t; +typedef __ULONG32_TYPE __t_uscalar_t; + + +#endif /* bits/xtitypes.h */ diff --git a/REORG.TODO/sysdeps/x86/cacheinfo.c b/REORG.TODO/sysdeps/x86/cacheinfo.c new file mode 100644 index 0000000000..8000fd1e99 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/cacheinfo.c @@ -0,0 +1,783 @@ +/* x86_64 cache info. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
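[Editor's note] Stepping back to <bits/wordsize.h> above: it distinguishes three ABIs, i386 (32-bit words), x32 (__x86_64__ together with __ILP32__, 32-bit words but the 64-bit syscall interface) and x86-64 proper. A quick illustrative check, assuming a glibc target where <stdint.h> pulls the header in:

#include <stdint.h>     /* includes <bits/wordsize.h> on glibc */
#include <stdio.h>

int
main (void)
{
  /* i386 and x32 print 32/4; x86-64 prints 64/8.  */
  printf ("__WORDSIZE=%d sizeof(long)=%zu\n", __WORDSIZE, sizeof (long));
  return 0;
}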
*/ + +#if IS_IN (libc) + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <unistd.h> +#include <cpuid.h> +#include <init-arch.h> + +static const struct intel_02_cache_info +{ + unsigned char idx; + unsigned char assoc; + unsigned char linesize; + unsigned char rel_name; + unsigned int size; +} intel_02_known [] = + { +#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE) + { 0x06, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 8192 }, + { 0x08, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 16384 }, + { 0x09, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE), 32768 }, + { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE), 8192 }, + { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE), 24576 }, + { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 }, + { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, + { 0x25, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0x29, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0x2c, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 }, + { 0x30, 8, 64, M(_SC_LEVEL1_ICACHE_SIZE), 32768 }, + { 0x39, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x3a, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 196608 }, + { 0x3b, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x3c, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x3d, 6, 64, M(_SC_LEVEL2_CACHE_SIZE), 393216 }, + { 0x3e, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x3f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x41, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x42, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x43, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x44, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x45, 4, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, + { 0x46, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0x47, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE), 3145728 }, + { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE), 4194304 }, + { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 6291456 }, + { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 }, + { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 }, + { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE), 6291456 }, + { 0x60, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x66, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 8192 }, + { 0x67, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 16384 }, + { 0x68, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE), 32768 }, + { 0x78, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x79, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 131072 }, + { 0x7a, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x7b, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, + { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 262144 }, + { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0x85, 8, 32, M(_SC_LEVEL2_CACHE_SIZE), 2097152 }, + { 0x86, 4, 64, M(_SC_LEVEL2_CACHE_SIZE), 524288 }, + { 0x87, 8, 64, M(_SC_LEVEL2_CACHE_SIZE), 1048576 }, + { 0xd0, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 524288 }, + { 0xd1, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, + { 0xd2, 4, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xd6, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 1048576 }, + { 0xd7, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xd8, 8, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 
0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 2097152 }, + { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 4194304 }, + { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 8388608 }, + { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 }, + { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 }, + { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 }, + }; + +#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0])) + +static int +intel_02_known_compare (const void *p1, const void *p2) +{ + const struct intel_02_cache_info *i1; + const struct intel_02_cache_info *i2; + + i1 = (const struct intel_02_cache_info *) p1; + i2 = (const struct intel_02_cache_info *) p2; + + if (i1->idx == i2->idx) + return 0; + + return i1->idx < i2->idx ? -1 : 1; +} + + +static long int +__attribute__ ((noinline)) +intel_check_word (int name, unsigned int value, bool *has_level_2, + bool *no_level_2_or_3, + const struct cpu_features *cpu_features) +{ + if ((value & 0x80000000) != 0) + /* The register value is reserved. */ + return 0; + + /* Fold the name. The _SC_ constants are always in the order SIZE, + ASSOC, LINESIZE. */ + int folded_rel_name = (M(name) / 3) * 3; + + while (value != 0) + { + unsigned int byte = value & 0xff; + + if (byte == 0x40) + { + *no_level_2_or_3 = true; + + if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + /* No need to look further. */ + break; + } + else if (byte == 0xff) + { + /* CPUID leaf 0x4 contains all the information. We need to + iterate over it. */ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + unsigned int round = 0; + while (1) + { + __cpuid_count (4, round, eax, ebx, ecx, edx); + + enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f; + if (type == null) + /* That was the end. */ + break; + + unsigned int level = (eax >> 5) & 0x7; + + if ((level == 1 && type == data + && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE)) + || (level == 1 && type == inst + && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE)) + || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE)) + || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE))) + { + unsigned int offset = M(name) - folded_rel_name; + + if (offset == 0) + /* Cache size. */ + return (((ebx >> 22) + 1) + * (((ebx >> 12) & 0x3ff) + 1) + * ((ebx & 0xfff) + 1) + * (ecx + 1)); + if (offset == 1) + return (ebx >> 22) + 1; + + assert (offset == 2); + return (ebx & 0xfff) + 1; + } + + ++round; + } + /* There is no other cache information anywhere else. */ + break; + } + else + { + if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE)) + { + /* Intel reused this value. For family 15, model 6 it + specifies the 3rd level cache. Otherwise the 2nd + level cache. */ + unsigned int family = cpu_features->family; + unsigned int model = cpu_features->model; + + if (family == 15 && model == 6) + { + /* The level 3 cache is encoded for this model like + the level 2 cache is for other models. Pretend + the caller asked for the level 2 cache. 
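[Editor's note] The CPUID leaf 4 branch above computes the cache size as ways * partitions * line size * sets, with each field stored minus one. Spelled out as a standalone helper (an illustrative sketch following the Intel SDM field layout, not part of the patch):

static unsigned long int
leaf4_cache_size (unsigned int ebx, unsigned int ecx)
{
  unsigned int ways       = (ebx >> 22) + 1;            /* EBX[31:22] */
  unsigned int partitions = ((ebx >> 12) & 0x3ff) + 1;  /* EBX[21:12] */
  unsigned int line_size  = (ebx & 0xfff) + 1;          /* EBX[11:0]  */
  unsigned int sets       = ecx + 1;                    /* ECX        */
  return (unsigned long int) ways * partitions * line_size * sets;
}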
*/
+                  name = (_SC_LEVEL2_CACHE_SIZE
+                          + (name - _SC_LEVEL3_CACHE_SIZE));
+                  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
+                }
+            }
+
+          struct intel_02_cache_info *found;
+          struct intel_02_cache_info search;
+
+          search.idx = byte;
+          found = bsearch (&search, intel_02_known, nintel_02_known,
+                           sizeof (intel_02_known[0]), intel_02_known_compare);
+          if (found != NULL)
+            {
+              if (found->rel_name == folded_rel_name)
+                {
+                  unsigned int offset = M(name) - folded_rel_name;
+
+                  if (offset == 0)
+                    /* Cache size.  */
+                    return found->size;
+                  if (offset == 1)
+                    return found->assoc;
+
+                  assert (offset == 2);
+                  return found->linesize;
+                }
+
+              if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
+                *has_level_2 = true;
+            }
+        }
+
+      /* Next byte for the next round.  */
+      value >>= 8;
+    }
+
+  /* Nothing found.  */
+  return 0;
+}
+
+
+static long int __attribute__ ((noinline))
+handle_intel (int name, const struct cpu_features *cpu_features)
+{
+  unsigned int maxidx = cpu_features->max_cpuid;
+
+  /* Return -1 for older CPUs.  */
+  if (maxidx < 2)
+    return -1;
+
+  /* OK, we can use the CPUID instruction to get all info about the
+     caches.  */
+  unsigned int cnt = 0;
+  unsigned int max = 1;
+  long int result = 0;
+  bool no_level_2_or_3 = false;
+  bool has_level_2 = false;
+
+  while (cnt++ < max)
+    {
+      unsigned int eax;
+      unsigned int ebx;
+      unsigned int ecx;
+      unsigned int edx;
+      __cpuid (2, eax, ebx, ecx, edx);
+
+      /* The low byte of EAX in the first round contains the number of
+         rounds we have to make.  At least one, the one we are already
+         doing.  */
+      if (cnt == 1)
+        {
+          max = eax & 0xff;
+          eax &= 0xffffff00;
+        }
+
+      /* Process the individual registers' values.  */
+      result = intel_check_word (name, eax, &has_level_2,
+                                 &no_level_2_or_3, cpu_features);
+      if (result != 0)
+        return result;
+
+      result = intel_check_word (name, ebx, &has_level_2,
+                                 &no_level_2_or_3, cpu_features);
+      if (result != 0)
+        return result;
+
+      result = intel_check_word (name, ecx, &has_level_2,
+                                 &no_level_2_or_3, cpu_features);
+      if (result != 0)
+        return result;
+
+      result = intel_check_word (name, edx, &has_level_2,
+                                 &no_level_2_or_3, cpu_features);
+      if (result != 0)
+        return result;
+    }
+
+  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
+      && no_level_2_or_3)
+    return -1;
+
+  return 0;
+}
+
+
+static long int __attribute__ ((noinline))
+handle_amd (int name)
+{
+  unsigned int eax;
+  unsigned int ebx;
+  unsigned int ecx;
+  unsigned int edx;
+  __cpuid (0x80000000, eax, ebx, ecx, edx);
+
+  /* No level 4 cache (yet).  */
+  if (name > _SC_LEVEL3_CACHE_LINESIZE)
+    return 0;
+
+  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
+  if (eax < fn)
+    return 0;
+
+  __cpuid (fn, eax, ebx, ecx, edx);
+
+  if (name < _SC_LEVEL1_DCACHE_SIZE)
+    {
+      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
+      ecx = edx;
+    }
+
+  switch (name)
+    {
+    case _SC_LEVEL1_DCACHE_SIZE:
+      return (ecx >> 14) & 0x3fc00;
+
+    case _SC_LEVEL1_DCACHE_ASSOC:
+      ecx >>= 16;
+      if ((ecx & 0xff) == 0xff)
+        /* Fully associative.  */
+        return (ecx << 2) & 0x3fc00;
+      return ecx & 0xff;
+
+    case _SC_LEVEL1_DCACHE_LINESIZE:
+      return ecx & 0xff;
+
+    case _SC_LEVEL2_CACHE_SIZE:
+      return (ecx & 0xf000) == 0 ? 
0 : (ecx >> 6) & 0x3fffc00; + + case _SC_LEVEL2_CACHE_ASSOC: + switch ((ecx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (ecx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); + default: + return 0; + } + /* NOTREACHED */ + + case _SC_LEVEL2_CACHE_LINESIZE: + return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; + + case _SC_LEVEL3_CACHE_SIZE: + return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1; + + case _SC_LEVEL3_CACHE_ASSOC: + switch ((edx >> 12) & 0xf) + { + case 0: + case 1: + case 2: + case 4: + return (edx >> 12) & 0xf; + case 6: + return 8; + case 8: + return 16; + case 10: + return 32; + case 11: + return 48; + case 12: + return 64; + case 13: + return 96; + case 14: + return 128; + case 15: + return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); + default: + return 0; + } + /* NOTREACHED */ + + case _SC_LEVEL3_CACHE_LINESIZE: + return (edx & 0xf000) == 0 ? 0 : edx & 0xff; + + default: + assert (! "cannot happen"); + } + return -1; +} + + +/* Get the value of the system variable NAME. */ +long int +attribute_hidden +__cache_sysconf (int name) +{ + const struct cpu_features *cpu_features = __get_cpu_features (); + + if (cpu_features->kind == arch_kind_intel) + return handle_intel (name, cpu_features); + + if (cpu_features->kind == arch_kind_amd) + return handle_amd (name); + + // XXX Fill in more vendors. + + /* CPU not known, we have no information. */ + return 0; +} + + +/* Data cache size for use in memory and string routines, typically + L1 size, rounded to multiple of 256 bytes. */ +long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2; +long int __x86_data_cache_size attribute_hidden = 32 * 1024; +/* Similar to __x86_data_cache_size_half, but not rounded. */ +long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2; +/* Similar to __x86_data_cache_size, but not rounded. */ +long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024; +/* Shared cache size for use in memory and string routines, typically + L2 or L3 size, rounded to multiple of 256 bytes. */ +long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; +long int __x86_shared_cache_size attribute_hidden = 1024 * 1024; +/* Similar to __x86_shared_cache_size_half, but not rounded. */ +long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; +/* Similar to __x86_shared_cache_size, but not rounded. */ +long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024; + +/* Threshold to use non temporal store. */ +long int __x86_shared_non_temporal_threshold attribute_hidden; + +#ifndef DISABLE_PREFETCHW +/* PREFETCHW support flag for use in memory and string routines. */ +int __x86_prefetchw attribute_hidden; +#endif + + +static void +__attribute__((constructor)) +init_cacheinfo (void) +{ + /* Find out what brand of processor. 
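[Editor's note] In handle_amd above, CPUID 0x80000005 reports the L1d size in KiB in ECX[31:24], the associativity in ECX[23:16] and the line size in ECX[7:0]; everything decoded here reaches applications through sysconf (_SC_LEVEL1_DCACHE_SIZE) and friends via __cache_sysconf. The _SC_LEVEL1_DCACHE_SIZE branch avoids an explicit multiply because

  (ecx >> 14) & 0x3fc00  ==  ((ecx >> 24) & 0xff) * 1024

shifting right by 14 and masking with 0xff << 10 extracts the top byte already scaled from KiB to bytes. An equivalent sketch:

static unsigned int
amd_l1d_size_bytes (unsigned int ecx)
{
  return ((ecx >> 24) & 0xff) * 1024;  /* same as (ecx >> 14) & 0x3fc00 */
}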
*/ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int max_cpuid_ex; + long int data = -1; + long int shared = -1; + unsigned int level; + unsigned int threads = 0; + const struct cpu_features *cpu_features = __get_cpu_features (); + int max_cpuid = cpu_features->max_cpuid; + + if (cpu_features->kind == arch_kind_intel) + { + data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features); + + long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features); + bool inclusive_cache = true; + + /* Try L3 first. */ + level = 3; + shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features); + + /* Number of logical processors sharing L2 cache. */ + int threads_l2; + + /* Number of logical processors sharing L3 cache. */ + int threads_l3; + + if (shared <= 0) + { + /* Try L2 otherwise. */ + level = 2; + shared = core; + threads_l2 = 0; + threads_l3 = -1; + } + else + { + threads_l2 = 0; + threads_l3 = 0; + } + + /* A value of 0 for the HTT bit indicates there is only a single + logical processor. */ + if (HAS_CPU_FEATURE (HTT)) + { + /* Figure out the number of logical threads that share the + highest cache level. */ + if (max_cpuid >= 4) + { + unsigned int family = cpu_features->family; + unsigned int model = cpu_features->model; + + int i = 0; + + /* Query until cache level 2 and 3 are enumerated. */ + int check = 0x1 | (threads_l3 == 0) << 1; + do + { + __cpuid_count (4, i++, eax, ebx, ecx, edx); + + /* There seems to be a bug in at least some Pentium Ds + which sometimes fail to iterate all cache parameters. + Do not loop indefinitely here, stop in this case and + assume there is no such information. */ + if ((eax & 0x1f) == 0) + goto intel_bug_no_cache_info; + + switch ((eax >> 5) & 0x7) + { + default: + break; + case 2: + if ((check & 0x1)) + { + /* Get maximum number of logical processors + sharing L2 cache. */ + threads_l2 = (eax >> 14) & 0x3ff; + check &= ~0x1; + } + break; + case 3: + if ((check & (0x1 << 1))) + { + /* Get maximum number of logical processors + sharing L3 cache. */ + threads_l3 = (eax >> 14) & 0x3ff; + + /* Check if L2 and L3 caches are inclusive. */ + inclusive_cache = (edx & 0x2) != 0; + check &= ~(0x1 << 1); + } + break; + } + } + while (check); + + /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum + numbers of addressable IDs for logical processors sharing + the cache, instead of the maximum number of threads + sharing the cache. */ + if (max_cpuid >= 11) + { + /* Find the number of logical processors shipped in + one core and apply count mask. */ + i = 0; + + /* Count SMT only if there is L3 cache. Always count + core if there is no L3 cache. */ + int count = ((threads_l2 > 0 && level == 3) + | ((threads_l3 > 0 + || (threads_l2 > 0 && level == 2)) << 1)); + + while (count) + { + __cpuid_count (11, i++, eax, ebx, ecx, edx); + + int shipped = ebx & 0xff; + int type = ecx & 0xff00; + if (shipped == 0 || type == 0) + break; + else if (type == 0x100) + { + /* Count SMT. */ + if ((count & 0x1)) + { + int count_mask; + + /* Compute count mask. */ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads_l2)); + count_mask = ~(-1 << (count_mask + 1)); + threads_l2 = (shipped - 1) & count_mask; + count &= ~0x1; + } + } + else if (type == 0x200) + { + /* Count core. */ + if ((count & (0x1 << 1))) + { + int count_mask; + int threads_core + = (level == 2 ? threads_l2 : threads_l3); + + /* Compute count mask. 
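[Editor's note] The BSR sequences in this block build a mask that rounds the maximum count up to the next power of two: for threads_core = 6, bsr yields 2, so the mask is ~(-1 << 3) = 0x7 and up to 8 addressable IDs are kept when masking (shipped - 1). As a standalone sketch (x86 only; bsr is undefined for a zero input, which the surrounding code never passes):

static unsigned int
count_mask_sketch (unsigned int max_count)
{
  unsigned int msb;
  asm ("bsr %1, %0" : "=r" (msb) : "g" (max_count));
  return ~(-1u << (msb + 1));
}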
*/ + asm ("bsr %1, %0" + : "=r" (count_mask) : "g" (threads_core)); + count_mask = ~(-1 << (count_mask + 1)); + threads_core = (shipped - 1) & count_mask; + if (level == 2) + threads_l2 = threads_core; + else + threads_l3 = threads_core; + count &= ~(0x1 << 1); + } + } + } + } + if (threads_l2 > 0) + threads_l2 += 1; + if (threads_l3 > 0) + threads_l3 += 1; + if (level == 2) + { + if (threads_l2) + { + threads = threads_l2; + if (threads > 2 && family == 6) + switch (model) + { + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Silvermont has L2 cache shared by 2 cores. */ + threads = 2; + break; + default: + break; + } + } + } + else if (threads_l3) + threads = threads_l3; + } + else + { +intel_bug_no_cache_info: + /* Assume that all logical threads share the highest cache + level. */ + + threads + = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx + >> 16) & 0xff); + } + + /* Cap usage of highest cache level to the number of supported + threads. */ + if (shared > 0 && threads > 0) + shared /= threads; + } + + /* Account for non-inclusive L2 and L3 caches. */ + if (!inclusive_cache) + { + if (threads_l2 > 0) + core /= threads_l2; + shared += core; + } + } + else if (cpu_features->kind == arch_kind_amd) + { + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); + long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); + + /* Get maximum extended function. */ + __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); + + if (shared <= 0) + /* No shared L3 cache. All we have is the L2 cache. */ + shared = core; + else + { + /* Figure out the number of logical threads that share L3. */ + if (max_cpuid_ex >= 0x80000008) + { + /* Get width of APIC ID. */ + __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx); + threads = 1 << ((ecx >> 12) & 0x0f); + } + + if (threads == 0) + { + /* If APIC ID width is not available, use logical + processor count. */ + __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx); + + if ((edx & (1 << 28)) != 0) + threads = (ebx >> 16) & 0xff; + } + + /* Cap usage of highest cache level to the number of + supported threads. */ + if (threads > 0) + shared /= threads; + + /* Account for exclusive L2 and L3 caches. */ + shared += core; + } + +#ifndef DISABLE_PREFETCHW + if (max_cpuid_ex >= 0x80000001) + { + __cpuid (0x80000001, eax, ebx, ecx, edx); + /* PREFETCHW || 3DNow! */ + if ((ecx & 0x100) || (edx & 0x80000000)) + __x86_prefetchw = -1; + } +#endif + } + + if (data > 0) + { + __x86_raw_data_cache_size_half = data / 2; + __x86_raw_data_cache_size = data; + /* Round data cache size to multiple of 256 bytes. */ + data = data & ~255L; + __x86_data_cache_size_half = data / 2; + __x86_data_cache_size = data; + } + + if (shared > 0) + { + __x86_raw_shared_cache_size_half = shared / 2; + __x86_raw_shared_cache_size = shared; + /* Round shared cache size to multiple of 256 bytes. */ + shared = shared & ~255L; + __x86_shared_cache_size_half = shared / 2; + __x86_shared_cache_size = shared; + } + + /* The large memcpy micro benchmark in glibc shows that 6 times of + shared cache size is the approximate value above which non-temporal + store becomes faster on a 8-core processor. This is the 3/4 of the + total shared cache size. 
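[Editor's note] A worked example of the assignment that follows: since `shared` has already been divided by the thread count, multiplying by `threads` restores the total shared cache size. On a hypothetical part with an 8 MiB shared L3 and 8 threads, `shared` is 1 MiB per thread, so the threshold becomes 1 MiB * 8 * 3 / 4 = 6 MiB, i.e. three quarters of the whole L3, above which non-temporal stores are expected to win.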
*/ + __x86_shared_non_temporal_threshold + = __x86_shared_cache_size * threads * 3 / 4; +} + +#endif diff --git a/REORG.TODO/sysdeps/x86/cpu-features-offsets.sym b/REORG.TODO/sysdeps/x86/cpu-features-offsets.sym new file mode 100644 index 0000000000..f6739fae81 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/cpu-features-offsets.sym @@ -0,0 +1,23 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) + +CPU_FEATURES_SIZE sizeof (struct cpu_features) +CPUID_OFFSET offsetof (struct cpu_features, cpuid) +CPUID_SIZE sizeof (struct cpuid_registers) +CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) +CPUID_EBX_OFFSET offsetof (struct cpuid_registers, ebx) +CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) +CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) +FAMILY_OFFSET offsetof (struct cpu_features, family) +MODEL_OFFSET offsetof (struct cpu_features, model) +FEATURE_OFFSET offsetof (struct cpu_features, feature) +FEATURE_SIZE sizeof (unsigned int) + +COMMON_CPUID_INDEX_1 +COMMON_CPUID_INDEX_7 +FEATURE_INDEX_1 diff --git a/REORG.TODO/sysdeps/x86/cpu-features.c b/REORG.TODO/sysdeps/x86/cpu-features.c new file mode 100644 index 0000000000..4288001cdd --- /dev/null +++ b/REORG.TODO/sysdeps/x86/cpu-features.c @@ -0,0 +1,363 @@ +/* Initialize CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <cpu-features.h> +#include <dl-hwcap.h> + +static void +get_common_indeces (struct cpu_features *cpu_features, + unsigned int *family, unsigned int *model, + unsigned int *extended_model, unsigned int *stepping) +{ + if (family) + { + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; + *extended_model = (eax >> 12) & 0xf0; + *stepping = eax & 0x0f; + if (*family == 0x0f) + { + *family += (eax >> 20) & 0xff; + *model += *extended_model; + } + } + + if (cpu_features->max_cpuid >= 7) + __cpuid_count (7, 0, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + + /* Can we call xgetbv? */ + if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) + { + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? 
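[Editor's note] The XGETBV read above is only meaningful once OSXSAVE is known to be set, and AVX is usable only if the OS saves both XMM and YMM state, i.e. XCR0 bits 1 and 2 (bit_XMM_state | bit_YMM_state = 0x6 in this patch's cpu-features.h). A minimal sketch of that test under the same assumptions:

static int
avx_state_enabled (void)
{
  unsigned int xcr0_lo, xcr0_hi;
  /* Execute only after checking CPUID.1:ECX.OSXSAVE, as done above.  */
  asm ("xgetbv" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
  return (xcr0_lo & 0x6) == 0x6;  /* bit 1: XMM state, bit 2: YMM state */
}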
*/ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX)) + { + cpu_features->feature[index_arch_AVX_Usable] + |= bit_arch_AVX_Usable; + /* The following features depend on AVX being usable. */ + /* Determine if AVX2 is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX2)) + cpu_features->feature[index_arch_AVX2_Usable] + |= bit_arch_AVX2_Usable; + /* Determine if FMA is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, FMA)) + cpu_features->feature[index_arch_FMA_Usable] + |= bit_arch_FMA_Usable; + } + + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state + | bit_ZMM16_31_state)) == + (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + { + /* Determine if AVX512F is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) + { + cpu_features->feature[index_arch_AVX512F_Usable] + |= bit_arch_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)) + cpu_features->feature[index_arch_AVX512DQ_Usable] + |= bit_arch_AVX512DQ_Usable; + } + } + } + } +} + +static inline void +init_cpu_features (struct cpu_features *cpu_features) +{ + unsigned int ebx, ecx, edx; + unsigned int family = 0; + unsigned int model = 0; + enum cpu_features_kind kind; + +#if !HAS_CPUID + if (__get_cpuid_max (0, 0) == 0) + { + kind = arch_kind_other; + goto no_cpuid; + } +#endif + + __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); + + /* This spells out "GenuineIntel". */ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { + unsigned int extended_model, stepping; + + kind = arch_kind_intel; + + get_common_indeces (cpu_features, &family, &model, &extended_model, + &stepping); + + if (family == 0x06) + { + model += extended_model; + switch (model) + { + case 0x1c: + case 0x26: + /* BSF is slow on Atom. */ + cpu_features->feature[index_arch_Slow_BSF] + |= bit_arch_Slow_BSF; + break; + + case 0x57: + /* Knights Landing. Enable Silvermont optimizations. */ + + case 0x5c: + case 0x5f: + /* Unaligned load versions are faster than SSSE3 + on Goldmont. */ + + case 0x4c: + /* Airmont is a die shrink of Silvermont. */ + + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Unaligned load versions are faster than SSSE3 + on Silvermont. */ +#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop +# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop +#endif +#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 +# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2 +#endif +#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy +# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy +#endif + cpu_features->feature[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + default: + /* Unknown family 0x06 processors. Assuming this is one + of Core i3/i5/i7 processors if AVX is available. */ + if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) + break; + + case 0x1a: + case 0x1e: + case 0x1f: + case 0x25: + case 0x2c: + case 0x2e: + case 0x2f: + /* Rep string instructions, unaligned load, unaligned copy, + and pminub are fast on Intel Core i3, i5 and i7. 
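[Editor's note] A worked example of the signature decoding that feeds this model switch, for a hypothetical CPUID leaf 1 EAX of 0x000306c3 (a Haswell signature): get_common_indeces extracts family = (eax >> 8) & 0x0f = 0x06, model = (eax >> 4) & 0x0f = 0x0c, extended_model = (eax >> 12) & 0xf0 = 0x30 and stepping = 3; because the family is 0x06, init_cpu_features adds the extended model, giving model 0x3c, which lands in the Haswell TSX cases of this switch.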
*/
+#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
+# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
+#endif
+#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
+# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
+#endif
+#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
+# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
+#endif
+              cpu_features->feature[index_arch_Fast_Rep_String]
+                |= (bit_arch_Fast_Rep_String
+                    | bit_arch_Fast_Unaligned_Load
+                    | bit_arch_Fast_Unaligned_Copy
+                    | bit_arch_Prefer_PMINUB_for_stringop);
+              break;
+
+            case 0x3f:
+              /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
+              if (stepping >= 4)
+                break;
+            case 0x3c:
+            case 0x45:
+            case 0x46:
+              /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
+                 with stepping >= 4) to avoid TSX on kernels that weren't
+                 updated with the latest microcode package (which disables
+                 the broken feature by default).  */
+              cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM;
+              break;
+            }
+        }
+
+      /* Unaligned loads with 256-bit AVX registers are faster on
+         Intel processors with AVX2.  */
+      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+        cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+          |= bit_arch_AVX_Fast_Unaligned_Load;
+
+      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
+         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
+         frequency if AVX512ER isn't available.  */
+      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+        cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
+          |= bit_arch_Prefer_No_VZEROUPPER;
+      else
+        cpu_features->feature[index_arch_Prefer_No_AVX512]
+          |= bit_arch_Prefer_No_AVX512;
+
+      /* To avoid SSE transition penalty, use _dl_runtime_resolve_slow.
+         If XGETBV supports ECX == 1, use _dl_runtime_resolve_opt.  */
+      cpu_features->feature[index_arch_Use_dl_runtime_resolve_slow]
+        |= bit_arch_Use_dl_runtime_resolve_slow;
+      if (cpu_features->max_cpuid >= 0xd)
+        {
+          unsigned int eax;
+
+          __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
+          if ((eax & (1 << 2)) != 0)
+            cpu_features->feature[index_arch_Use_dl_runtime_resolve_opt]
+              |= bit_arch_Use_dl_runtime_resolve_opt;
+        }
+    }
+  /* This spells out "AuthenticAMD".  */
+  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+    {
+      unsigned int extended_model, stepping;
+
+      kind = arch_kind_amd;
+
+      get_common_indeces (cpu_features, &family, &model, &extended_model,
+                          &stepping);
+
+      ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
+
+      unsigned int eax;
+      __cpuid (0x80000000, eax, ebx, ecx, edx);
+      if (eax >= 0x80000001)
+        __cpuid (0x80000001,
+                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
+                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
+                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
+                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
+
+      if (HAS_ARCH_FEATURE (AVX_Usable))
+        {
+          /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and
+             FMA4 requires AVX, determine if FMA4 is usable here. 
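[Editor's note] On the leaf 0xD probe above: subleaf 1 of CPUID leaf 0xD describes extended XSAVE facilities, and EAX bit 2 advertises XGETBV with ECX == 1, which _dl_runtime_resolve_opt relies on. A minimal standalone probe under the same assumption (the caller must first verify that leaf 0xD is supported, as the code above does):

#include <cpuid.h>

static int
has_xgetbv_ecx1 (void)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
  return (eax & (1 << 2)) != 0;
}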
*/ + if (CPU_FEATURES_CPU_P (cpu_features, FMA4)) + cpu_features->feature[index_arch_FMA4_Usable] + |= bit_arch_FMA4_Usable; + } + + if (family == 0x15) + { +#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward +# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward +#endif + /* "Excavator" */ + if (model >= 0x60 && model <= 0x7f) + cpu_features->feature[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Copy_Backward); + } + } + else + { + kind = arch_kind_other; + get_common_indeces (cpu_features, NULL, NULL, NULL, NULL); + } + + /* Support i586 if CX8 is available. */ + if (CPU_FEATURES_CPU_P (cpu_features, CX8)) + cpu_features->feature[index_arch_I586] |= bit_arch_I586; + + /* Support i686 if CMOV is available. */ + if (CPU_FEATURES_CPU_P (cpu_features, CMOV)) + cpu_features->feature[index_arch_I686] |= bit_arch_I686; + +#if !HAS_CPUID +no_cpuid: +#endif + + cpu_features->family = family; + cpu_features->model = model; + cpu_features->kind = kind; + + /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86. */ + GLRO(dl_platform) = NULL; + GLRO(dl_hwcap) = 0; +#if !HAVE_TUNABLES && defined SHARED + /* The glibc.tune.hwcap_mask tunable is initialized already, so no need to do + this. */ + GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT; +#endif + +#ifdef __x86_64__ + if (cpu_features->kind == arch_kind_intel) + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && CPU_FEATURES_CPU_P (cpu_features, AVX512CD)) + { + if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER)) + { + if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF)) + GLRO(dl_platform) = "xeon_phi"; + } + else + { + if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW) + && CPU_FEATURES_CPU_P (cpu_features, AVX512DQ) + && CPU_FEATURES_CPU_P (cpu_features, AVX512VL)) + GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1; + } + } + + if (GLRO(dl_platform) == NULL + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI1) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && CPU_FEATURES_CPU_P (cpu_features, LZCNT) + && CPU_FEATURES_CPU_P (cpu_features, MOVBE) + && CPU_FEATURES_CPU_P (cpu_features, POPCNT)) + GLRO(dl_platform) = "haswell"; + } +#else + if (CPU_FEATURES_CPU_P (cpu_features, SSE2)) + GLRO(dl_hwcap) |= HWCAP_X86_SSE2; + + if (CPU_FEATURES_ARCH_P (cpu_features, I686)) + GLRO(dl_platform) = "i686"; + else if (CPU_FEATURES_ARCH_P (cpu_features, I586)) + GLRO(dl_platform) = "i586"; +#endif +} diff --git a/REORG.TODO/sysdeps/x86/cpu-features.h b/REORG.TODO/sysdeps/x86/cpu-features.h new file mode 100644 index 0000000000..f2329ea696 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/cpu-features.h @@ -0,0 +1,355 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef cpu_features_h +#define cpu_features_h + +#define bit_arch_Fast_Rep_String (1 << 0) +#define bit_arch_Fast_Copy_Backward (1 << 1) +#define bit_arch_Slow_BSF (1 << 2) +#define bit_arch_Fast_Unaligned_Load (1 << 4) +#define bit_arch_Prefer_PMINUB_for_stringop (1 << 5) +#define bit_arch_AVX_Usable (1 << 6) +#define bit_arch_FMA_Usable (1 << 7) +#define bit_arch_FMA4_Usable (1 << 8) +#define bit_arch_Slow_SSE4_2 (1 << 9) +#define bit_arch_AVX2_Usable (1 << 10) +#define bit_arch_AVX_Fast_Unaligned_Load (1 << 11) +#define bit_arch_AVX512F_Usable (1 << 12) +#define bit_arch_AVX512DQ_Usable (1 << 13) +#define bit_arch_I586 (1 << 14) +#define bit_arch_I686 (1 << 15) +#define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16) +#define bit_arch_Prefer_No_VZEROUPPER (1 << 17) +#define bit_arch_Fast_Unaligned_Copy (1 << 18) +#define bit_arch_Prefer_ERMS (1 << 19) +#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20) +#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21) +#define bit_arch_Prefer_No_AVX512 (1 << 22) + +/* CPUID Feature flags. */ + +/* COMMON_CPUID_INDEX_1. */ +#define bit_cpu_CX8 (1 << 8) +#define bit_cpu_CMOV (1 << 15) +#define bit_cpu_SSE (1 << 25) +#define bit_cpu_SSE2 (1 << 26) +#define bit_cpu_SSSE3 (1 << 9) +#define bit_cpu_SSE4_1 (1 << 19) +#define bit_cpu_SSE4_2 (1 << 20) +#define bit_cpu_OSXSAVE (1 << 27) +#define bit_cpu_AVX (1 << 28) +#define bit_cpu_POPCOUNT (1 << 23) +#define bit_cpu_FMA (1 << 12) +#define bit_cpu_FMA4 (1 << 16) +#define bit_cpu_HTT (1 << 28) +#define bit_cpu_LZCNT (1 << 5) +#define bit_cpu_MOVBE (1 << 22) +#define bit_cpu_POPCNT (1 << 23) + +/* COMMON_CPUID_INDEX_7. */ +#define bit_cpu_BMI1 (1 << 3) +#define bit_cpu_BMI2 (1 << 8) +#define bit_cpu_ERMS (1 << 9) +#define bit_cpu_RTM (1 << 11) +#define bit_cpu_AVX2 (1 << 5) +#define bit_cpu_AVX512F (1 << 16) +#define bit_cpu_AVX512DQ (1 << 17) +#define bit_cpu_AVX512PF (1 << 26) +#define bit_cpu_AVX512ER (1 << 27) +#define bit_cpu_AVX512CD (1 << 28) +#define bit_cpu_AVX512BW (1 << 30) +#define bit_cpu_AVX512VL (1u << 31) + +/* XCR0 Feature flags. */ +#define bit_XMM_state (1 << 1) +#define bit_YMM_state (1 << 2) +#define bit_Opmask_state (1 << 5) +#define bit_ZMM0_15_state (1 << 6) +#define bit_ZMM16_31_state (1 << 7) + +/* The integer bit array index for the first set of internal feature bits. */ +#define FEATURE_INDEX_1 0 + +/* The current maximum size of the feature integer bit array. 
*/ +#define FEATURE_INDEX_MAX 1 + +#ifdef __ASSEMBLER__ + +# include <cpu-features-offsets.h> + +# define index_cpu_CX8 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_cpu_CMOV COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_cpu_SSE COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_cpu_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_cpu_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_cpu_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_cpu_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET +# define index_cpu_ERMS COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET +# define index_cpu_MOVBE COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET + +# define index_arch_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_I586 FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_I686 FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Prefer_ERMS FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE +# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1*FEATURE_SIZE + + +# if defined (_LIBC) && !IS_IN (nonlib) +# ifdef __x86_64__ +# ifdef SHARED +# if IS_IN (rtld) +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, field, name) \ + testl $(bit_##field##_##name), \ + _rtld_local_ro+offset+(index_##field##_##name)(%rip) +# else +# define LOAD_RTLD_GLOBAL_RO_RDX \ + mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP +# define HAS_FEATURE(offset, field, name) \ + testl $(bit_##field##_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%rdx) +# endif +# else /* SHARED */ +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, field, name) \ + testl $(bit_##field##_##name), \ + _dl_x86_cpu_features+offset+(index_##field##_##name)(%rip) +# endif /* !SHARED */ +# else /* __x86_64__ */ +# ifdef SHARED +# define LOAD_FUNC_GOT_EAX(func) \ + leal func@GOTOFF(%edx), %eax +# if IS_IN (rtld) +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx) +# define HAS_FEATURE(offset, field, name) \ + testl $(bit_##field##_##name), \ + offset+(index_##field##_##name)+_rtld_local_ro@GOTOFF(%edx) +# else +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx); \ + mov 
_rtld_global_ro@GOT(%edx), %ecx +# define HAS_FEATURE(offset, field, name) \ + testl $(bit_##field##_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%ecx) +# endif +# else /* SHARED */ +# define LOAD_FUNC_GOT_EAX(func) \ + leal func, %eax +# define LOAD_GOT_AND_RTLD_GLOBAL_RO +# define HAS_FEATURE(offset, field, name) \ + testl $(bit_##field##_##name), \ + _dl_x86_cpu_features+offset+(index_##field##_##name) +# endif /* !SHARED */ +# endif /* !__x86_64__ */ +# else /* _LIBC && !nonlib */ +# error "Sorry, <cpu-features.h> is unimplemented for assembler" +# endif /* !_LIBC || nonlib */ + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, cpu, name) +# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, arch, name) + +#else /* __ASSEMBLER__ */ + +enum + { + COMMON_CPUID_INDEX_1 = 0, + COMMON_CPUID_INDEX_7, + COMMON_CPUID_INDEX_80000001, /* for AMD */ + /* Keep the following line at the end. */ + COMMON_CPUID_INDEX_MAX + }; + +struct cpu_features +{ + enum cpu_features_kind + { + arch_kind_unknown = 0, + arch_kind_intel, + arch_kind_amd, + arch_kind_other + } kind; + int max_cpuid; + struct cpuid_registers + { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + } cpuid[COMMON_CPUID_INDEX_MAX]; + unsigned int family; + unsigned int model; + unsigned int feature[FEATURE_INDEX_MAX]; +}; + +/* Used from outside of glibc to get access to the CPU features + structure. */ +extern const struct cpu_features *__get_cpu_features (void) + __attribute__ ((const)); + +# if defined (_LIBC) && !IS_IN (nonlib) +/* Unused for x86. */ +# define INIT_ARCH() +# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) +# endif + + +/* Only used directly in cpu-features.c. */ +# define CPU_FEATURES_CPU_P(ptr, name) \ + ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0) +# define CPU_FEATURES_ARCH_P(ptr, name) \ + ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0) + +/* HAS_* evaluates to true if we may use the feature at runtime. 
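   As an illustrative sketch (__memcpy_avx2 is a hypothetical
   alternative implementation), an IFUNC selector written in
   assembler might use these macros like this:

	LOAD_RTLD_GLOBAL_RO_RDX
	HAS_ARCH_FEATURE (AVX2_Usable)
	jz	1f
	leaq	__memcpy_avx2(%rip), %rax
	ret
1:	...fall back to a baseline implementation...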
*/ +# define HAS_CPU_FEATURE(name) \ + CPU_FEATURES_CPU_P (__get_cpu_features (), name) +# define HAS_ARCH_FEATURE(name) \ + CPU_FEATURES_ARCH_P (__get_cpu_features (), name) + +# define index_cpu_CX8 COMMON_CPUID_INDEX_1 +# define index_cpu_CMOV COMMON_CPUID_INDEX_1 +# define index_cpu_SSE COMMON_CPUID_INDEX_1 +# define index_cpu_SSE2 COMMON_CPUID_INDEX_1 +# define index_cpu_SSSE3 COMMON_CPUID_INDEX_1 +# define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1 +# define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1 +# define index_cpu_AVX COMMON_CPUID_INDEX_1 +# define index_cpu_AVX2 COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512F COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512DQ COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512PF COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512ER COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512CD COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512BW COMMON_CPUID_INDEX_7 +# define index_cpu_AVX512VL COMMON_CPUID_INDEX_7 +# define index_cpu_ERMS COMMON_CPUID_INDEX_7 +# define index_cpu_RTM COMMON_CPUID_INDEX_7 +# define index_cpu_FMA COMMON_CPUID_INDEX_1 +# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001 +# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1 +# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1 +# define index_cpu_HTT COMMON_CPUID_INDEX_1 +# define index_cpu_BMI1 COMMON_CPUID_INDEX_7 +# define index_cpu_BMI2 COMMON_CPUID_INDEX_7 +# define index_cpu_LZCNT COMMON_CPUID_INDEX_1 +# define index_cpu_MOVBE COMMON_CPUID_INDEX_1 +# define index_cpu_POPCNT COMMON_CPUID_INDEX_1 + +# define reg_CX8 edx +# define reg_CMOV edx +# define reg_SSE edx +# define reg_SSE2 edx +# define reg_SSSE3 ecx +# define reg_SSE4_1 ecx +# define reg_SSE4_2 ecx +# define reg_AVX ecx +# define reg_AVX2 ebx +# define reg_AVX512F ebx +# define reg_AVX512DQ ebx +# define reg_AVX512PF ebx +# define reg_AVX512ER ebx +# define reg_AVX512CD ebx +# define reg_AVX512BW ebx +# define reg_AVX512VL ebx +# define reg_ERMS ebx +# define reg_RTM ebx +# define reg_FMA ecx +# define reg_FMA4 ecx +# define reg_POPCOUNT ecx +# define reg_OSXSAVE ecx +# define reg_HTT edx +# define reg_BMI1 ebx +# define reg_BMI2 ebx +# define reg_LZCNT ecx +# define reg_MOVBE ecx +# define reg_POPCNT ecx + +# define index_arch_Fast_Rep_String FEATURE_INDEX_1 +# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1 +# define index_arch_Slow_BSF FEATURE_INDEX_1 +# define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 +# define index_arch_AVX_Usable FEATURE_INDEX_1 +# define index_arch_FMA_Usable FEATURE_INDEX_1 +# define index_arch_FMA4_Usable FEATURE_INDEX_1 +# define index_arch_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_arch_AVX2_Usable FEATURE_INDEX_1 +# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_arch_AVX512F_Usable FEATURE_INDEX_1 +# define index_arch_AVX512DQ_Usable FEATURE_INDEX_1 +# define index_arch_I586 FEATURE_INDEX_1 +# define index_arch_I686 FEATURE_INDEX_1 +# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1 +# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1 +# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1 +# define index_arch_Prefer_ERMS FEATURE_INDEX_1 +# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1 +# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1 +# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1 + +#endif /* !__ASSEMBLER__ */ + +#ifdef __x86_64__ +# define HAS_CPUID 1 +#elif defined __i586__ || defined __pentium__ +# define HAS_CPUID 1 +# define HAS_I586 1 +# define HAS_I686 
HAS_ARCH_FEATURE (I686) +#elif (defined __i686__ || defined __pentiumpro__ \ + || defined __pentium4__ || defined __nocona__ \ + || defined __atom__ || defined __core2__ \ + || defined __corei7__ || defined __corei7_avx__ \ + || defined __core_avx2__ || defined __nehalem__ \ + || defined __sandybridge__ || defined __haswell__ \ + || defined __knl__ || defined __bonnell__ \ + || defined __silvermont__ \ + || defined __k6__ || defined __k8__ \ + || defined __athlon__ || defined __amdfam10__ \ + || defined __bdver1__ || defined __bdver2__ \ + || defined __bdver3__ || defined __bdver4__ \ + || defined __btver1__ || defined __btver2__) +# define HAS_CPUID 1 +# define HAS_I586 1 +# define HAS_I686 1 +#else +# define HAS_CPUID 0 +# define HAS_I586 HAS_ARCH_FEATURE (I586) +# define HAS_I686 HAS_ARCH_FEATURE (I686) +#endif + +#endif /* cpu_features_h */ diff --git a/REORG.TODO/sysdeps/x86/dl-get-cpu-features.c b/REORG.TODO/sysdeps/x86/dl-get-cpu-features.c new file mode 100644 index 0000000000..c9bc6820aa --- /dev/null +++ b/REORG.TODO/sysdeps/x86/dl-get-cpu-features.c @@ -0,0 +1,27 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#include <ldsodefs.h> + +#undef __get_cpu_features + +const struct cpu_features * +__get_cpu_features (void) +{ + return &GLRO(dl_x86_cpu_features); +} diff --git a/REORG.TODO/sysdeps/x86/dl-hwcap.h b/REORG.TODO/sysdeps/x86/dl-hwcap.h new file mode 100644 index 0000000000..c95668415a --- /dev/null +++ b/REORG.TODO/sysdeps/x86/dl-hwcap.h @@ -0,0 +1,75 @@ +/* x86 version of hardware capability information handling macros. + Copyright (C) 2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_HWCAP_H +#define _DL_HWCAP_H + +#if IS_IN (ldconfig) +/* Since ldconfig processes both i386 and x86-64 libraries, it needs + to cover all platforms and hardware capabilities. */ +# define HWCAP_PLATFORMS_START 0 +# define HWCAP_PLATFORMS_COUNT 4 +# define HWCAP_START 0 +# define HWCAP_COUNT 2 +# define HWCAP_IMPORTANT (HWCAP_X86_SSE2 | HWCAP_X86_AVX512_1) +#elif defined __x86_64__ +/* For 64 bit, only cover x86-64 platforms and capabilities. 
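   With the values below, ld.so on x86-64 considers the "haswell" and
   "xeon_phi" entries of _dl_x86_platforms and the "avx512_1" entry
   of _dl_x86_hwcap_flags when matching platform and hwcap
   subdirectories.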
*/ +# define HWCAP_PLATFORMS_START 2 +# define HWCAP_PLATFORMS_COUNT 4 +# define HWCAP_START 1 +# define HWCAP_COUNT 2 +# define HWCAP_IMPORTANT (HWCAP_X86_AVX512_1) +#else +/* For 32 bit, only cover i586, i686 and SSE2. */ +# define HWCAP_PLATFORMS_START 0 +# define HWCAP_PLATFORMS_COUNT 2 +# define HWCAP_START 0 +# define HWCAP_COUNT 1 +# define HWCAP_IMPORTANT (HWCAP_X86_SSE2) +#endif + +enum +{ + HWCAP_X86_SSE2 = 1 << 0, + HWCAP_X86_AVX512_1 = 1 << 1 +}; + +static inline const char * +__attribute__ ((unused)) +_dl_hwcap_string (int idx) +{ + return GLRO(dl_x86_hwcap_flags)[idx]; +}; + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_hwcap (const char *str) +{ + int i; + + for (i = HWCAP_START; i < HWCAP_COUNT; i++) + { + if (strcmp (str, GLRO(dl_x86_hwcap_flags)[i]) == 0) + return i; + } + return -1; +}; + +/* We cannot provide a general printing function. */ +#define _dl_procinfo(type, word) -1 + +#endif /* dl-hwcap.h */ diff --git a/REORG.TODO/sysdeps/x86/dl-procinfo.c b/REORG.TODO/sysdeps/x86/dl-procinfo.c new file mode 100644 index 0000000000..43ab8fe25b --- /dev/null +++ b/REORG.TODO/sysdeps/x86/dl-procinfo.c @@ -0,0 +1,88 @@ +/* Data for x86 version of processor capability information. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_HWCAP_COUNT, + HWCAP_PLATFORMS_START and HWCAP_PLATFORMS_COUNT definitions in + dl-hwcap.h. + + If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. 
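+
+   As a sketch, the _dl_x86_hwcap_flags entry below expands to
+
+     PROCINFO_CLASS const char _dl_x86_hwcap_flags[2][9];
+
+   when PROCINFO_DECL is defined, to the same definition with the
+   initializer { "sse2", "avx512_1" } appended when neither
+   PROCINFO_DECL nor SHARED is defined, and to the designated
+   initializer
+
+     ._dl_x86_hwcap_flags = { "sse2", "avx512_1" },
+
+   inside the enclosing struct initializer otherwise.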
+ */ + +#if !IS_IN (ldconfig) +# if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_cpu_features +# else +PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features +# endif +# ifndef PROCINFO_DECL += { } +# endif +# if !defined SHARED || defined PROCINFO_DECL +; +# else +, +# endif +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_hwcap_flags +#else +PROCINFO_CLASS const char _dl_x86_hwcap_flags[2][9] +#endif +#ifndef PROCINFO_DECL += { + "sse2", "avx512_1" + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_x86_platforms +#else +PROCINFO_CLASS const char _dl_x86_platforms[4][9] +#endif +#ifndef PROCINFO_DECL += { + "i586", "i686", "haswell", "xeon_phi" + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif diff --git a/REORG.TODO/sysdeps/x86/dl-procinfo.h b/REORG.TODO/sysdeps/x86/dl-procinfo.h new file mode 100644 index 0000000000..5feb1467e4 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/dl-procinfo.h @@ -0,0 +1,48 @@ +/* x86 version of processor capability information handling macros. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_PROCINFO_H +#define _DL_PROCINFO_H 1 +#include <ldsodefs.h> +#include <dl-hwcap.h> + +#define _DL_HWCAP_COUNT HWCAP_COUNT +#define _DL_PLATFORMS_COUNT HWCAP_PLATFORMS_COUNT + +/* Start at 48 to reserve spaces for hardware capabilities. */ +#define _DL_FIRST_PLATFORM 48 +/* Mask to filter out platforms. */ +#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \ + << _DL_FIRST_PLATFORM) + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_platform (const char *str) +{ + int i; + + if (str != NULL) + for (i = HWCAP_PLATFORMS_START; i < HWCAP_PLATFORMS_COUNT; ++i) + { + if (strcmp (str, GLRO(dl_x86_platforms)[i]) == 0) + return _DL_FIRST_PLATFORM + i; + } + return -1; +}; + +#endif /* dl-procinfo.h */ diff --git a/REORG.TODO/sysdeps/x86/elide.h b/REORG.TODO/sysdeps/x86/elide.h new file mode 100644 index 0000000000..53de41836e --- /dev/null +++ b/REORG.TODO/sysdeps/x86/elide.h @@ -0,0 +1,119 @@ +/* elide.h: Generic lock elision support. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifndef ELIDE_H +#define ELIDE_H 1 + +#include <hle.h> +#include <elision-conf.h> +#include <atomic.h> + + +/* Adapt elision with ADAPT_COUNT and STATUS and decide retries. */ + +static inline bool +elision_adapt(signed char *adapt_count, unsigned int status) +{ + if (status & _XABORT_RETRY) + return false; + if ((status & _XABORT_EXPLICIT) + && _XABORT_CODE (status) == _ABORT_LOCK_BUSY) + { + /* Right now we skip here. Better would be to wait a bit + and retry. This likely needs some spinning. Be careful + to avoid writing the lock. + Using relaxed MO and separate atomic accesses is sufficient because + adapt_count is just a hint. */ + if (atomic_load_relaxed (adapt_count) != __elision_aconf.skip_lock_busy) + atomic_store_relaxed (adapt_count, __elision_aconf.skip_lock_busy); + } + /* Internal abort. There is no chance for retry. + Use the normal locking and next time use lock. + Be careful to avoid writing to the lock. See above for MO. */ + else if (atomic_load_relaxed (adapt_count) + != __elision_aconf.skip_lock_internal_abort) + atomic_store_relaxed (adapt_count, + __elision_aconf.skip_lock_internal_abort); + return true; +} + +/* is_lock_free must be executed inside the transaction */ + +/* Returns true if lock defined by IS_LOCK_FREE was elided. + ADAPT_COUNT is a per-lock state variable; it must be accessed atomically + to avoid data races but is just a hint, so using relaxed MO and separate + atomic loads and stores instead of atomic read-modify-write operations is + sufficient. */ + +#define ELIDE_LOCK(adapt_count, is_lock_free) \ + ({ \ + int ret = 0; \ + \ + if (atomic_load_relaxed (&(adapt_count)) <= 0) \ + { \ + for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \ + { \ + unsigned int status; \ + if ((status = _xbegin ()) == _XBEGIN_STARTED) \ + { \ + if (is_lock_free) \ + { \ + ret = 1; \ + break; \ + } \ + _xabort (_ABORT_LOCK_BUSY); \ + } \ + if (!elision_adapt (&(adapt_count), status)) \ + break; \ + } \ + } \ + else \ + atomic_store_relaxed (&(adapt_count), \ + atomic_load_relaxed (&(adapt_count)) - 1); \ + ret; \ + }) + +/* Returns true if lock defined by IS_LOCK_FREE was try-elided. + ADAPT_COUNT is a per-lock state variable. */ + +#define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({ \ + int ret = 0; \ + if (__elision_aconf.retry_try_xbegin > 0) \ + { \ + if (write) \ + _xabort (_ABORT_NESTED_TRYLOCK); \ + ret = ELIDE_LOCK (adapt_count, is_lock_free); \ + } \ + ret; \ + }) + +/* Returns true if lock defined by IS_LOCK_FREE was elided. The call + to _xend crashes if the application incorrectly tries to unlock a + lock which has not been locked. 
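   A minimal usage sketch (lock->owner and lock->elision are
   hypothetical fields; the predicate must be true exactly when the
   lock is free):

     if (ELIDE_LOCK (lock->elision, lock->owner == 0))
       return 0;		/* Elided; critical section runs
				   transactionally.  */
     ...			/* Otherwise take the real lock.  */

   and on the unlock path:

     if (ELIDE_UNLOCK (lock->owner == 0))
       return 0;		/* Transaction committed by _xend.  */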
*/ + +#define ELIDE_UNLOCK(is_lock_free) \ + ({ \ + int ret = 0; \ + if (is_lock_free) \ + { \ + _xend (); \ + ret = 1; \ + } \ + ret; \ + }) + +#endif diff --git a/REORG.TODO/sysdeps/x86/fpu/Makefile b/REORG.TODO/sysdeps/x86/fpu/Makefile new file mode 100644 index 0000000000..a8047a4504 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/Makefile @@ -0,0 +1,11 @@ +ifeq ($(subdir),math) +libm-support += powl_helper +tests += test-fenv-sse test-fenv-clear-sse test-fenv-x87 test-fenv-sse-2 \ + test-flt-eval-method-387 test-flt-eval-method-sse +CFLAGS-test-fenv-sse.c += -msse2 -mfpmath=sse +CFLAGS-test-fenv-clear-sse.c += -msse2 -mfpmath=sse +CFLAGS-test-fenv-sse-2.c += -msse2 -mfpmath=sse +CFLAGS-test-flt-eval-method-387.c += -fexcess-precision=standard -mfpmath=387 +CFLAGS-test-flt-eval-method-sse.c += -fexcess-precision=standard -msse2 \ + -mfpmath=sse +endif diff --git a/REORG.TODO/sysdeps/x86/fpu/bits/fenv.h b/REORG.TODO/sysdeps/x86/fpu/bits/fenv.h new file mode 100644 index 0000000000..fd7327902c --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/bits/fenv.h @@ -0,0 +1,168 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + +/* Define bits representing the exception. We use the bit positions + of the appropriate bits in the FPU control word. */ +enum + { + FE_INVALID = +#define FE_INVALID 0x01 + FE_INVALID, + __FE_DENORM = 0x02, + FE_DIVBYZERO = +#define FE_DIVBYZERO 0x04 + FE_DIVBYZERO, + FE_OVERFLOW = +#define FE_OVERFLOW 0x08 + FE_OVERFLOW, + FE_UNDERFLOW = +#define FE_UNDERFLOW 0x10 + FE_UNDERFLOW, + FE_INEXACT = +#define FE_INEXACT 0x20 + FE_INEXACT + }; + +#define FE_ALL_EXCEPT \ + (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID) + +/* The ix87 FPU supports all of the four defined rounding modes. We + use again the bit positions in the FPU control word as the values + for the appropriate macros. */ +enum + { + FE_TONEAREST = +#define FE_TONEAREST 0 + FE_TONEAREST, + FE_DOWNWARD = +#define FE_DOWNWARD 0x400 + FE_DOWNWARD, + FE_UPWARD = +#define FE_UPWARD 0x800 + FE_UPWARD, + FE_TOWARDZERO = +#define FE_TOWARDZERO 0xc00 + FE_TOWARDZERO + }; + + +/* Type representing exception flags. */ +typedef unsigned short int fexcept_t; + + +/* Type representing floating-point environment. This structure + corresponds to the layout of the block written by the `fstenv' + instruction and has additional fields for the contents of the MXCSR + register as written by the `stmxcsr' instruction. 
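   A fenv_t object is normally written and read only through the
   <fenv.h> interfaces, e.g. (a sketch):

     fenv_t env;
     fegetenv (&env);	/* Capture control and status words.  */
     ...
     fesetenv (&env);	/* Restore the saved environment.  */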
*/ +typedef struct + { + unsigned short int __control_word; + unsigned short int __glibc_reserved1; + unsigned short int __status_word; + unsigned short int __glibc_reserved2; + unsigned short int __tags; + unsigned short int __glibc_reserved3; + unsigned int __eip; + unsigned short int __cs_selector; + unsigned int __opcode:11; + unsigned int __glibc_reserved4:5; + unsigned int __data_offset; + unsigned short int __data_selector; + unsigned short int __glibc_reserved5; +#ifdef __x86_64__ + unsigned int __mxcsr; +#endif + } +fenv_t; + +/* If the default argument is used we use this value. */ +#define FE_DFL_ENV ((const fenv_t *) -1) + +#ifdef __USE_GNU +/* Floating-point environment where none of the exception is masked. */ +# define FE_NOMASK_ENV ((const fenv_t *) -2) +#endif + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. */ +typedef struct + { + unsigned short int __control_word; + unsigned short int __glibc_reserved; + unsigned int __mxcsr; + } +femode_t; + +/* Default floating-point control modes. */ +# define FE_DFL_MODE ((const femode_t *) -1L) +#endif + + +#ifdef __USE_EXTERN_INLINES +__BEGIN_DECLS + +/* Optimized versions. */ +extern int __REDIRECT_NTH (__feraiseexcept_renamed, (int), feraiseexcept); +__extern_always_inline void +__NTH (__feraiseexcept_invalid_divbyzero (int __excepts)) +{ + if ((FE_INVALID & __excepts) != 0) + { + /* One example of an invalid operation is 0.0 / 0.0. */ + float __f = 0.0; + +# ifdef __SSE_MATH__ + __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f)); +# else + __asm__ __volatile__ ("fdiv %%st, %%st(0); fwait" + : "=t" (__f) : "0" (__f)); +# endif + (void) &__f; + } + if ((FE_DIVBYZERO & __excepts) != 0) + { + float __f = 1.0; + float __g = 0.0; + +# ifdef __SSE_MATH__ + __asm__ __volatile__ ("divss %1, %0" : : "x" (__f), "x" (__g)); +# else + __asm__ __volatile__ ("fdivp %%st, %%st(1); fwait" + : "=t" (__f) : "0" (__f), "u" (__g) : "st(1)"); +# endif + (void) &__f; + } +} +__extern_inline int +__NTH (feraiseexcept (int __excepts)) +{ + if (__builtin_constant_p (__excepts) + && (__excepts & ~(FE_INVALID | FE_DIVBYZERO)) == 0) + { + __feraiseexcept_invalid_divbyzero (__excepts); + return 0; + } + + return __feraiseexcept_renamed (__excepts); +} + +__END_DECLS +#endif diff --git a/REORG.TODO/sysdeps/x86/fpu/bits/math-vector.h b/REORG.TODO/sysdeps/x86/fpu/bits/math-vector.h new file mode 100644 index 0000000000..419af076a7 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/bits/math-vector.h @@ -0,0 +1,63 @@ +/* Platform-specific SIMD declarations of math functions. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never include <bits/math-vector.h> directly;\ + include <math.h> instead." +#endif + +/* Get default empty definitions for simd declarations. 
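   With the SIMD declarations below in effect, a loop such as
   (a sketch)

     #pragma omp simd
     for (int i = 0; i < n; i++)
       out[i] = sin (in[i]);

   built with -ffast-math and OpenMP SIMD support enabled can be
   vectorized by GCC into calls to the libmvec vector variants of
   sin instead of the scalar function.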
*/ +#include <bits/libm-simd-decl-stubs.h> + +#if defined __x86_64__ && defined __FAST_MATH__ +# if defined _OPENMP && _OPENMP >= 201307 +/* OpenMP case. */ +# define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch") +# elif __GNUC_PREREQ (6,0) +/* W/o OpenMP use GCC 6.* __attribute__ ((__simd__)). */ +# define __DECL_SIMD_x86_64 __attribute__ ((__simd__ ("notinbranch"))) +# endif + +# ifdef __DECL_SIMD_x86_64 +# undef __DECL_SIMD_cos +# define __DECL_SIMD_cos __DECL_SIMD_x86_64 +# undef __DECL_SIMD_cosf +# define __DECL_SIMD_cosf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_sin +# define __DECL_SIMD_sin __DECL_SIMD_x86_64 +# undef __DECL_SIMD_sinf +# define __DECL_SIMD_sinf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_sincos +# define __DECL_SIMD_sincos __DECL_SIMD_x86_64 +# undef __DECL_SIMD_sincosf +# define __DECL_SIMD_sincosf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_log +# define __DECL_SIMD_log __DECL_SIMD_x86_64 +# undef __DECL_SIMD_logf +# define __DECL_SIMD_logf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_exp +# define __DECL_SIMD_exp __DECL_SIMD_x86_64 +# undef __DECL_SIMD_expf +# define __DECL_SIMD_expf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_pow +# define __DECL_SIMD_pow __DECL_SIMD_x86_64 +# undef __DECL_SIMD_powf +# define __DECL_SIMD_powf __DECL_SIMD_x86_64 + +# endif +#endif diff --git a/REORG.TODO/sysdeps/x86/fpu/bits/mathinline.h b/REORG.TODO/sysdeps/x86/fpu/bits/mathinline.h new file mode 100644 index 0000000000..bcd99bdf74 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/bits/mathinline.h @@ -0,0 +1,962 @@ +/* Inline math functions for i387 and SSE. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/mathinline.h> directly; include <math.h> instead." +#endif + +#ifndef __extern_always_inline +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE __extern_always_inline +#endif + + +#if defined __USE_ISOC99 && defined __GNUC__ && __GNUC__ >= 2 +/* GCC 2.97 and up have builtins that actually can be used. */ +# if !__GNUC_PREREQ (2,97) +/* ISO C99 defines some macros to perform unordered comparisons. The + ix87 FPU supports this with special opcodes and we should use them. + These must not be inline functions since we have to be able to handle + all floating-point types. */ +# undef isgreater +# undef isgreaterequal +# undef isless +# undef islessequal +# undef islessgreater +# undef isunordered +# ifdef __i686__ +/* For the PentiumPro and more recent processors we can provide + better code. 
*/ +# define isgreater(x, y) \ + ({ register char __result; \ + __asm__ ("fucomip %%st(1), %%st; seta %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ + __result; }) +# define isgreaterequal(x, y) \ + ({ register char __result; \ + __asm__ ("fucomip %%st(1), %%st; setae %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ + __result; }) + +# define isless(x, y) \ + ({ register char __result; \ + __asm__ ("fucomip %%st(1), %%st; seta %%al" \ + : "=a" (__result) : "u" (x), "t" (y) : "cc", "st"); \ + __result; }) + +# define islessequal(x, y) \ + ({ register char __result; \ + __asm__ ("fucomip %%st(1), %%st; setae %%al" \ + : "=a" (__result) : "u" (x), "t" (y) : "cc", "st"); \ + __result; }) + +# define islessgreater(x, y) \ + ({ register char __result; \ + __asm__ ("fucomip %%st(1), %%st; setne %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ + __result; }) + +# define isunordered(x, y) \ + ({ register char __result; \ + __asm__ ("fucomip %%st(1), %%st; setp %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \ + __result; }) +# else +/* This is the dumb, portable code for i386 and above. */ +# define isgreater(x, y) \ + ({ register char __result; \ + __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ + __result; }) + +# define isgreaterequal(x, y) \ + ({ register char __result; \ + __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ + __result; }) + +# define isless(x, y) \ + ({ register char __result; \ + __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \ + : "=a" (__result) : "u" (x), "t" (y) : "cc", "st", "st(1)"); \ + __result; }) + +# define islessequal(x, y) \ + ({ register char __result; \ + __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \ + : "=a" (__result) : "u" (x), "t" (y) : "cc", "st", "st(1)"); \ + __result; }) + +# define islessgreater(x, y) \ + ({ register char __result; \ + __asm__ ("fucompp; fnstsw; testb $0x44, %%ah; setz %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ + __result; }) + +# define isunordered(x, y) \ + ({ register char __result; \ + __asm__ ("fucompp; fnstsw; sahf; setp %%al" \ + : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \ + __result; }) +# endif /* __i686__ */ +# endif /* GCC 2.97 */ + +/* The gcc, version 2.7 or below, has problems with all this inlining + code. So disable it for this version of the compiler. */ +# if __GNUC_PREREQ (2, 8) + +/* Test for negative number. Used in the signbit() macro. */ +__MATH_INLINE int +__NTH (__signbitf (float __x)) +{ +# ifdef __SSE2_MATH__ + int __m; + __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); + return (__m & 0x8) != 0; +# else + __extension__ union { float __f; int __i; } __u = { __f: __x }; + return __u.__i < 0; +# endif +} +__MATH_INLINE int +__NTH (__signbit (double __x)) +{ +# ifdef __SSE2_MATH__ + int __m; + __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); + return (__m & 0x80) != 0; +# else + __extension__ union { double __d; int __i[2]; } __u = { __d: __x }; + return __u.__i[1] < 0; +# endif +} +__MATH_INLINE int +__NTH (__signbitl (long double __x)) +{ + __extension__ union { long double __l; int __i[3]; } __u = { __l: __x }; + return (__u.__i[2] & 0x8000) != 0; +} + +# endif +#endif + + +/* The gcc, version 2.7 or below, has problems with all this inlining + code. So disable it for this version of the compiler. 
*/
+#if __GNUC_PREREQ (2, 8)
+# if !__GNUC_PREREQ (3, 4) && !defined __NO_MATH_INLINES \
+     && defined __OPTIMIZE__
+/* GCC 3.4 introduced builtins for all functions below, so
+   there's no need to define any of these inline functions.  */
+
+# ifdef __USE_ISOC99
+
+/* Round to nearest integer.  */
+# ifdef __SSE_MATH__
+__MATH_INLINE long int
+__NTH (lrintf (float __x))
+{
+  long int __res;
+  /* Mark as volatile since the result is dependent on the state of
+     the SSE control register (the rounding mode).  Otherwise GCC might
+     remove these assembler instructions since it does not know about
+     the rounding mode change and cannot currently be told.  */
+  __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x));
+  return __res;
+}
+# endif
+# ifdef __SSE2_MATH__
+__MATH_INLINE long int
+__NTH (lrint (double __x))
+{
+  long int __res;
+  /* Mark as volatile since the result is dependent on the state of
+     the SSE control register (the rounding mode).  Otherwise GCC might
+     remove these assembler instructions since it does not know about
+     the rounding mode change and cannot currently be told.  */
+  __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
+  return __res;
+}
+# endif
+# ifdef __x86_64__
+__extension__
+__MATH_INLINE long long int
+__NTH (llrintf (float __x))
+{
+  long long int __res;
+  /* Mark as volatile since the result is dependent on the state of
+     the SSE control register (the rounding mode).  Otherwise GCC might
+     remove these assembler instructions since it does not know about
+     the rounding mode change and cannot currently be told.  */
+  __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x));
+  return __res;
+}
+__extension__
+__MATH_INLINE long long int
+__NTH (llrint (double __x))
+{
+  long long int __res;
+  /* Mark as volatile since the result is dependent on the state of
+     the SSE control register (the rounding mode).  Otherwise GCC might
+     remove these assembler instructions since it does not know about
+     the rounding mode change and cannot currently be told.  */
+  __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
+  return __res;
+}
+# endif
+
+# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
+     && defined __SSE2_MATH__
+/* Determine maximum of two values.  */
+__MATH_INLINE float
+__NTH (fmaxf (float __x, float __y))
+{
+# ifdef __AVX__
+  float __res;
+  __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+# else
+  __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y));
+  return __x;
+# endif
+}
+__MATH_INLINE double
+__NTH (fmax (double __x, double __y))
+{
+# ifdef __AVX__
+  double __res;
+  __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+# else
+  __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y));
+  return __x;
+# endif
+}
+
+/* Determine minimum of two values.  */
+__MATH_INLINE float
+__NTH (fminf (float __x, float __y))
+{
+# ifdef __AVX__
+  float __res;
+  __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+# else
+  __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y));
+  return __x;
+# endif
+}
+__MATH_INLINE double
+__NTH (fmin (double __x, double __y))
+{
+# ifdef __AVX__
+  double __res;
+  __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y));
+  return __res;
+# else
+  __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y));
+  return __x;
+# endif
+}
+# endif
+
+# endif
+
+# if defined __SSE4_1__ && defined __SSE2_MATH__
+# if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99
+
+/* Round to nearest integer. 
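   The roundsd/roundss immediate of 4 selects rounding according to
   the current MXCSR rounding mode; under the default FE_TONEAREST
   mode rint (2.5) therefore yields 2.0 (ties round to even), while
   after fesetround (FE_UPWARD) it would yield 3.0.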
*/ +__MATH_INLINE double +__NTH (rint (double __x)) +{ + double __res; + /* Mark as volatile since the result is dependent on the state of + the SSE control register (the rounding mode). Otherwise GCC might + remove these assembler instructions since it does not know about + the rounding mode change and cannot currently be told. */ + __asm __volatile__ ("roundsd $4, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} +__MATH_INLINE float +__NTH (rintf (float __x)) +{ + float __res; + /* Mark as volatile since the result is dependent on the state of + the SSE control register (the rounding mode). Otherwise GCC might + remove these assembler instructions since it does not know about + the rounding mode change and cannot currently be told. */ + __asm __volatile__ ("roundss $4, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} + +# ifdef __USE_ISOC99 +/* Round to nearest integer without raising inexact exception. */ +__MATH_INLINE double +__NTH (nearbyint (double __x)) +{ + double __res; + /* Mark as volatile since the result is dependent on the state of + the SSE control register (the rounding mode). Otherwise GCC might + remove these assembler instructions since it does not know about + the rounding mode change and cannot currently be told. */ + __asm __volatile__ ("roundsd $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} +__MATH_INLINE float +__NTH (nearbyintf (float __x)) +{ + float __res; + /* Mark as volatile since the result is dependent on the state of + the SSE control register (the rounding mode). Otherwise GCC might + remove these assembler instructions since it does not know about + the rounding mode change and cannot currently be told. */ + __asm __volatile__ ("roundss $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} +# endif + +# endif + +/* Smallest integral value not less than X. */ +__MATH_INLINE double +__NTH (ceil (double __x)) +{ + double __res; + __asm ("roundsd $2, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} + +__MATH_INLINE float +__NTH (ceilf (float __x)) +{ + float __res; + __asm ("roundss $2, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} + +/* Largest integer not greater than X. */ +__MATH_INLINE double +__NTH (floor (double __x)) +{ + double __res; + __asm ("roundsd $1, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} + +__MATH_INLINE float +__NTH (floorf (float __x)) +{ + float __res; + __asm ("roundss $1, %1, %0" : "=x" (__res) : "xm" (__x)); + return __res; +} +# endif +# endif +#endif + +/* Disable x87 inlines when -fpmath=sse is passed and also when we're building + on x86_64. Older gcc (gcc-3.2 for example) does not define __SSE2_MATH__ + for x86_64. */ +#if !defined __SSE2_MATH__ && !defined __x86_64__ +# if ((!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \ + && defined __OPTIMIZE__) + +/* The inline functions do not set errno or raise necessarily the + correct exceptions. */ +# undef math_errhandling + +/* A macro to define float, double, and long double versions of various + math functions for the ix87 FPU. FUNC is the function name (which will + be suffixed with f and l for the float and long double version, + respectively). OP is the name of the FPU operation. + We define two sets of macros. The set with the additional NP + doesn't add a prototype declaration. 
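+
+   For example, __inline_mathopNP (rint, "frndint") further below
+   expands to inline definitions of rint, rintf and rintl, each of
+   which applies a single frndint instruction to its argument.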
*/ + +# ifdef __USE_ISOC99 +# define __inline_mathop(func, op) \ + __inline_mathop_ (double, func, op) \ + __inline_mathop_ (float, __CONCAT(func,f), op) \ + __inline_mathop_ (long double, __CONCAT(func,l), op) +# define __inline_mathopNP(func, op) \ + __inline_mathopNP_ (double, func, op) \ + __inline_mathopNP_ (float, __CONCAT(func,f), op) \ + __inline_mathopNP_ (long double, __CONCAT(func,l), op) +# else +# define __inline_mathop(func, op) \ + __inline_mathop_ (double, func, op) +# define __inline_mathopNP(func, op) \ + __inline_mathopNP_ (double, func, op) +# endif + +# define __inline_mathop_(float_type, func, op) \ + __inline_mathop_decl_ (float_type, func, op, "0" (__x)) +# define __inline_mathopNP_(float_type, func, op) \ + __inline_mathop_declNP_ (float_type, func, op, "0" (__x)) + + +# ifdef __USE_ISOC99 +# define __inline_mathop_decl(func, op, params...) \ + __inline_mathop_decl_ (double, func, op, params) \ + __inline_mathop_decl_ (float, __CONCAT(func,f), op, params) \ + __inline_mathop_decl_ (long double, __CONCAT(func,l), op, params) +# define __inline_mathop_declNP(func, op, params...) \ + __inline_mathop_declNP_ (double, func, op, params) \ + __inline_mathop_declNP_ (float, __CONCAT(func,f), op, params) \ + __inline_mathop_declNP_ (long double, __CONCAT(func,l), op, params) +# else +# define __inline_mathop_decl(func, op, params...) \ + __inline_mathop_decl_ (double, func, op, params) +# define __inline_mathop_declNP(func, op, params...) \ + __inline_mathop_declNP_ (double, func, op, params) +# endif + +# define __inline_mathop_decl_(float_type, func, op, params...) \ + __MATH_INLINE float_type func (float_type) __THROW; \ + __inline_mathop_declNP_ (float_type, func, op, params) + +# define __inline_mathop_declNP_(float_type, func, op, params...) 
\ + __MATH_INLINE float_type __NTH (func (float_type __x)) \ + { \ + register float_type __result; \ + __asm __volatile__ (op : "=t" (__result) : params); \ + return __result; \ + } + + +# ifdef __USE_ISOC99 +# define __inline_mathcode(func, arg, code) \ + __inline_mathcode_ (double, func, arg, code) \ + __inline_mathcode_ (float, __CONCAT(func,f), arg, code) \ + __inline_mathcode_ (long double, __CONCAT(func,l), arg, code) +# define __inline_mathcodeNP(func, arg, code) \ + __inline_mathcodeNP_ (double, func, arg, code) \ + __inline_mathcodeNP_ (float, __CONCAT(func,f), arg, code) \ + __inline_mathcodeNP_ (long double, __CONCAT(func,l), arg, code) +# define __inline_mathcode2(func, arg1, arg2, code) \ + __inline_mathcode2_ (double, func, arg1, arg2, code) \ + __inline_mathcode2_ (float, __CONCAT(func,f), arg1, arg2, code) \ + __inline_mathcode2_ (long double, __CONCAT(func,l), arg1, arg2, code) +# define __inline_mathcodeNP2(func, arg1, arg2, code) \ + __inline_mathcodeNP2_ (double, func, arg1, arg2, code) \ + __inline_mathcodeNP2_ (float, __CONCAT(func,f), arg1, arg2, code) \ + __inline_mathcodeNP2_ (long double, __CONCAT(func,l), arg1, arg2, code) +# define __inline_mathcode3(func, arg1, arg2, arg3, code) \ + __inline_mathcode3_ (double, func, arg1, arg2, arg3, code) \ + __inline_mathcode3_ (float, __CONCAT(func,f), arg1, arg2, arg3, code) \ + __inline_mathcode3_ (long double, __CONCAT(func,l), arg1, arg2, arg3, code) +# define __inline_mathcodeNP3(func, arg1, arg2, arg3, code) \ + __inline_mathcodeNP3_ (double, func, arg1, arg2, arg3, code) \ + __inline_mathcodeNP3_ (float, __CONCAT(func,f), arg1, arg2, arg3, code) \ + __inline_mathcodeNP3_ (long double, __CONCAT(func,l), arg1, arg2, arg3, code) +# else +# define __inline_mathcode(func, arg, code) \ + __inline_mathcode_ (double, func, (arg), code) +# define __inline_mathcodeNP(func, arg, code) \ + __inline_mathcodeNP_ (double, func, (arg), code) +# define __inline_mathcode2(func, arg1, arg2, code) \ + __inline_mathcode2_ (double, func, arg1, arg2, code) +# define __inline_mathcodeNP2(func, arg1, arg2, code) \ + __inline_mathcodeNP2_ (double, func, arg1, arg2, code) +# define __inline_mathcode3(func, arg1, arg2, arg3, code) \ + __inline_mathcode3_ (double, func, arg1, arg2, arg3, code) +# define __inline_mathcodeNP3(func, arg1, arg2, arg3, code) \ + __inline_mathcodeNP3_ (double, func, arg1, arg2, arg3, code) +# endif + +# define __inline_mathcode_(float_type, func, arg, code) \ + __MATH_INLINE float_type func (float_type) __THROW; \ + __inline_mathcodeNP_(float_type, func, arg, code) + +# define __inline_mathcodeNP_(float_type, func, arg, code) \ + __MATH_INLINE float_type __NTH (func (float_type arg)) \ + { \ + code; \ + } + + +# define __inline_mathcode2_(float_type, func, arg1, arg2, code) \ + __MATH_INLINE float_type func (float_type, float_type) __THROW; \ + __inline_mathcodeNP2_ (float_type, func, arg1, arg2, code) + +# define __inline_mathcodeNP2_(float_type, func, arg1, arg2, code) \ + __MATH_INLINE float_type __NTH (func (float_type arg1, float_type arg2)) \ + { \ + code; \ + } + +# define __inline_mathcode3_(float_type, func, arg1, arg2, arg3, code) \ + __MATH_INLINE float_type func (float_type, float_type, float_type) __THROW; \ + __inline_mathcodeNP3_(float_type, func, arg1, arg2, arg3, code) + +# define __inline_mathcodeNP3_(float_type, func, arg1, arg2, arg3, code) \ + __MATH_INLINE float_type __NTH (func (float_type arg1, float_type arg2, \ + float_type arg3)) \ + { \ + code; \ + } +# endif + + +# if !defined 
__NO_MATH_INLINES && defined __OPTIMIZE__ +/* Miscellaneous functions */ + +/* __FAST_MATH__ is defined by gcc -ffast-math. */ +# ifdef __FAST_MATH__ +# ifdef __USE_GNU +# define __sincos_code \ + register long double __cosr; \ + register long double __sinr; \ + register unsigned int __swtmp; \ + __asm __volatile__ \ + ("fsincos\n\t" \ + "fnstsw %w2\n\t" \ + "testl $0x400, %2\n\t" \ + "jz 1f\n\t" \ + "fldpi\n\t" \ + "fadd %%st(0)\n\t" \ + "fxch %%st(1)\n\t" \ + "2: fprem1\n\t" \ + "fnstsw %w2\n\t" \ + "testl $0x400, %2\n\t" \ + "jnz 2b\n\t" \ + "fstp %%st(1)\n\t" \ + "fsincos\n\t" \ + "1:" \ + : "=t" (__cosr), "=u" (__sinr), "=a" (__swtmp) : "0" (__x)); \ + *__sinx = __sinr; \ + *__cosx = __cosr + +__MATH_INLINE void +__NTH (__sincos (double __x, double *__sinx, double *__cosx)) +{ + __sincos_code; +} + +__MATH_INLINE void +__NTH (__sincosf (float __x, float *__sinx, float *__cosx)) +{ + __sincos_code; +} + +__MATH_INLINE void +__NTH (__sincosl (long double __x, long double *__sinx, long double *__cosx)) +{ + __sincos_code; +} +# endif + + +/* Optimized inline implementation, sometimes with reduced precision + and/or argument range. */ + +# if __GNUC_PREREQ (3, 5) +# define __expm1_code \ + register long double __temp; \ + __temp = __builtin_expm1l (__x); \ + return __temp ? __temp : __x +# else +# define __expm1_code \ + register long double __value; \ + register long double __exponent; \ + register long double __temp; \ + __asm __volatile__ \ + ("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" \ + "fmul %%st(1) # x * log2(e)\n\t" \ + "fst %%st(1)\n\t" \ + "frndint # int(x * log2(e))\n\t" \ + "fxch\n\t" \ + "fsub %%st(1) # fract(x * log2(e))\n\t" \ + "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \ + "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" \ + : "=t" (__value), "=u" (__exponent) : "0" (__x)); \ + __asm __volatile__ \ + ("fscale # 2^int(x * log2(e))\n\t" \ + : "=t" (__temp) : "0" (1.0), "u" (__exponent)); \ + __temp -= 1.0; \ + __temp += __value; \ + return __temp ? 
__temp : __x +# endif +__inline_mathcodeNP_ (long double, __expm1l, __x, __expm1_code) + +# if __GNUC_PREREQ (3, 4) +__inline_mathcodeNP_ (long double, __expl, __x, return __builtin_expl (__x)) +# else +# define __exp_code \ + register long double __value; \ + register long double __exponent; \ + __asm __volatile__ \ + ("fldl2e # e^x = 2^(x * log2(e))\n\t" \ + "fmul %%st(1) # x * log2(e)\n\t" \ + "fst %%st(1)\n\t" \ + "frndint # int(x * log2(e))\n\t" \ + "fxch\n\t" \ + "fsub %%st(1) # fract(x * log2(e))\n\t" \ + "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \ + : "=t" (__value), "=u" (__exponent) : "0" (__x)); \ + __value += 1.0; \ + __asm __volatile__ \ + ("fscale" \ + : "=t" (__value) : "0" (__value), "u" (__exponent)); \ + return __value +__inline_mathcodeNP (exp, __x, __exp_code) +__inline_mathcodeNP_ (long double, __expl, __x, __exp_code) +# endif + + +# if !__GNUC_PREREQ (3, 5) +__inline_mathcodeNP (tan, __x, \ + register long double __value; \ + register long double __value2 __attribute__ ((__unused__)); \ + __asm __volatile__ \ + ("fptan" \ + : "=t" (__value2), "=u" (__value) : "0" (__x)); \ + return __value) +# endif +# endif /* __FAST_MATH__ */ + + +# if __GNUC_PREREQ (3, 4) +__inline_mathcodeNP2_ (long double, __atan2l, __y, __x, + return __builtin_atan2l (__y, __x)) +# else +# define __atan2_code \ + register long double __value; \ + __asm __volatile__ \ + ("fpatan" \ + : "=t" (__value) : "0" (__x), "u" (__y) : "st(1)"); \ + return __value +# ifdef __FAST_MATH__ +__inline_mathcodeNP2 (atan2, __y, __x, __atan2_code) +# endif +__inline_mathcodeNP2_ (long double, __atan2l, __y, __x, __atan2_code) +# endif + + +# if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) +__inline_mathcodeNP2 (fmod, __x, __y, \ + register long double __value; \ + __asm __volatile__ \ + ("1: fprem\n\t" \ + "fnstsw %%ax\n\t" \ + "sahf\n\t" \ + "jp 1b" \ + : "=t" (__value) : "0" (__x), "u" (__y) : "ax", "cc"); \ + return __value) +# endif + + +# ifdef __FAST_MATH__ +# if !__GNUC_PREREQ (3,3) +__inline_mathopNP (sqrt, "fsqrt") +__inline_mathopNP_ (long double, __sqrtl, "fsqrt") +# define __libc_sqrtl(n) __sqrtl (n) +# else +# define __libc_sqrtl(n) __builtin_sqrtl (n) +# endif +# endif + +# if __GNUC_PREREQ (2, 8) +__inline_mathcodeNP_ (double, fabs, __x, return __builtin_fabs (__x)) +# ifdef __USE_ISOC99 +__inline_mathcodeNP_ (float, fabsf, __x, return __builtin_fabsf (__x)) +__inline_mathcodeNP_ (long double, fabsl, __x, return __builtin_fabsl (__x)) +# endif +__inline_mathcodeNP_ (long double, __fabsl, __x, return __builtin_fabsl (__x)) +# else +__inline_mathop (fabs, "fabs") +__inline_mathop_ (long double, __fabsl, "fabs") +# endif + +# ifdef __FAST_MATH__ +# if !__GNUC_PREREQ (3, 4) +/* The argument range of this inline version is reduced. */ +__inline_mathopNP (sin, "fsin") +/* The argument range of this inline version is reduced. 
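   (fcos, like fsin, only operates on arguments with |x| < 2^63;
   larger arguments are left unreduced, so results for huge inputs
   differ from the library functions.)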
*/ +__inline_mathopNP (cos, "fcos") + +__inline_mathop_declNP (log, "fldln2; fxch; fyl2x", "0" (__x) : "st(1)") +# endif + +# if !__GNUC_PREREQ (3, 5) +__inline_mathop_declNP (log10, "fldlg2; fxch; fyl2x", "0" (__x) : "st(1)") + +__inline_mathcodeNP (asin, __x, return __atan2l (__x, __libc_sqrtl (1.0 - __x * __x))) +__inline_mathcodeNP (acos, __x, return __atan2l (__libc_sqrtl (1.0 - __x * __x), __x)) +# endif + +# if !__GNUC_PREREQ (3, 4) +__inline_mathop_declNP (atan, "fld1; fpatan", "0" (__x) : "st(1)") +# endif +# endif /* __FAST_MATH__ */ + +__inline_mathcode_ (long double, __sgn1l, __x, \ + __extension__ union { long double __xld; unsigned int __xi[3]; } __n = \ + { __xld: __x }; \ + __n.__xi[2] = (__n.__xi[2] & 0x8000) | 0x3fff; \ + __n.__xi[1] = 0x80000000; \ + __n.__xi[0] = 0; \ + return __n.__xld) + + +# ifdef __FAST_MATH__ +/* The argument range of the inline version of sinhl is slightly reduced. */ +__inline_mathcodeNP (sinh, __x, \ + register long double __exm1 = __expm1l (__fabsl (__x)); \ + return 0.5 * (__exm1 / (__exm1 + 1.0) + __exm1) * __sgn1l (__x)) + +__inline_mathcodeNP (cosh, __x, \ + register long double __ex = __expl (__x); \ + return 0.5 * (__ex + 1.0 / __ex)) + +__inline_mathcodeNP (tanh, __x, \ + register long double __exm1 = __expm1l (-__fabsl (__x + __x)); \ + return __exm1 / (__exm1 + 2.0) * __sgn1l (-__x)) +# endif + +__inline_mathcodeNP (floor, __x, \ + register long double __value; \ + register int __ignore; \ + unsigned short int __cw; \ + unsigned short int __cwtmp; \ + __asm __volatile ("fnstcw %3\n\t" \ + "movzwl %3, %1\n\t" \ + "andl $0xf3ff, %1\n\t" \ + "orl $0x0400, %1\n\t" /* rounding down */ \ + "movw %w1, %2\n\t" \ + "fldcw %2\n\t" \ + "frndint\n\t" \ + "fldcw %3" \ + : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ + "=m" (__cw) \ + : "0" (__x)); \ + return __value) + +__inline_mathcodeNP (ceil, __x, \ + register long double __value; \ + register int __ignore; \ + unsigned short int __cw; \ + unsigned short int __cwtmp; \ + __asm __volatile ("fnstcw %3\n\t" \ + "movzwl %3, %1\n\t" \ + "andl $0xf3ff, %1\n\t" \ + "orl $0x0800, %1\n\t" /* rounding up */ \ + "movw %w1, %2\n\t" \ + "fldcw %2\n\t" \ + "frndint\n\t" \ + "fldcw %3" \ + : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ + "=m" (__cw) \ + : "0" (__x)); \ + return __value) + +# ifdef __FAST_MATH__ +# define __ldexp_code \ + register long double __value; \ + __asm __volatile__ \ + ("fscale" \ + : "=t" (__value) : "0" (__x), "u" ((long double) __y)); \ + return __value + +__MATH_INLINE double +__NTH (ldexp (double __x, int __y)) +{ + __ldexp_code; +} +# endif + + +/* Optimized versions for some non-standardized functions. */ +# ifdef __USE_ISOC99 + +# ifdef __FAST_MATH__ +__inline_mathcodeNP (expm1, __x, __expm1_code) + +/* We cannot rely on M_SQRT being defined. So we do it for ourself + here. */ +# define __M_SQRT2 1.41421356237309504880L /* sqrt(2) */ + +# if !__GNUC_PREREQ (3, 5) +__inline_mathcodeNP (log1p, __x, \ + register long double __value; \ + if (__fabsl (__x) >= 1.0 - 0.5 * __M_SQRT2) \ + __value = logl (1.0 + __x); \ + else \ + __asm __volatile__ \ + ("fldln2\n\t" \ + "fxch\n\t" \ + "fyl2xp1" \ + : "=t" (__value) : "0" (__x) : "st(1)"); \ + return __value) +# endif + + +/* The argument range of the inline version of asinhl is slightly reduced. 
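   (The __y * __y intermediate below overflows for very large |x|,
   which is what reduces the usable argument range.)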
*/ +__inline_mathcodeNP (asinh, __x, \ + register long double __y = __fabsl (__x); \ + return (log1pl (__y * __y / (__libc_sqrtl (__y * __y + 1.0) + 1.0) + __y) \ + * __sgn1l (__x))) + +__inline_mathcodeNP (acosh, __x, \ + return logl (__x + __libc_sqrtl (__x - 1.0) * __libc_sqrtl (__x + 1.0))) + +__inline_mathcodeNP (atanh, __x, \ + register long double __y = __fabsl (__x); \ + return -0.5 * log1pl (-(__y + __y) / (1.0 + __y)) * __sgn1l (__x)) + +/* The argument range of the inline version of hypotl is slightly reduced. */ +__inline_mathcodeNP2 (hypot, __x, __y, + return __libc_sqrtl (__x * __x + __y * __y)) + +# if !__GNUC_PREREQ (3, 5) +__inline_mathcodeNP(logb, __x, \ + register long double __value; \ + register long double __junk; \ + __asm __volatile__ \ + ("fxtract\n\t" \ + : "=t" (__junk), "=u" (__value) : "0" (__x)); \ + return __value) +# endif + +# endif +# endif + +# ifdef __USE_ISOC99 +# ifdef __FAST_MATH__ + +# if !__GNUC_PREREQ (3, 5) +__inline_mathop_declNP (log2, "fld1; fxch; fyl2x", "0" (__x) : "st(1)") +# endif + +__MATH_INLINE float +__NTH (ldexpf (float __x, int __y)) +{ + __ldexp_code; +} + +__MATH_INLINE long double +__NTH (ldexpl (long double __x, int __y)) +{ + __ldexp_code; +} + +__inline_mathopNP (rint, "frndint") +# endif /* __FAST_MATH__ */ + +# define __lrint_code \ + long int __lrintres; \ + __asm__ __volatile__ \ + ("fistpl %0" \ + : "=m" (__lrintres) : "t" (__x) : "st"); \ + return __lrintres +__MATH_INLINE long int +__NTH (lrintf (float __x)) +{ + __lrint_code; +} +__MATH_INLINE long int +__NTH (lrint (double __x)) +{ + __lrint_code; +} +__MATH_INLINE long int +__NTH (lrintl (long double __x)) +{ + __lrint_code; +} +# undef __lrint_code + +# define __llrint_code \ + long long int __llrintres; \ + __asm__ __volatile__ \ + ("fistpll %0" \ + : "=m" (__llrintres) : "t" (__x) : "st"); \ + return __llrintres +__extension__ +__MATH_INLINE long long int +__NTH (llrintf (float __x)) +{ + __llrint_code; +} +__extension__ +__MATH_INLINE long long int +__NTH (llrint (double __x)) +{ + __llrint_code; +} +__extension__ +__MATH_INLINE long long int +__NTH (llrintl (long double __x)) +{ + __llrint_code; +} +# undef __llrint_code + +# endif + + +# ifdef __USE_MISC + +# if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) +__inline_mathcodeNP2 (drem, __x, __y, \ + register double __value; \ + register int __clobbered; \ + __asm __volatile__ \ + ("1: fprem1\n\t" \ + "fstsw %%ax\n\t" \ + "sahf\n\t" \ + "jp 1b" \ + : "=t" (__value), "=&a" (__clobbered) : "0" (__x), "u" (__y) : "cc"); \ + return __value) +# endif + + +/* This function is used in the `isfinite' macro. */ +__MATH_INLINE int +__NTH (__finite (double __x)) +{ + return (__extension__ + (((((union { double __d; int __i[2]; }) {__d: __x}).__i[1] + | 0x800fffffu) + 1) >> 31)); +} + +# endif /* __USE_MISC */ + +/* Undefine some of the large macros which are not used anymore. */ +# undef __atan2_code +# ifdef __FAST_MATH__ +# undef __expm1_code +# undef __exp_code +# undef __sincos_code +# endif /* __FAST_MATH__ */ + +# endif /* __NO_MATH_INLINES */ + + +/* This code is used internally in the GNU libc. 
*/ +# ifdef __LIBC_INTERNAL_MATH_INLINES +__inline_mathop (__ieee754_sqrt, "fsqrt") +__inline_mathcode2_ (long double, __ieee754_atan2l, __y, __x, + register long double __value; + __asm __volatile__ ("fpatan\n\t" + : "=t" (__value) + : "0" (__x), "u" (__y) : "st(1)"); + return __value;) +# endif + +#endif /* !__SSE2_MATH__ && !__x86_64__ */ diff --git a/REORG.TODO/sysdeps/x86/fpu/fix-fp-int-compare-invalid.h b/REORG.TODO/sysdeps/x86/fpu/fix-fp-int-compare-invalid.h new file mode 100644 index 0000000000..bda247d51f --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/fix-fp-int-compare-invalid.h @@ -0,0 +1,28 @@ +/* Fix for missing "invalid" exceptions from floating-point + comparisons. x86 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FIX_FP_INT_COMPARE_INVALID_H +#define FIX_FP_INT_COMPARE_INVALID_H 1 + +/* As of GCC 5, both x87 and SSE comparisons use unordered comparison + instructions when they should use ordered comparisons + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52451>. */ +#define FIX_COMPARE_INVALID 1 + +#endif /* fix-fp-int-compare-invalid.h */ diff --git a/REORG.TODO/sysdeps/x86/fpu/include/bits/fenv.h b/REORG.TODO/sysdeps/x86/fpu/include/bits/fenv.h new file mode 100644 index 0000000000..a39b6fa85d --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/include/bits/fenv.h @@ -0,0 +1,48 @@ +/* Wrapper for x86 bits/fenv.h for use when building glibc. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_FENV_H +#include_next <bits/fenv.h> + +# ifndef _ISOMAC + +/* Ensure __feraiseexcept calls in glibc are optimized the same as + feraiseexcept calls. 
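   That is, an internal call such as __feraiseexcept (FE_DIVBYZERO)
   with a compile-time constant argument is inlined to a single
   exception-raising division rather than compiled as an actual
   function call.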
*/ + +#ifdef __USE_EXTERN_INLINES +__BEGIN_DECLS + +extern int __REDIRECT_NTH (____feraiseexcept_renamed, (int), __feraiseexcept); +__extern_inline int +__NTH (__feraiseexcept (int __excepts)) +{ + if (__builtin_constant_p (__excepts) + && (__excepts & ~(FE_INVALID | FE_DIVBYZERO)) == 0) + { + __feraiseexcept_invalid_divbyzero (__excepts); + return 0; + } + + return ____feraiseexcept_renamed (__excepts); +} + +__END_DECLS +#endif + +# endif /* _ISOMAC */ +#endif /* bits/fenv.h */ diff --git a/REORG.TODO/sysdeps/x86/fpu/powl_helper.c b/REORG.TODO/sysdeps/x86/fpu/powl_helper.c new file mode 100644 index 0000000000..46f8cd9318 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/powl_helper.c @@ -0,0 +1,236 @@ +/* Implement powl for x86 using extra-precision log. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdbool.h> + +/* High parts and low parts of -log (k/16), for integer k from 12 to + 24. */ + +static const long double powl_log_table[] = + { + 0x4.9a58844d36e49e1p-4L, -0x1.0522624fd558f574p-68L, + 0x3.527da7915b3c6de4p-4L, 0x1.7d4ef4b901b99b9ep-68L, + 0x2.22f1d044fc8f7bc8p-4L, -0x1.8e97c071a42fc388p-68L, + 0x1.08598b59e3a0688ap-4L, 0x3.fd9bf503372c12fcp-72L, + -0x0p+0L, 0x0p+0L, + -0xf.85186008b15330cp-8L, 0x1.9b47488a6687672cp-72L, + -0x1.e27076e2af2e5e9ep-4L, -0xa.87ffe1fe9e155dcp-72L, + -0x2.bfe60e14f27a791p-4L, 0x1.83bebf1bdb88a032p-68L, + -0x3.91fef8f353443584p-4L, -0xb.b03de5ff734495cp-72L, + -0x4.59d72aeae98380e8p-4L, 0xc.e0aa3be4747dc1p-72L, + -0x5.1862f08717b09f4p-4L, -0x2.decdeccf1cd10578p-68L, + -0x5.ce75fdaef401a738p-4L, -0x9.314feb4fbde5aaep-72L, + -0x6.7cc8fb2fe612fcbp-4L, 0x2.5ca2642feb779f98p-68L, + }; + +/* High 32 bits of log2 (e), and remainder rounded to 64 bits. */ +static const long double log2e_hi = 0x1.71547652p+0L; +static const long double log2e_lo = 0xb.82fe1777d0ffda1p-36L; + +/* Given a number with high part HI and low part LO, add the number X + to it and store the result in *RHI and *RLO. It is given that + either |X| < |0.7 * HI|, or HI == LO == 0, and that the values are + small enough that no overflow occurs. The result does not need to + be exact to 128 bits; 78-bit accuracy of the final accumulated + result suffices. */ + +static inline void +acc_split (long double *rhi, long double *rlo, long double hi, long double lo, + long double x) +{ + long double thi = hi + x; + long double tlo = (hi - thi) + x + lo; + *rhi = thi + tlo; + *rlo = (thi - *rhi) + tlo; +} + +extern long double __powl_helper (long double x, long double y); +libm_hidden_proto (__powl_helper) + +/* Given X a value that is finite and nonzero, or a NaN, and Y a + finite nonzero value with 0x1p-79 <= |Y| <= 0x1p78, compute X to + the power Y. 
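[Editor's note: acc_split above is one step of compensated (hi/lo, "double-double" style) accumulation. The same arithmetic transcribed for plain double, with a toy driver showing that a term far below the last bit of HI survives in the low part; a sketch of the idea, not the glibc code path:]

#include <stdio.h>

static void
acc_split_d (double *rhi, double *rlo, double hi, double lo, double x)
{
  double thi = hi + x;
  double tlo = (hi - thi) + x + lo;   /* recovers the rounding error of hi + x */
  *rhi = thi + tlo;
  *rlo = (thi - *rhi) + tlo;
}

int
main (void)
{
  double hi = 1.0, lo = 0.0;
  acc_split_d (&hi, &lo, hi, lo, 0x1p-80);   /* |x| < 0.7 * |hi| holds */
  printf ("hi = %a  lo = %a\n", hi, lo);     /* hi = 0x1p+0  lo = 0x1p-80 */
  return 0;
}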
*/ + +long double +__powl_helper (long double x, long double y) +{ + if (isnan (x)) + return __ieee754_expl (y * __ieee754_logl (x)); + bool negate; + if (x < 0) + { + long double absy = fabsl (y); + if (absy >= 0x1p64L) + negate = false; + else + { + unsigned long long yll = absy; + if (yll != absy) + return __ieee754_expl (y * __ieee754_logl (x)); + negate = (yll & 1) != 0; + } + x = fabsl (x); + } + else + negate = false; + + /* We need to compute Y * log2 (X) to at least 64 bits after the + point for normal results (that is, to at least 78 bits + precision). */ + int x_int_exponent; + long double x_frac; + x_frac = __frexpl (x, &x_int_exponent); + if (x_frac <= 0x0.aaaaaaaaaaaaaaaap0L) /* 2.0L / 3.0L, rounded down */ + { + x_frac *= 2.0; + x_int_exponent--; + } + + long double log_x_frac_hi, log_x_frac_lo; + /* Determine an initial approximation to log (X_FRAC) using + POWL_LOG_TABLE, and multiply by a value K/16 to reduce to an + interval (24/25, 26/25). */ + int k = (int) ((16.0L / x_frac) + 0.5L); + log_x_frac_hi = powl_log_table[2 * k - 24]; + log_x_frac_lo = powl_log_table[2 * k - 23]; + long double x_frac_low; + if (k == 16) + x_frac_low = 0.0L; + else + { + /* Mask off low 5 bits of X_FRAC so the multiplication by K/16 + is exact. These bits are small enough that they can be + corrected for by adding log2 (e) * X_FRAC_LOW to the final + result. */ + int32_t se; + u_int32_t i0, i1; + GET_LDOUBLE_WORDS (se, i0, i1, x_frac); + x_frac_low = x_frac; + i1 &= 0xffffffe0; + SET_LDOUBLE_WORDS (x_frac, se, i0, i1); + x_frac_low -= x_frac; + x_frac_low /= x_frac; + x_frac *= k / 16.0L; + } + + /* Now compute log (X_FRAC) for X_FRAC in (24/25, 26/25). Separate + W = X_FRAC - 1 into high 16 bits and remaining bits, so that + multiplications for low-order power series terms are exact. The + remaining bits are small enough that adding a 64-bit value of + log2 (1 + W_LO / (1 + W_HI)) will be a sufficient correction for + them. */ + long double w = x_frac - 1; + long double w_hi, w_lo; + int32_t se; + u_int32_t i0, i1; + GET_LDOUBLE_WORDS (se, i0, i1, w); + i0 &= 0xffff0000; + i1 = 0; + SET_LDOUBLE_WORDS (w_hi, se, i0, i1); + w_lo = w - w_hi; + long double wp = w_hi; + acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, wp); + wp *= -w_hi; + acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, + wp / 2.0L); + wp *= -w_hi; + acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, + wp * 0x0.5555p0L); /* -W_HI**3 / 3, high part. */ + acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, + wp * 0x0.5555555555555555p-16L); /* -W_HI**3 / 3, low part. */ + wp *= -w_hi; + acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, + wp / 4.0L); + /* Subsequent terms are small enough that they only need be computed + to 64 bits. */ + for (int i = 5; i <= 17; i++) + { + wp *= -w_hi; + acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, + wp / i); + } + + /* Convert LOG_X_FRAC_HI + LOG_X_FRAC_LO to a base-2 logarithm. 
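[Editor's note: the reduction at the top of __powl_helper (frexp, then a conditional doubling so the fraction lands in roughly (2/3, 4/3]) is what makes the later log1p-style series converge quickly. A low-precision analogue in plain double, ignoring the table lookup and the hi/lo bookkeeping; link with -lm:]

#include <math.h>
#include <stdio.h>

#ifndef M_LN2
# define M_LN2 0.69314718055994530942
#endif

static double
log2_via_reduction (double x)
{
  int e;
  double f = frexp (x, &e);   /* x = f * 2^e with f in [0.5, 1) */
  if (f <= 2.0 / 3.0)         /* shift f into (2/3, 4/3] */
    {
      f *= 2.0;
      e--;
    }
  return e + log1p (f - 1.0) / M_LN2;   /* |f - 1| <= 1/3 */
}

int
main (void)
{
  printf ("%.17g\n", log2_via_reduction (10.0));   /* ~3.3219280948873623 */
  return 0;
}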
*/ + long double log2_x_frac_hi, log2_x_frac_lo; + long double log_x_frac_hi32, log_x_frac_lo64; + GET_LDOUBLE_WORDS (se, i0, i1, log_x_frac_hi); + i1 = 0; + SET_LDOUBLE_WORDS (log_x_frac_hi32, se, i0, i1); + log_x_frac_lo64 = (log_x_frac_hi - log_x_frac_hi32) + log_x_frac_lo; + long double log2_x_frac_hi1 = log_x_frac_hi32 * log2e_hi; + long double log2_x_frac_lo1 + = log_x_frac_lo64 * log2e_hi + log_x_frac_hi * log2e_lo; + log2_x_frac_hi = log2_x_frac_hi1 + log2_x_frac_lo1; + log2_x_frac_lo = (log2_x_frac_hi1 - log2_x_frac_hi) + log2_x_frac_lo1; + + /* Correct for the masking off of W_LO. */ + long double log2_1p_w_lo; + asm ("fyl2xp1" + : "=t" (log2_1p_w_lo) + : "0" (w_lo / (1.0L + w_hi)), "u" (1.0L) + : "st(1)"); + acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo, + log2_1p_w_lo); + + /* Correct for the masking off of X_FRAC_LOW. */ + acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo, + x_frac_low * M_LOG2El); + + /* Add the integer and fractional parts of the base-2 logarithm. */ + long double log2_x_hi, log2_x_lo; + log2_x_hi = x_int_exponent + log2_x_frac_hi; + log2_x_lo = ((x_int_exponent - log2_x_hi) + log2_x_frac_hi) + log2_x_frac_lo; + + /* Compute the base-2 logarithm of the result. */ + long double log2_res_hi, log2_res_lo; + long double log2_x_hi32, log2_x_lo64; + GET_LDOUBLE_WORDS (se, i0, i1, log2_x_hi); + i1 = 0; + SET_LDOUBLE_WORDS (log2_x_hi32, se, i0, i1); + log2_x_lo64 = (log2_x_hi - log2_x_hi32) + log2_x_lo; + long double y_hi32, y_lo32; + GET_LDOUBLE_WORDS (se, i0, i1, y); + i1 = 0; + SET_LDOUBLE_WORDS (y_hi32, se, i0, i1); + y_lo32 = y - y_hi32; + log2_res_hi = log2_x_hi32 * y_hi32; + log2_res_lo = log2_x_hi32 * y_lo32 + log2_x_lo64 * y; + + /* Split the base-2 logarithm of the result into integer and + fractional parts. */ + long double log2_res_int = __roundl (log2_res_hi); + long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo; + /* If the integer part is very large, the computed fractional part + may be outside the valid range for f2xm1. */ + if (fabsl (log2_res_int) > 16500) + log2_res_frac = 0; + + /* Compute the final result. */ + long double res; + asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac)); + res += 1.0L; + if (negate) + res = -res; + asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int)); + math_check_force_underflow (res); + return res; +} + +libm_hidden_def (__powl_helper) diff --git a/REORG.TODO/sysdeps/x86/fpu/test-fenv-clear-sse.c b/REORG.TODO/sysdeps/x86/fpu/test-fenv-clear-sse.c new file mode 100644 index 0000000000..52a1b63c0b --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-fenv-clear-sse.c @@ -0,0 +1,45 @@ +/* Test fesetenv (FE_DFL_ENV) and fesetenv (FE_NOMASK_ENV) clear + exceptions (bug 19181). SSE version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
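[Editor's note: the tail of __powl_helper above rebuilds the result from the split base-2 logarithm: f2xm1 produces 2^frac - 1 for the fractional part and fscale applies the integer part. That is also why |log2_res_int| > 16500 lets the fractional part be forced to 0: at that magnitude fscale alone decides between overflow and underflow. The same reconstruction in portable C, as a sketch; link with -lm:]

#include <math.h>
#include <stdio.h>

static double
rebuild (double log2_res)
{
  double ipart = round (log2_res);
  double fpart = log2_res - ipart;   /* in [-0.5, 0.5], inside f2xm1's domain */
  return scalbn (exp2 (fpart), (int) ipart);   /* exp2 ~ f2xm1+1, scalbn ~ fscale */
}

int
main (void)
{
  printf ("%.17g\n", rebuild (10.0));   /* 1024 */
  printf ("%a\n", rebuild (-1074.0));   /* 0x1p-1074, smallest subnormal */
  return 0;
}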
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <stdbool.h> + +static bool +have_sse2 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return false; + + return (edx & bit_SSE2) != 0; +} + +#define CHECK_CAN_TEST \ + do \ + { \ + if (!have_sse2 ()) \ + { \ + puts ("CPU does not support SSE2, cannot test"); \ + return 0; \ + } \ + } \ + while (0) + +#include <test-fenv-clear-main.c> diff --git a/REORG.TODO/sysdeps/x86/fpu/test-fenv-sse-2.c b/REORG.TODO/sysdeps/x86/fpu/test-fenv-sse-2.c new file mode 100644 index 0000000000..b5f96850f9 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-fenv-sse-2.c @@ -0,0 +1,176 @@ +/* Test x86-specific floating-point environment (bug 16068): SSE part. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <fenv.h> +#include <float.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +static bool +have_sse2 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return false; + + return (edx & bit_SSE2) != 0; +} + +static uint32_t +get_sse_mxcsr (void) +{ + uint32_t temp; + __asm__ __volatile__ ("stmxcsr %0" : "=m" (temp)); + return temp; +} + +static void +set_sse_mxcsr (uint32_t val) +{ + __asm__ __volatile__ ("ldmxcsr %0" : : "m" (val)); +} + +static void +set_sse_mxcsr_bits (uint32_t mask, uint32_t bits) +{ + uint32_t mxcsr = get_sse_mxcsr (); + mxcsr = (mxcsr & ~mask) | bits; + set_sse_mxcsr (mxcsr); +} + +static int +test_sse_mxcsr_bits (const char *test, uint32_t mask, uint32_t bits) +{ + uint32_t mxcsr = get_sse_mxcsr (); + printf ("Testing %s: mxcsr = %x\n", test, mxcsr); + if ((mxcsr & mask) == bits) + { + printf ("PASS: %s\n", test); + return 0; + } + else + { + printf ("FAIL: %s\n", test); + return 1; + } +} + +#define MXCSR_FZ 0x8000 +#define MXCSR_DAZ 0x40 +#define MXCSR_DE 0x2 +#define MXCSR_DM 0x100 + +static __attribute__ ((noinline)) int +sse_tests (void) +{ + int result = 0; + fenv_t env1, env2; + /* Test FZ bit. 
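[Editor's note: both SSE tests here gate on have_sse2, built from __get_cpuid in GCC's <cpuid.h>. The same probe generalizes to other leaf-1 feature flags; the one gotcha is that newer features report in ECX rather than EDX. A sketch for SSE4.2, where bit_SSE4_2 also comes from <cpuid.h>:]

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool
have_sse4_2 (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return false;

  return (ecx & bit_SSE4_2) != 0;   /* ECX, not EDX as for SSE2 */
}

int
main (void)
{
  puts (have_sse4_2 () ? "SSE4.2 available" : "SSE4.2 not available");
  return 0;
}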
*/ + fegetenv (&env1); + set_sse_mxcsr_bits (MXCSR_FZ, MXCSR_FZ); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv FZ restoration", + MXCSR_FZ, 0); + set_sse_mxcsr_bits (MXCSR_FZ, 0); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv FZ restoration 2", + MXCSR_FZ, MXCSR_FZ); + set_sse_mxcsr_bits (MXCSR_FZ, MXCSR_FZ); + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) FZ restoration", + MXCSR_FZ, 0); + set_sse_mxcsr_bits (MXCSR_FZ, MXCSR_FZ); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) FZ restoration", + MXCSR_FZ, 0); + /* Test DAZ bit. */ + set_sse_mxcsr_bits (MXCSR_DAZ, MXCSR_DAZ); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv DAZ restoration", + MXCSR_DAZ, 0); + set_sse_mxcsr_bits (MXCSR_DAZ, 0); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv DAZ restoration 2", + MXCSR_DAZ, MXCSR_DAZ); + set_sse_mxcsr_bits (MXCSR_DAZ, MXCSR_DAZ); + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) DAZ restoration", + MXCSR_DAZ, 0); + set_sse_mxcsr_bits (MXCSR_DAZ, MXCSR_DAZ); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) DAZ restoration", + MXCSR_DAZ, 0); + /* Test DM bit. */ + set_sse_mxcsr_bits (MXCSR_DM, 0); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv DM restoration", + MXCSR_DM, MXCSR_DM); + set_sse_mxcsr_bits (MXCSR_DM, MXCSR_DM); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv DM restoration 2", + MXCSR_DM, 0); + set_sse_mxcsr_bits (MXCSR_DM, 0); + /* Presume FE_NOMASK_ENV should leave the "denormal operand" + exception masked, as not a standard exception. */ + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) DM restoration", + MXCSR_DM, MXCSR_DM); + set_sse_mxcsr_bits (MXCSR_DM, 0); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) DM restoration", + MXCSR_DM, MXCSR_DM); + /* Test DE bit. */ + set_sse_mxcsr_bits (MXCSR_DE, MXCSR_DE); + fegetenv (&env2); + fesetenv (&env1); + result |= test_sse_mxcsr_bits ("fesetenv DE restoration", + MXCSR_DE, 0); + set_sse_mxcsr_bits (MXCSR_DE, 0); + fesetenv (&env2); + result |= test_sse_mxcsr_bits ("fesetenv DE restoration 2", + MXCSR_DE, MXCSR_DE); + set_sse_mxcsr_bits (MXCSR_DE, MXCSR_DE); + fesetenv (FE_NOMASK_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_NOMASK_ENV) DE restoration", + MXCSR_DE, 0); + set_sse_mxcsr_bits (MXCSR_DE, MXCSR_DE); + fesetenv (FE_DFL_ENV); + result |= test_sse_mxcsr_bits ("fesetenv (FE_DFL_ENV) DE restoration", + MXCSR_DE, 0); + return result; +} + +static int +do_test (void) +{ + if (!have_sse2 ()) + { + puts ("CPU does not support SSE2, cannot test"); + return 0; + } + return sse_tests (); +} + +#define TEST_FUNCTION do_test () +#include <test-skeleton.c> diff --git a/REORG.TODO/sysdeps/x86/fpu/test-fenv-sse.c b/REORG.TODO/sysdeps/x86/fpu/test-fenv-sse.c new file mode 100644 index 0000000000..569db9a674 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-fenv-sse.c @@ -0,0 +1,138 @@ +/* Test floating-point environment includes SSE state (bug 16064). + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
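[Editor's note: for readers unfamiliar with the bits sse_tests keeps toggling, FZ (flush-to-zero) makes SSE arithmetic deliver 0 instead of a subnormal result. A standalone demonstration using the same stmxcsr/ldmxcsr idiom; assumes x86-64, where plain double arithmetic runs in the SSE unit, and uses volatile to defeat constant folding, as the tests above do:]

#include <stdint.h>
#include <stdio.h>

#define MXCSR_FZ 0x8000

static uint32_t
get_mxcsr (void)
{
  uint32_t v;
  __asm__ __volatile__ ("stmxcsr %0" : "=m" (v));
  return v;
}

static void
set_mxcsr (uint32_t v)
{
  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (v));
}

int
main (void)
{
  volatile double tiny = 0x1p-1000, scale = 0x1p-40;
  uint32_t saved = get_mxcsr ();

  printf ("FZ clear: %a\n", tiny * scale);   /* 0x1p-1040, a subnormal */
  set_mxcsr (saved | MXCSR_FZ);
  printf ("FZ set:   %a\n", tiny * scale);   /* flushed to 0x0p+0 */
  set_mxcsr (saved);
  return 0;
}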
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <fenv.h> +#include <float.h> +#include <stdbool.h> +#include <stdio.h> + +static bool +have_sse2 (void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return false; + + return (edx & bit_SSE2) != 0; +} + +static __attribute__ ((noinline)) int +sse_tests (void) +{ + int ret = 0; + fenv_t base_env; + if (fegetenv (&base_env) != 0) + { + puts ("fegetenv (&base_env) failed"); + return 1; + } + if (fesetround (FE_UPWARD) != 0) + { + puts ("fesetround (FE_UPWARD) failed"); + return 1; + } + if (fesetenv (&base_env) != 0) + { + puts ("fesetenv (&base_env) failed"); + return 1; + } + volatile float a = 1.0f, b = FLT_MIN, c; + c = a + b; + if (c != 1.0f) + { + puts ("fesetenv did not restore rounding mode"); + ret = 1; + } + if (fesetround (FE_DOWNWARD) != 0) + { + puts ("fesetround (FE_DOWNWARD) failed"); + return 1; + } + if (feupdateenv (&base_env) != 0) + { + puts ("feupdateenv (&base_env) failed"); + return 1; + } + volatile float d = -FLT_MIN, e; + e = a + d; + if (e != 1.0f) + { + puts ("feupdateenv did not restore rounding mode"); + ret = 1; + } + if (fesetround (FE_UPWARD) != 0) + { + puts ("fesetround (FE_UPWARD) failed"); + return 1; + } + fenv_t upward_env; + if (feholdexcept (&upward_env) != 0) + { + puts ("feholdexcept (&upward_env) failed"); + return 1; + } + if (fesetround (FE_DOWNWARD) != 0) + { + puts ("fesetround (FE_DOWNWARD) failed"); + return 1; + } + if (fesetenv (&upward_env) != 0) + { + puts ("fesetenv (&upward_env) failed"); + return 1; + } + e = a + d; + if (e != 1.0f) + { + puts ("fesetenv did not restore rounding mode from feholdexcept"); + ret = 1; + } + if (fesetround (FE_UPWARD) != 0) + { + puts ("fesetround (FE_UPWARD) failed"); + return 1; + } + if (fesetenv (FE_DFL_ENV) != 0) + { + puts ("fesetenv (FE_DFL_ENV) failed"); + return 1; + } + c = a + b; + if (c != 1.0f) + { + puts ("fesetenv (FE_DFL_ENV) did not restore rounding mode"); + ret = 1; + } + return ret; +} + +static int +do_test (void) +{ + if (!have_sse2 ()) + { + puts ("CPU does not support SSE2, cannot test"); + return 0; + } + return sse_tests (); +} + +#define TEST_FUNCTION do_test () +#include <test-skeleton.c> diff --git a/REORG.TODO/sysdeps/x86/fpu/test-fenv-x87.c b/REORG.TODO/sysdeps/x86/fpu/test-fenv-x87.c new file mode 100644 index 0000000000..29ae1c8a7e --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-fenv-x87.c @@ -0,0 +1,169 @@ +/* Test x86-specific floating-point environment (bug 16068): x87 part. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
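[Editor's note: the probe sse_tests uses deserves a word: under FE_UPWARD, 1.0f + FLT_MIN rounds up to the next representable float instead of back to 1.0f, so comparing the sum against 1.0f reveals which rounding mode is in effect. Standalone version; link with -lm; the volatile idiom again prevents compile-time folding:]

#include <fenv.h>
#include <float.h>
#include <stdio.h>

int
main (void)
{
  volatile float a = 1.0f, b = FLT_MIN;

  fesetround (FE_TONEAREST);
  printf ("to-nearest: %a\n", a + b);   /* 0x1p+0 */
  fesetround (FE_UPWARD);
  printf ("upward:     %a\n", a + b);   /* 0x1.000002p+0 */
  fesetround (FE_TONEAREST);
  return 0;
}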
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <float.h> +#include <fpu_control.h> +#include <stdint.h> +#include <stdio.h> + +static uint16_t +get_x87_cw (void) +{ + fpu_control_t cw; + _FPU_GETCW (cw); + return cw; +} + +static void +set_x87_cw (uint16_t val) +{ + fpu_control_t cw = val; + _FPU_SETCW (cw); +} + +static void +set_x87_cw_bits (uint16_t mask, uint16_t bits) +{ + uint16_t cw = get_x87_cw (); + cw = (cw & ~mask) | bits; + set_x87_cw (cw); +} + +static int +test_x87_cw_bits (const char *test, uint16_t mask, uint16_t bits) +{ + uint16_t cw = get_x87_cw (); + printf ("Testing %s: cw = %x\n", test, cw); + if ((cw & mask) == bits) + { + printf ("PASS: %s\n", test); + return 0; + } + else + { + printf ("FAIL: %s\n", test); + return 1; + } +} + +static uint16_t +get_x87_sw (void) +{ + uint16_t temp; + __asm__ __volatile__ ("fnstsw %0" : "=a" (temp)); + return temp; +} + +static void +set_x87_sw_bits (uint16_t mask, uint16_t bits) +{ + fenv_t temp; + __asm__ __volatile__ ("fnstenv %0" : "=m" (temp)); + temp.__status_word = (temp.__status_word & ~mask) | bits; + __asm__ __volatile__ ("fldenv %0" : : "m" (temp)); +} + +static int +test_x87_sw_bits (const char *test, uint16_t mask, uint16_t bits) +{ + uint16_t sw = get_x87_sw (); + printf ("Testing %s: sw = %x\n", test, sw); + if ((sw & mask) == bits) + { + printf ("PASS: %s\n", test); + return 0; + } + else + { + printf ("FAIL: %s\n", test); + return 1; + } +} + +#define X87_CW_PREC_MASK _FPU_EXTENDED + +static int +do_test (void) +{ + int result = 0; + fenv_t env1, env2; + /* Test precision mask. */ + fegetenv (&env1); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_SINGLE); + fegetenv (&env2); + fesetenv (&env1); + result |= test_x87_cw_bits ("fesetenv precision restoration", + X87_CW_PREC_MASK, _FPU_EXTENDED); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_EXTENDED); + fesetenv (&env2); + result |= test_x87_cw_bits ("fesetenv precision restoration 2", + X87_CW_PREC_MASK, _FPU_SINGLE); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_DOUBLE); + fesetenv (FE_NOMASK_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_NOMASK_ENV) precision restoration", + X87_CW_PREC_MASK, _FPU_EXTENDED); + set_x87_cw_bits (X87_CW_PREC_MASK, _FPU_SINGLE); + fesetenv (FE_DFL_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_DFL_ENV) precision restoration", + X87_CW_PREC_MASK, _FPU_EXTENDED); + /* Test x87 denormal operand masking. */ + set_x87_cw_bits (_FPU_MASK_DM, 0); + fegetenv (&env2); + fesetenv (&env1); + result |= test_x87_cw_bits ("fesetenv denormal mask restoration", + _FPU_MASK_DM, _FPU_MASK_DM); + set_x87_cw_bits (_FPU_MASK_DM, _FPU_MASK_DM); + fesetenv (&env2); + result |= test_x87_cw_bits ("fesetenv denormal mask restoration 2", + _FPU_MASK_DM, 0); + set_x87_cw_bits (_FPU_MASK_DM, 0); + /* Presume FE_NOMASK_ENV should leave the "denormal operand" + exception masked, as not a standard exception. 
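[Editor's note: the precision-control field that do_test flips between _FPU_SINGLE and _FPU_EXTENDED changes the significand width of every x87 result. A standalone illustration for x87 long double arithmetic, a sketch using the same _FPU_GETCW/_FPU_SETCW macros:]

#include <fpu_control.h>
#include <stdio.h>

int
main (void)
{
  fpu_control_t saved, cw;
  volatile long double a = 1.0L, b = 0x1p-30L;

  _FPU_GETCW (saved);
  printf ("extended: %La\n", a + b);   /* the 2^-30 bit is kept */

  cw = (saved & ~_FPU_EXTENDED) | _FPU_SINGLE;   /* 24-bit significand */
  _FPU_SETCW (cw);
  printf ("single:   %La\n", a + b);   /* rounds back to 1.0 */

  _FPU_SETCW (saved);
  return 0;
}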
*/ + fesetenv (FE_NOMASK_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_NOMASK_ENV) denormal mask " + "restoration", + _FPU_MASK_DM, _FPU_MASK_DM); + set_x87_cw_bits (_FPU_MASK_DM, 0); + fesetenv (FE_DFL_ENV); + result |= test_x87_cw_bits ("fesetenv (FE_DFL_ENV) denormal mask " + "restoration", + _FPU_MASK_DM, _FPU_MASK_DM); + /* Test x87 denormal operand exception. */ + set_x87_sw_bits (__FE_DENORM, __FE_DENORM); + fegetenv (&env2); + fesetenv (&env1); + result |= test_x87_sw_bits ("fesetenv denormal exception restoration", + __FE_DENORM, 0); + set_x87_sw_bits (__FE_DENORM, 0); + fesetenv (&env2); + result |= test_x87_sw_bits ("fesetenv denormal exception restoration 2", + __FE_DENORM, __FE_DENORM); + set_x87_sw_bits (__FE_DENORM, __FE_DENORM); + fesetenv (FE_NOMASK_ENV); + result |= test_x87_sw_bits ("fesetenv (FE_NOMASK_ENV) exception restoration", + __FE_DENORM, 0); + set_x87_sw_bits (__FE_DENORM, __FE_DENORM); + fesetenv (FE_DFL_ENV); + result |= test_x87_sw_bits ("fesetenv (FE_DFL_ENV) exception restoration", + __FE_DENORM, 0); + return result; +} + +#define TEST_FUNCTION do_test () +#include <test-skeleton.c> diff --git a/REORG.TODO/sysdeps/x86/fpu/test-flt-eval-method-387.c b/REORG.TODO/sysdeps/x86/fpu/test-flt-eval-method-387.c new file mode 100644 index 0000000000..2fb7acfb76 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-flt-eval-method-387.c @@ -0,0 +1 @@ +#include <test-flt-eval-method.c> diff --git a/REORG.TODO/sysdeps/x86/fpu/test-flt-eval-method-sse.c b/REORG.TODO/sysdeps/x86/fpu/test-flt-eval-method-sse.c new file mode 100644 index 0000000000..2fb7acfb76 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-flt-eval-method-sse.c @@ -0,0 +1 @@ +#include <test-flt-eval-method.c> diff --git a/REORG.TODO/sysdeps/x86/fpu/test-math-vector-sincos.h b/REORG.TODO/sysdeps/x86/fpu/test-math-vector-sincos.h new file mode 100644 index 0000000000..95282a3ac7 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu/test-math-vector-sincos.h @@ -0,0 +1,98 @@ +/* Wrappers definitions for tests of ABI of vector sincos/sincosf having + vector declaration "#pragma omp declare simd notinbranch". + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define INIT_VEC_PTRS_LOOP(vec, val, len) \ + do \ + { \ + union { VEC_INT_TYPE v; __typeof__ ((val)[0]) *a[(len)]; } u; \ + for (i = 0; i < len; i++) \ + u.a[i] = &(val)[i]; \ + (vec) = u.v; \ + } \ + while (0) + +/* Wrapper for vector sincos/sincosf compatible with x86_64 and x32 variants + of _ZGVbN2vvv_sincos, _ZGVdN4vvv_sincos, _ZGVeN8vvv_sincos; + x32 variants of _ZGVbN4vvv_sincosf, _ZGVcN4vvv_sincos, _ZGVdN8vvv_sincosf, + _ZGVeN16vvv_sincosf. 
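[Editor's note: the INIT_VEC_PTRS_LOOP union trick above exists because the vector sincos ABI passes the result pointers packed into integer vector registers; the union is how scalar pointers get into and out of that representation. A concrete round trip with GCC vector extensions; assumes 64-bit pointers, i.e. x86-64:]

#include <stdio.h>

typedef long long v2di __attribute__ ((vector_size (16)));

int
main (void)
{
  double out[2];
  union { v2di v; double *a[2]; } u;
  for (int i = 0; i < 2; i++)
    u.a[i] = &out[i];
  v2di packed = u.v;   /* two pointers viewed as a 2 x 64-bit vector */

  /* A vector sincos variant would receive PACKED in a register and
     store through the pointers; here we just unpack it again.  */
  union { v2di v; double *a[2]; } w = { .v = packed };
  *w.a[0] = 1.5;
  *w.a[1] = 2.5;
  printf ("%g %g\n", out[0], out[1]);
  return 0;
}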
*/ +#define VECTOR_WRAPPER_fFF_2(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN], r1_loc[VEC_LEN]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr, r_loc, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr1, r1_loc, VEC_LEN); \ + vector_func (mx, mr, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} + +/* Wrapper for vector sincos/sincosf compatible with x86_64 variants of + _ZGVcN4vvv_sincos, _ZGVeN16vvv_sincosf, _ZGVbN4vvv_sincosf, + _ZGVdN8vvv_sincosf, _ZGVcN8vvv_sincosf. */ +#define VECTOR_WRAPPER_fFF_3(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN/2], r1_loc[VEC_LEN/2]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr, r_loc, VEC_LEN/2); \ + INIT_VEC_PTRS_LOOP (mr1, r1_loc, VEC_LEN/2); \ + vector_func (mx, mr, mr, mr1, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN/2); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN/2); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} + +/* Wrapper for vector sincosf compatible with x86_64 variant of + _ZGVcN8vvv_sincosf. */ +#define VECTOR_WRAPPER_fFF_4(scalar_func, vector_func) \ +extern void vector_func (VEC_TYPE, VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE, \ + VEC_INT_TYPE, VEC_INT_TYPE); \ +void scalar_func (FLOAT x, FLOAT * r, FLOAT * r1) \ +{ \ + int i; \ + FLOAT r_loc[VEC_LEN/4], r1_loc[VEC_LEN/4]; \ + VEC_TYPE mx; \ + VEC_INT_TYPE mr, mr1; \ + INIT_VEC_LOOP (mx, x, VEC_LEN); \ + INIT_VEC_PTRS_LOOP (mr, r_loc, VEC_LEN/4); \ + INIT_VEC_PTRS_LOOP (mr1, r1_loc, VEC_LEN/4); \ + vector_func (mx, mr, mr, mr, mr, mr1, mr1, mr1, mr1); \ + TEST_VEC_LOOP (r_loc, VEC_LEN/4); \ + TEST_VEC_LOOP (r1_loc, VEC_LEN/4); \ + *r = r_loc[0]; \ + *r1 = r1_loc[0]; \ + return; \ +} diff --git a/REORG.TODO/sysdeps/x86/fpu_control.h b/REORG.TODO/sysdeps/x86/fpu_control.h new file mode 100644 index 0000000000..7f8a57183a --- /dev/null +++ b/REORG.TODO/sysdeps/x86/fpu_control.h @@ -0,0 +1,109 @@ +/* FPU control word bits. x86 version. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Olaf Flebbe. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FPU_CONTROL_H +#define _FPU_CONTROL_H 1 + +/* Note that this file sets on x86-64 only the x87 FPU, it does not + touch the SSE unit. */ + +/* Here is the dirty part. Set up your 387 through the control word + * (cw) register. 
+ * + * 15-13 12 11-10 9-8 7-6 5 4 3 2 1 0 + * | reserved | IC | RC | PC | reserved | PM | UM | OM | ZM | DM | IM + * + * IM: Invalid operation mask + * DM: Denormalized operand mask + * ZM: Zero-divide mask + * OM: Overflow mask + * UM: Underflow mask + * PM: Precision (inexact result) mask + * + * Mask bit is 1 means no interrupt. + * + * PC: Precision control + * 11 - round to extended precision + * 10 - round to double precision + * 00 - round to single precision + * + * RC: Rounding control + * 00 - rounding to nearest + * 01 - rounding down (toward - infinity) + * 10 - rounding up (toward + infinity) + * 11 - rounding toward zero + * + * IC: Infinity control + * That is for 8087 and 80287 only. + * + * The hardware default is 0x037f which we use. + */ + +#include <features.h> + +/* masking of interrupts */ +#define _FPU_MASK_IM 0x01 +#define _FPU_MASK_DM 0x02 +#define _FPU_MASK_ZM 0x04 +#define _FPU_MASK_OM 0x08 +#define _FPU_MASK_UM 0x10 +#define _FPU_MASK_PM 0x20 + +/* precision control */ +#define _FPU_EXTENDED 0x300 /* libm requires double extended precision. */ +#define _FPU_DOUBLE 0x200 +#define _FPU_SINGLE 0x0 + +/* rounding control */ +#define _FPU_RC_NEAREST 0x0 /* RECOMMENDED */ +#define _FPU_RC_DOWN 0x400 +#define _FPU_RC_UP 0x800 +#define _FPU_RC_ZERO 0xC00 + +#define _FPU_RESERVED 0xF0C0 /* Reserved bits in cw */ + + +/* The fdlibm code requires strict IEEE double precision arithmetic, + and no interrupts for exceptions, rounding to nearest. */ + +#define _FPU_DEFAULT 0x037f + +/* IEEE: same as above. */ +#define _FPU_IEEE 0x037f + +/* Type of the control word. */ +typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__HI__))); + +/* Macros for accessing the hardware control word. "*&" is used to + work around a bug in older versions of GCC. __volatile__ is used + to support combination of writing the control register and reading + it back. Without __volatile__, the old value may be used for reading + back under compiler optimization. + + Note that the use of these macros is not sufficient anymore with + recent hardware nor on x86-64. Some floating point operations are + executed in the SSE/SSE2 engines which have their own control and + status register. */ +#define _FPU_GETCW(cw) __asm__ __volatile__ ("fnstcw %0" : "=m" (*&cw)) +#define _FPU_SETCW(cw) __asm__ __volatile__ ("fldcw %0" : : "m" (*&cw)) + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#endif /* fpu_control.h */ diff --git a/REORG.TODO/sysdeps/x86/init-arch.h b/REORG.TODO/sysdeps/x86/init-arch.h new file mode 100644 index 0000000000..15d3f0975d --- /dev/null +++ b/REORG.TODO/sysdeps/x86/init-arch.h @@ -0,0 +1,75 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
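[Editor's note: the layout comment above can be checked against the macros: OR-ing all six mask bits with extended precision and round-to-nearest reproduces the 0x037f default except for reserved bit 6, which the hardware leaves set at power-up. A quick arithmetic check:]

#include <fpu_control.h>
#include <stdio.h>

int
main (void)
{
  fpu_control_t cw = (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM
                      | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM
                      | _FPU_EXTENDED | _FPU_RC_NEAREST);

  /* Prints 0x33f vs 0x37f; the difference is reserved bit 6.  */
  printf ("composed: %#x  _FPU_DEFAULT: %#x\n",
          (unsigned int) cw, (unsigned int) _FPU_DEFAULT);
  return 0;
}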
*/ + +#ifdef __ASSEMBLER__ +# include <cpu-features.h> +#else +# include <ldsodefs.h> +#endif + +/* These macros are used to implement ifunc selection in C. To implement + an ifunc function, foo, which returns the address of __foo_sse2 or + __foo_avx2: + + #define foo __redirect_foo + #define __foo __redirect___foo + #include <foo.h> + #undef foo + #undef __foo + #define SYMBOL_NAME foo + #include <init-arch.h> + + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + + static inline void * + foo_selector (void) + { + if (use AVX2) + return OPTIMIZE (avx2); + + return OPTIMIZE (sse2); + } + + libc_ifunc_redirected (__redirect_foo, foo, foo_selector ()); + +*/ + +#define PASTER1(x,y) x##_##y +#define EVALUATOR1(x,y) PASTER1 (x,y) +#define PASTER2(x,y) __##x##_##y +#define EVALUATOR2(x,y) PASTER2 (x,y) + +/* Basically set '__redirect_<symbol>' to use as type definition, + '__<symbol>_<variant>' as the optimized implementation and + '<symbol>_ifunc_selector' as the IFUNC selector. */ +#define REDIRECT_NAME EVALUATOR1 (__redirect, SYMBOL_NAME) +#define OPTIMIZE(name) EVALUATOR2 (SYMBOL_NAME, name) +#define IFUNC_SELECTOR EVALUATOR1 (SYMBOL_NAME, ifunc_selector) + +#ifndef __x86_64__ +/* Due to the reordering and the other nifty extensions in i686, it is + not really good to use heavily i586 optimized code on an i686. It's + better to use i486 code if it isn't an i586. */ +# if MINIMUM_ISA == 686 +# define USE_I586 0 +# define USE_I686 1 +# else +# define USE_I586 (HAS_ARCH_FEATURE (I586) && !HAS_ARCH_FEATURE (I686)) +# define USE_I686 HAS_ARCH_FEATURE (I686) +# endif +#endif diff --git a/REORG.TODO/sysdeps/x86/libc-start.c b/REORG.TODO/sysdeps/x86/libc-start.c new file mode 100644 index 0000000000..e11b490f5c --- /dev/null +++ b/REORG.TODO/sysdeps/x86/libc-start.c @@ -0,0 +1,28 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef SHARED +#include <ldsodefs.h> +# include <cpu-features.h> +# include <cpu-features.c> + +extern struct cpu_features _dl_x86_cpu_features; + +#define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_x86_cpu_features) + +#endif +# include <csu/libc-start.c> diff --git a/REORG.TODO/sysdeps/x86/linkmap.h b/REORG.TODO/sysdeps/x86/linkmap.h new file mode 100644 index 0000000000..dd0d140874 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/linkmap.h @@ -0,0 +1,16 @@ +#if __WORDSIZE == 64 +struct link_map_machine + { + Elf64_Addr plt; /* Address of .plt + 0x16 */ + Elf64_Addr gotplt; /* Address of .got + 0x18 */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ + }; + +#else +struct link_map_machine + { + Elf32_Addr plt; /* Address of .plt + 0x16 */ + Elf32_Addr gotplt; /* Address of .got + 0x0c */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. 
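[Editor's note: the ifunc recipe spelled out in the init-arch.h comment has a self-contained analogue outside glibc using GCC's ifunc attribute and CPU builtins, which is the easiest way to experiment with the dispatch pattern. foo, impl_avx2 and impl_generic are hypothetical names; GNU C on ELF assumed:]

#include <stdio.h>

static int
impl_generic (int x)
{
  return x + 1;
}

static int
impl_avx2 (int x)
{
  return x + 1;   /* stand-in for a vectorized implementation */
}

/* The resolver runs once, at relocation time, and returns the
   address main will actually call.  */
static int (*resolve_foo (void)) (int)
{
  __builtin_cpu_init ();   /* required this early in GCC's model */
  return __builtin_cpu_supports ("avx2") ? impl_avx2 : impl_generic;
}

int foo (int) __attribute__ ((ifunc ("resolve_foo")));

int
main (void)
{
  printf ("%d\n", foo (41));   /* 42 either way */
  return 0;
}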
*/ + }; +#endif diff --git a/REORG.TODO/sysdeps/x86/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/x86/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..fd86806800 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#include <bits/wordsize.h> + +#ifdef __x86_64__ +# if __WORDSIZE == 64 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_ATTR_T 56 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_RWLOCK_T 56 +# define __SIZEOF_PTHREAD_BARRIER_T 32 +# else +# define __SIZEOF_PTHREAD_MUTEX_T 32 +# define __SIZEOF_PTHREAD_ATTR_T 32 +# define __SIZEOF_PTHREAD_MUTEX_T 32 +# define __SIZEOF_PTHREAD_RWLOCK_T 44 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +# endif +#else +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_ATTR_T 36 +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_RWLOCK_T 32 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +#endif +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Definitions for internal mutex struct. */ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 1 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; +#ifdef __x86_64__ + int __cur_writer; + int __shared; + signed char __rwelision; +# ifdef __ILP32__ + unsigned char __pad1[3]; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, { 0, 0, 0 } +# else + unsigned char __pad1[7]; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, { 0, 0, 0, 0, 0, 0, 0 } +# endif + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned int __flags; +# define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1 +#else + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned char __flags; + unsigned char __shared; + signed char __rwelision; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0 + unsigned char __pad2; + int __cur_writer; +#endif +}; + +#ifndef __x86_64__ +/* Extra attributes for the cleanup functions. 
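[Editor's note: the __SIZEOF_* constants in pthreadtypes-arch.h above pin the public ABI: the user-visible pthread types are opaque unions padded to exactly these sizes, so any change breaks binary compatibility. They can be checked from an ordinary program; C11 static_assert; the values printed on x86-64/LP64 should be 40 and 56 per the header:]

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

int
main (void)
{
  static_assert (sizeof (pthread_mutex_t) == __SIZEOF_PTHREAD_MUTEX_T,
                 "pthread_mutex_t size is part of the ABI");
  printf ("pthread_mutex_t:  %zu\n", sizeof (pthread_mutex_t));
  printf ("pthread_rwlock_t: %zu\n", sizeof (pthread_rwlock_t));
  return 0;
}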
*/ +# define __cleanup_fct_attribute __attribute__ ((__regparm__ (1))) +#endif + +#endif /* bits/pthreadtypes.h */ diff --git a/REORG.TODO/sysdeps/x86/string_private.h b/REORG.TODO/sysdeps/x86/string_private.h new file mode 100644 index 0000000000..485b73eca5 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/string_private.h @@ -0,0 +1,20 @@ +/* Define _STRING_ARCH_unaligned. i486/x86-64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The ix86 processors can access unaligned multi-byte variables. */ +#define _STRING_ARCH_unaligned 1 diff --git a/REORG.TODO/sysdeps/x86/tininess.h b/REORG.TODO/sysdeps/x86/tininess.h new file mode 100644 index 0000000000..1db37790f8 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/tininess.h @@ -0,0 +1 @@ +#define TININESS_AFTER_ROUNDING 1 diff --git a/REORG.TODO/sysdeps/x86/tst-get-cpu-features-static.c b/REORG.TODO/sysdeps/x86/tst-get-cpu-features-static.c new file mode 100644 index 0000000000..03f59060c5 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/tst-get-cpu-features-static.c @@ -0,0 +1 @@ +#include "tst-get-cpu-features.c" diff --git a/REORG.TODO/sysdeps/x86/tst-get-cpu-features.c b/REORG.TODO/sysdeps/x86/tst-get-cpu-features.c new file mode 100644 index 0000000000..5aa5779857 --- /dev/null +++ b/REORG.TODO/sysdeps/x86/tst-get-cpu-features.c @@ -0,0 +1,31 @@ +/* Test case for x86 __get_cpu_features interface + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <cpu-features.h> + +static int +do_test (void) +{ + if (__get_cpu_features ()->kind == arch_kind_unknown) + abort (); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" |