diff options
Diffstat (limited to 'sysdeps/i386')
450 files changed, 0 insertions, 62011 deletions
diff --git a/sysdeps/i386/Implies b/sysdeps/i386/Implies deleted file mode 100644 index 20b2dffc29..0000000000 --- a/sysdeps/i386/Implies +++ /dev/null @@ -1,5 +0,0 @@ -x86 -wordsize-32 -ieee754/ldbl-96 -ieee754/dbl-64 -ieee754/flt-32 diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile deleted file mode 100644 index e30e1339f0..0000000000 --- a/sysdeps/i386/Makefile +++ /dev/null @@ -1,103 +0,0 @@ -# The mpn functions need a #define for asm syntax flavor. -# Every i386 port in use uses gas syntax (I think). -asm-CPPFLAGS += -DGAS_SYNTAX - -# The i386 `long double' is a distinct type we support. -long-double-fcts = yes - -ifeq ($(subdir),string) -sysdep_routines += cacheinfo -endif - -ifeq ($(subdir),gmon) -sysdep_routines += i386-mcount -endif - -ifeq ($(subdir),elf) -CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused -CFLAGS-dl-load.c += -Wno-unused -CFLAGS-dl-reloc.c += -Wno-unused -endif - -ifeq ($(subdir),debug) -CFLAGS-backtrace.c += -fexceptions -endif - -# Most of the glibc routines don't ever call user defined callbacks -# nor use any FPU or SSE* and as such don't need bigger %esp alignment -# than 4 bytes. -# Lots of routines in math will use FPU, so make math subdir an exception -# here. -# In gcc 4.6 (and maybe earlier?) giving -mpreferred-stack-boundary=2 is -# an error, so don't try to reduce it here like we used to. We still -# explicit set -mpreferred-stack-boundary=4 the places where it matters, -# in case an older compiler defaulted to 2. 
-ifeq ($(subdir),math) -sysdep-CFLAGS += -mpreferred-stack-boundary=4 -else -ifeq ($(subdir),csu) -sysdep-CFLAGS += -mpreferred-stack-boundary=4 -gen-as-const-headers += link-defines.sym -else -# Likewise, any function which calls user callbacks -uses-callbacks += -mpreferred-stack-boundary=4 -# Likewise, any stack alignment tests -stack-align-test-flags += -malign-double -mpreferred-stack-boundary=4 -endif -endif - -# And a couple of other routines -ifeq ($(subdir),stdlib) -CFLAGS-exit.c += -mpreferred-stack-boundary=4 -CFLAGS-cxa_finalize.c += -mpreferred-stack-boundary=4 -endif -ifeq ($(subdir),elf) -CFLAGS-dl-init.c += -mpreferred-stack-boundary=4 -CFLAGS-dl-fini.c += -mpreferred-stack-boundary=4 -CFLAGS-dl-open.c += -mpreferred-stack-boundary=4 -CFLAGS-dl-close.c += -mpreferred-stack-boundary=4 -CFLAGS-dl-error.c += -mpreferred-stack-boundary=4 -endif -ifeq ($(subdir),dlfcn) -CFLAGS-dlopen.c += -mpreferred-stack-boundary=4 -CFLAGS-dlopenold.c += -mpreferred-stack-boundary=4 -CFLAGS-dlclose.c += -mpreferred-stack-boundary=4 -CFLAGS-dlerror.c += -mpreferred-stack-boundary=4 -endif - -ifneq (,$(filter -mno-tls-direct-seg-refs,$(CFLAGS))) -defines += -DNO_TLS_DIRECT_SEG_REFS -endif - -ifeq ($(subdir),elf) -sysdep-dl-routines += tlsdesc dl-tlsdesc - -tests += tst-audit3 -modules-names += tst-auditmod3a tst-auditmod3b - -$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so -$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so -tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so -endif - -ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym -endif - -# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since -# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters -# which must be preserved. -# With SSE disabled, ensure -fpmath is not set to use sse either. 
-rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387 -ifeq ($(subdir),elf) -CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ - $(rtld-CFLAGS)) - -tests-special += $(objpfx)tst-ld-sse-use.out -$(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so - @echo "Checking ld.so for SSE register use. This will take a few seconds..." - $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ - $(evaluate-test) -else -CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) -endif diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions deleted file mode 100644 index 7be44aad7a..0000000000 --- a/sysdeps/i386/Versions +++ /dev/null @@ -1,35 +0,0 @@ -ld { - GLIBC_2.3 { - # The alternative i386 runtime interface to TLS. - ___tls_get_addr; - } -} -libc { - GLIBC_2.0 { - # Functions from libgcc. - __divdi3; __moddi3; __udivdi3; __umoddi3; - } - GLIBC_2.1 { - # global variable - _fp_hw; - } - GLIBC_2.1.1 { - # extern inline functions used by <bits/string.h> - __memcpy_c; __memset_cc; __memset_cg; __memset_gg; - __memcpy_by2; __memcpy_by4; __memcpy_g; __mempcpy_by2; __mempcpy_by4; - __mempcpy_byn; __memset_ccn_by2; __memset_ccn_by4; __memset_gcn_by2; - __memset_gcn_by4; __stpcpy_g; __strcat_c; __strcat_g; __strchr_c; - __strchr_g; __strchrnul_c; __strchrnul_g; __strcmp_gg; __strcpy_g; - __strcspn_c1; __strcspn_cg; __strcspn_g; __strlen_g; __strncat_g; - __strncmp_g; __strncpy_by2; __strncpy_by4; __strncpy_byn; __strncpy_gg; - __strpbrk_cg; __strpbrk_g; __strrchr_c; __strrchr_g; __strspn_c1; - __strspn_cg; __strspn_g; __strstr_cg; __strstr_g; - } -} -libm { - GLIBC_2.1 { - # A generic bug got this omitted from other configurations' version - # sets, but we always had it. 
- exp2l; - } -} diff --git a/sysdeps/i386/____longjmp_chk.S b/sysdeps/i386/____longjmp_chk.S deleted file mode 100644 index 0910861a9d..0000000000 --- a/sysdeps/i386/____longjmp_chk.S +++ /dev/null @@ -1 +0,0 @@ -#error "OS-specific version needed" diff --git a/sysdeps/i386/__longjmp.S b/sysdeps/i386/__longjmp.S deleted file mode 100644 index 3719763cd6..0000000000 --- a/sysdeps/i386/__longjmp.S +++ /dev/null @@ -1,72 +0,0 @@ -/* longjmp for i386. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <jmpbuf-offsets.h> -#include <asm-syntax.h> -#include <stap-probe.h> - - .text -ENTRY (__longjmp) -#ifdef PTR_DEMANGLE - movl 4(%esp), %eax /* User's jmp_buf in %eax. */ - - /* Save the return address now. */ - movl (JB_PC*4)(%eax), %edx - /* Get the stack pointer. */ - movl (JB_SP*4)(%eax), %ecx - PTR_DEMANGLE (%edx) - PTR_DEMANGLE (%ecx) - LIBC_PROBE (longjmp, 3, 4@%eax, -4@8(%esp), 4@%edx) - cfi_def_cfa(%eax, 0) - cfi_register(%eip, %edx) - cfi_register(%esp, %ecx) - cfi_offset(%ebx, JB_BX*4) - cfi_offset(%esi, JB_SI*4) - cfi_offset(%edi, JB_DI*4) - cfi_offset(%ebp, JB_BP*4) - /* Restore registers. 
*/ - movl (JB_BX*4)(%eax), %ebx - movl (JB_SI*4)(%eax), %esi - movl (JB_DI*4)(%eax), %edi - movl (JB_BP*4)(%eax), %ebp - cfi_restore(%ebx) - cfi_restore(%esi) - cfi_restore(%edi) - cfi_restore(%ebp) - - LIBC_PROBE (longjmp_target, 3, 4@%eax, -4@8(%esp), 4@%edx) - movl 8(%esp), %eax /* Second argument is return value. */ - movl %ecx, %esp -#else - movl 4(%esp), %ecx /* User's jmp_buf in %ecx. */ - movl 8(%esp), %eax /* Second argument is return value. */ - /* Save the return address now. */ - movl (JB_PC*4)(%ecx), %edx - LIBC_PROBE (longjmp, 3, 4@%ecx, -4@%eax, 4@%edx) - /* Restore registers. */ - movl (JB_BX*4)(%ecx), %ebx - movl (JB_SI*4)(%ecx), %esi - movl (JB_DI*4)(%ecx), %edi - movl (JB_BP*4)(%ecx), %ebp - movl (JB_SP*4)(%ecx), %esp - LIBC_PROBE (longjmp_target, 3, 4@%ecx, -4@%ecx, 4@%edx) -#endif - /* Jump to saved PC. */ - jmp *%edx -END (__longjmp) diff --git a/sysdeps/i386/abort-instr.h b/sysdeps/i386/abort-instr.h deleted file mode 100644 index 810f10379b..0000000000 --- a/sysdeps/i386/abort-instr.h +++ /dev/null @@ -1,2 +0,0 @@ -/* An instruction which should crash any program is `hlt'. */ -#define ABORT_INSTRUCTION asm ("hlt") diff --git a/sysdeps/i386/add_n.S b/sysdeps/i386/add_n.S deleted file mode 100644 index c2923094a8..0000000000 --- a/sysdeps/i386/add_n.S +++ /dev/null @@ -1,111 +0,0 @@ -/* Add two limb vectors of the same length > 0 and store sum in a third - limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+8 /* space for 2 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define S2 S1+4 -#define SIZE S2+4 - - .text -ENTRY (__mpn_add_n) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 4) - movl S1(%esp),%esi - cfi_rel_offset (esi, 0) - movl S2(%esp),%edx - movl SIZE(%esp),%ecx - movl %ecx,%eax - shrl $3,%ecx /* compute count for unrolled loop */ - negl %eax - andl $7,%eax /* get index where to start loop */ - jz L(oop) /* necessary special case for 0 */ - incl %ecx /* adjust loop count */ - shll $2,%eax /* adjustment for pointers... */ - subl %eax,%edi /* ... since they are offset ... */ - subl %eax,%esi /* ... by a constant when we ... */ - subl %eax,%edx /* ... enter the loop */ - shrl $2,%eax /* restore previous value */ -#ifdef PIC -/* Calculate start address in loop for PIC. Due to limitations in some - assemblers, Loop-L0-3 cannot be put into the leal */ - call L(0) - cfi_adjust_cfa_offset (4) -L(0): leal (%eax,%eax,8),%eax - addl (%esp),%eax - addl $(L(oop)-L(0)-3),%eax - addl $4,%esp - cfi_adjust_cfa_offset (-4) -#else -/* Calculate start address in loop for non-PIC. 
*/ - leal (L(oop) - 3)(%eax,%eax,8),%eax -#endif - jmp *%eax /* jump into loop */ - ALIGN (3) -L(oop): movl (%esi),%eax - adcl (%edx),%eax - movl %eax,(%edi) - movl 4(%esi),%eax - adcl 4(%edx),%eax - movl %eax,4(%edi) - movl 8(%esi),%eax - adcl 8(%edx),%eax - movl %eax,8(%edi) - movl 12(%esi),%eax - adcl 12(%edx),%eax - movl %eax,12(%edi) - movl 16(%esi),%eax - adcl 16(%edx),%eax - movl %eax,16(%edi) - movl 20(%esi),%eax - adcl 20(%edx),%eax - movl %eax,20(%edi) - movl 24(%esi),%eax - adcl 24(%edx),%eax - movl %eax,24(%edi) - movl 28(%esi),%eax - adcl 28(%edx),%eax - movl %eax,28(%edi) - leal 32(%edi),%edi - leal 32(%esi),%esi - leal 32(%edx),%edx - decl %ecx - jnz L(oop) - - sbbl %eax,%eax - negl %eax - - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_add_n) diff --git a/sysdeps/i386/addmul_1.S b/sysdeps/i386/addmul_1.S deleted file mode 100644 index ad90ea53e5..0000000000 --- a/sysdeps/i386/addmul_1.S +++ /dev/null @@ -1,86 +0,0 @@ -/* i80386 __mpn_addmul_1 -- Multiply a limb vector with a limb and add - the result to a second limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. 
*/ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define SIZE S1+4 -#define S2LIMB SIZE+4 - -#define res_ptr edi -#define s1_ptr esi -#define sizeP ecx -#define s2_limb ebx - - .text -ENTRY (__mpn_addmul_1) - - pushl %res_ptr - cfi_adjust_cfa_offset (4) - pushl %s1_ptr - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %s2_limb - cfi_adjust_cfa_offset (4) - - movl RES(%esp), %res_ptr - cfi_rel_offset (res_ptr, 12) - movl S1(%esp), %s1_ptr - cfi_rel_offset (s1_ptr, 8) - movl SIZE(%esp), %sizeP - movl S2LIMB(%esp), %s2_limb - cfi_rel_offset (s2_limb, 0) - leal (%res_ptr,%sizeP,4), %res_ptr - leal (%s1_ptr,%sizeP,4), %s1_ptr - negl %sizeP - xorl %ebp, %ebp - cfi_rel_offset (ebp, 4) - ALIGN (3) -L(oop): - movl (%s1_ptr,%sizeP,4), %eax - mull %s2_limb - addl %ebp, %eax - adcl $0, %edx - addl %eax, (%res_ptr,%sizeP,4) - adcl $0, %edx - movl %edx, %ebp - - incl %sizeP - jnz L(oop) - movl %ebp, %eax - - popl %s2_limb - cfi_adjust_cfa_offset (-4) - cfi_restore (s2_limb) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %s1_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (s1_ptr) - popl %res_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (res_ptr) - - ret -END (__mpn_addmul_1) diff --git a/sysdeps/i386/asm-syntax.h b/sysdeps/i386/asm-syntax.h deleted file mode 100644 index a992da2dd1..0000000000 --- a/sysdeps/i386/asm-syntax.h +++ /dev/null @@ -1,24 +0,0 @@ -/* Definitions for x86 syntax variations. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. Its master source is NOT part of - the C library, however. The master source lives in the GNU MP Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#undef ALIGN -#define ALIGN(log) .align 1<<log - -#undef L -#define L(body) .L##body diff --git a/sysdeps/i386/atomic-machine.h b/sysdeps/i386/atomic-machine.h deleted file mode 100644 index 0e24200617..0000000000 --- a/sysdeps/i386/atomic-machine.h +++ /dev/null @@ -1,545 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <stdint.h> -#include <tls.h> /* For tcbhead_t. 
*/ - - -typedef int8_t atomic8_t; -typedef uint8_t uatomic8_t; -typedef int_fast8_t atomic_fast8_t; -typedef uint_fast8_t uatomic_fast8_t; - -typedef int16_t atomic16_t; -typedef uint16_t uatomic16_t; -typedef int_fast16_t atomic_fast16_t; -typedef uint_fast16_t uatomic_fast16_t; - -typedef int32_t atomic32_t; -typedef uint32_t uatomic32_t; -typedef int_fast32_t atomic_fast32_t; -typedef uint_fast32_t uatomic_fast32_t; - -typedef int64_t atomic64_t; -typedef uint64_t uatomic64_t; -typedef int_fast64_t atomic_fast64_t; -typedef uint_fast64_t uatomic_fast64_t; - -typedef intptr_t atomicptr_t; -typedef uintptr_t uatomicptr_t; -typedef intmax_t atomic_max_t; -typedef uintmax_t uatomic_max_t; - - -#ifndef LOCK_PREFIX -# ifdef UP -# define LOCK_PREFIX /* nothing */ -# else -# define LOCK_PREFIX "lock;" -# endif -#endif - -#define __HAVE_64B_ATOMICS 0 -#define USE_ATOMIC_COMPILER_BUILTINS 0 -#define ATOMIC_EXCHANGE_USES_CAS 0 - - -#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ - __sync_val_compare_and_swap (mem, oldval, newval) -#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \ - (! 
__sync_bool_compare_and_swap (mem, oldval, newval)) - - -#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret; \ - __asm __volatile ("cmpl $0, %%gs:%P5\n\t" \ - "je 0f\n\t" \ - "lock\n" \ - "0:\tcmpxchgb %b2, %1" \ - : "=a" (ret), "=m" (*mem) \ - : "q" (newval), "m" (*mem), "0" (oldval), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - ret; }) - -#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret; \ - __asm __volatile ("cmpl $0, %%gs:%P5\n\t" \ - "je 0f\n\t" \ - "lock\n" \ - "0:\tcmpxchgw %w2, %1" \ - : "=a" (ret), "=m" (*mem) \ - : "r" (newval), "m" (*mem), "0" (oldval), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - ret; }) - -#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret; \ - __asm __volatile ("cmpl $0, %%gs:%P5\n\t" \ - "je 0f\n\t" \ - "lock\n" \ - "0:\tcmpxchgl %2, %1" \ - : "=a" (ret), "=m" (*mem) \ - : "r" (newval), "m" (*mem), "0" (oldval), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - ret; }) - -/* XXX We do not really need 64-bit compare-and-exchange. At least - not in the moment. Using it would mean causing portability - problems since not many other 32-bit architectures have support for - such an operation. So don't define any code for now. If it is - really going to be used the code below can be used on Intel Pentium - and later, but NOT on i486. 
*/ -#if 1 -# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret = *(mem); \ - abort (); \ - ret = (newval); \ - ret = (oldval); \ - ret; }) -# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret = *(mem); \ - abort (); \ - ret = (newval); \ - ret = (oldval); \ - ret; }) -#else -# ifdef __PIC__ -# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret; \ - __asm __volatile ("xchgl %2, %%ebx\n\t" \ - LOCK_PREFIX "cmpxchg8b %1\n\t" \ - "xchgl %2, %%ebx" \ - : "=A" (ret), "=m" (*mem) \ - : "DS" (((unsigned long long int) (newval)) \ - & 0xffffffff), \ - "c" (((unsigned long long int) (newval)) >> 32), \ - "m" (*mem), "a" (((unsigned long long int) (oldval)) \ - & 0xffffffff), \ - "d" (((unsigned long long int) (oldval)) >> 32)); \ - ret; }) - -# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret; \ - __asm __volatile ("xchgl %2, %%ebx\n\t" \ - "cmpl $0, %%gs:%P7\n\t" \ - "je 0f\n\t" \ - "lock\n" \ - "0:\tcmpxchg8b %1\n\t" \ - "xchgl %2, %%ebx" \ - : "=A" (ret), "=m" (*mem) \ - : "DS" (((unsigned long long int) (newval)) \ - & 0xffffffff), \ - "c" (((unsigned long long int) (newval)) >> 32), \ - "m" (*mem), "a" (((unsigned long long int) (oldval)) \ - & 0xffffffff), \ - "d" (((unsigned long long int) (oldval)) >> 32), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - ret; }) -# else -# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ __typeof (*mem) ret; \ - __asm __volatile (LOCK_PREFIX "cmpxchg8b %1" \ - : "=A" (ret), "=m" (*mem) \ - : "b" (((unsigned long long int) (newval)) \ - & 0xffffffff), \ - "c" (((unsigned long long int) (newval)) >> 32), \ - "m" (*mem), "a" (((unsigned long long int) (oldval)) \ - & 0xffffffff), \ - "d" (((unsigned long long int) (oldval)) >> 32)); \ - ret; }) - -# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - ({ 
__typeof (*mem) ret; \ - __asm __volatile ("cmpl $0, %%gs:%P7\n\t" \ - "je 0f\n\t" \ - "lock\n" \ - "0:\tcmpxchg8b %1" \ - : "=A" (ret), "=m" (*mem) \ - : "b" (((unsigned long long int) (newval)) \ - & 0xffffffff), \ - "c" (((unsigned long long int) (newval)) >> 32), \ - "m" (*mem), "a" (((unsigned long long int) (oldval)) \ - & 0xffffffff), \ - "d" (((unsigned long long int) (oldval)) >> 32), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - ret; }) -# endif -#endif - - -/* Note that we need no lock prefix. */ -#define atomic_exchange_acq(mem, newvalue) \ - ({ __typeof (*mem) result; \ - if (sizeof (*mem) == 1) \ - __asm __volatile ("xchgb %b0, %1" \ - : "=q" (result), "=m" (*mem) \ - : "0" (newvalue), "m" (*mem)); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile ("xchgw %w0, %1" \ - : "=r" (result), "=m" (*mem) \ - : "0" (newvalue), "m" (*mem)); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile ("xchgl %0, %1" \ - : "=r" (result), "=m" (*mem) \ - : "0" (newvalue), "m" (*mem)); \ - else \ - { \ - result = 0; \ - abort (); \ - } \ - result; }) - - -#define __arch_exchange_and_add_body(lock, pfx, mem, value) \ - ({ __typeof (*mem) __result; \ - __typeof (value) __addval = (value); \ - if (sizeof (*mem) == 1) \ - __asm __volatile (lock "xaddb %b0, %1" \ - : "=q" (__result), "=m" (*mem) \ - : "0" (__addval), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (lock "xaddw %w0, %1" \ - : "=r" (__result), "=m" (*mem) \ - : "0" (__addval), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (lock "xaddl %0, %1" \ - : "=r" (__result), "=m" (*mem) \ - : "0" (__addval), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else \ - { \ - __typeof (mem) __memp = (mem); \ - __typeof (*mem) __tmpval; \ - __result = *__memp; \ - do \ - __tmpval = __result; \ - while ((__result = pfx##_compare_and_exchange_val_64_acq \ - 
(__memp, __result + __addval, __result)) == __tmpval); \ - } \ - __result; }) - -#define atomic_exchange_and_add(mem, value) \ - __sync_fetch_and_add (mem, value) - -#define __arch_exchange_and_add_cprefix \ - "cmpl $0, %%gs:%P4\n\tje 0f\n\tlock\n0:\t" - -#define catomic_exchange_and_add(mem, value) \ - __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c, \ - mem, value) - - -#define __arch_add_body(lock, pfx, mem, value) \ - do { \ - if (__builtin_constant_p (value) && (value) == 1) \ - atomic_increment (mem); \ - else if (__builtin_constant_p (value) && (value) == -1) \ - atomic_decrement (mem); \ - else if (sizeof (*mem) == 1) \ - __asm __volatile (lock "addb %b1, %0" \ - : "=m" (*mem) \ - : "iq" (value), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (lock "addw %w1, %0" \ - : "=m" (*mem) \ - : "ir" (value), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (lock "addl %1, %0" \ - : "=m" (*mem) \ - : "ir" (value), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else \ - { \ - __typeof (value) __addval = (value); \ - __typeof (mem) __memp = (mem); \ - __typeof (*mem) __oldval = *__memp; \ - __typeof (*mem) __tmpval; \ - do \ - __tmpval = __oldval; \ - while ((__oldval = pfx##_compare_and_exchange_val_64_acq \ - (__memp, __oldval + __addval, __oldval)) == __tmpval); \ - } \ - } while (0) - -#define atomic_add(mem, value) \ - __arch_add_body (LOCK_PREFIX, __arch, mem, value) - -#define __arch_add_cprefix \ - "cmpl $0, %%gs:%P3\n\tje 0f\n\tlock\n0:\t" - -#define catomic_add(mem, value) \ - __arch_add_body (__arch_add_cprefix, __arch_c, mem, value) - - -#define atomic_add_negative(mem, value) \ - ({ unsigned char __result; \ - if (sizeof (*mem) == 1) \ - __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "iq" (value), "m" (*mem)); \ - else if 
(sizeof (*mem) == 2) \ - __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "ir" (value), "m" (*mem)); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "ir" (value), "m" (*mem)); \ - else \ - abort (); \ - __result; }) - - -#define atomic_add_zero(mem, value) \ - ({ unsigned char __result; \ - if (sizeof (*mem) == 1) \ - __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "iq" (value), "m" (*mem)); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "ir" (value), "m" (*mem)); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "ir" (value), "m" (*mem)); \ - else \ - abort (); \ - __result; }) - - -#define __arch_increment_body(lock, pfx, mem) \ - do { \ - if (sizeof (*mem) == 1) \ - __asm __volatile (lock "incb %b0" \ - : "=m" (*mem) \ - : "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (lock "incw %w0" \ - : "=m" (*mem) \ - : "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (lock "incl %0" \ - : "=m" (*mem) \ - : "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else \ - { \ - __typeof (mem) __memp = (mem); \ - __typeof (*mem) __oldval = *__memp; \ - __typeof (*mem) __tmpval; \ - do \ - __tmpval = __oldval; \ - while ((__oldval = pfx##_compare_and_exchange_val_64_acq \ - (__memp, __oldval + 1, __oldval)) == __tmpval); \ - } \ - } while (0) - -#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem) - -#define __arch_increment_cprefix \ - "cmpl $0, %%gs:%P2\n\tje 0f\n\tlock\n0:\t" - -#define catomic_increment(mem) \ - __arch_increment_body (__arch_increment_cprefix, 
__arch_c, mem) - - -#define atomic_increment_and_test(mem) \ - ({ unsigned char __result; \ - if (sizeof (*mem) == 1) \ - __asm __volatile (LOCK_PREFIX "incb %0; sete %b1" \ - : "=m" (*mem), "=qm" (__result) \ - : "m" (*mem)); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (LOCK_PREFIX "incw %0; sete %w1" \ - : "=m" (*mem), "=qm" (__result) \ - : "m" (*mem)); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (LOCK_PREFIX "incl %0; sete %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "m" (*mem)); \ - else \ - abort (); \ - __result; }) - - -#define __arch_decrement_body(lock, pfx, mem) \ - do { \ - if (sizeof (*mem) == 1) \ - __asm __volatile (lock "decb %b0" \ - : "=m" (*mem) \ - : "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (lock "decw %w0" \ - : "=m" (*mem) \ - : "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (lock "decl %0" \ - : "=m" (*mem) \ - : "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else \ - { \ - __typeof (mem) __memp = (mem); \ - __typeof (*mem) __oldval = *__memp; \ - __typeof (*mem) __tmpval; \ - do \ - __tmpval = __oldval; \ - while ((__oldval = pfx##_compare_and_exchange_val_64_acq \ - (__memp, __oldval - 1, __oldval)) == __tmpval); \ - } \ - } while (0) - -#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem) - -#define __arch_decrement_cprefix \ - "cmpl $0, %%gs:%P2\n\tje 0f\n\tlock\n0:\t" - -#define catomic_decrement(mem) \ - __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem) - - -#define atomic_decrement_and_test(mem) \ - ({ unsigned char __result; \ - if (sizeof (*mem) == 1) \ - __asm __volatile (LOCK_PREFIX "decb %b0; sete %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "m" (*mem)); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (LOCK_PREFIX "decw %w0; sete %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "m" (*mem)); \ - else if 
(sizeof (*mem) == 4) \ - __asm __volatile (LOCK_PREFIX "decl %0; sete %1" \ - : "=m" (*mem), "=qm" (__result) \ - : "m" (*mem)); \ - else \ - abort (); \ - __result; }) - - -#define atomic_bit_set(mem, bit) \ - do { \ - if (sizeof (*mem) == 1) \ - __asm __volatile (LOCK_PREFIX "orb %b2, %0" \ - : "=m" (*mem) \ - : "m" (*mem), "iq" (1 << (bit))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (LOCK_PREFIX "orw %w2, %0" \ - : "=m" (*mem) \ - : "m" (*mem), "ir" (1 << (bit))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (LOCK_PREFIX "orl %2, %0" \ - : "=m" (*mem) \ - : "m" (*mem), "ir" (1 << (bit))); \ - else \ - abort (); \ - } while (0) - - -#define atomic_bit_test_set(mem, bit) \ - ({ unsigned char __result; \ - if (sizeof (*mem) == 1) \ - __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0" \ - : "=q" (__result), "=m" (*mem) \ - : "m" (*mem), "ir" (bit)); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0" \ - : "=q" (__result), "=m" (*mem) \ - : "m" (*mem), "ir" (bit)); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0" \ - : "=q" (__result), "=m" (*mem) \ - : "m" (*mem), "ir" (bit)); \ - else \ - abort (); \ - __result; }) - - -#define atomic_spin_nop() asm ("rep; nop") - - -#define __arch_and_body(lock, mem, mask) \ - do { \ - if (sizeof (*mem) == 1) \ - __asm __volatile (lock "andb %b1, %0" \ - : "=m" (*mem) \ - : "iq" (mask), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (lock "andw %w1, %0" \ - : "=m" (*mem) \ - : "ir" (mask), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (lock "andl %1, %0" \ - : "=m" (*mem) \ - : "ir" (mask), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else \ - abort (); \ - } while (0) - -#define __arch_cprefix \ - "cmpl $0, %%gs:%P3\n\tje 0f\n\tlock\n0:\t" - -#define atomic_and(mem, 
mask) __arch_and_body (LOCK_PREFIX, mem, mask) - -#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask) - - -#define __arch_or_body(lock, mem, mask) \ - do { \ - if (sizeof (*mem) == 1) \ - __asm __volatile (lock "orb %b1, %0" \ - : "=m" (*mem) \ - : "iq" (mask), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 2) \ - __asm __volatile (lock "orw %w1, %0" \ - : "=m" (*mem) \ - : "ir" (mask), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else if (sizeof (*mem) == 4) \ - __asm __volatile (lock "orl %1, %0" \ - : "=m" (*mem) \ - : "ir" (mask), "m" (*mem), \ - "i" (offsetof (tcbhead_t, multiple_threads))); \ - else \ - abort (); \ - } while (0) - -#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask) - -#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask) - -/* We don't use mfence because it is supposedly slower due to having to - provide stronger guarantees (e.g., regarding self-modifying code). */ -#define atomic_full_barrier() \ - __asm __volatile (LOCK_PREFIX "orl $0, (%%esp)" ::: "memory") -#define atomic_read_barrier() __asm ("" ::: "memory") -#define atomic_write_barrier() __asm ("" ::: "memory") diff --git a/sysdeps/i386/backtrace.c b/sysdeps/i386/backtrace.c deleted file mode 100644 index ee8238d0ce..0000000000 --- a/sysdeps/i386/backtrace.c +++ /dev/null @@ -1,163 +0,0 @@ -/* Return backtrace of current program state. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <libc-lock.h> -#include <dlfcn.h> -#include <execinfo.h> -#include <stdlib.h> -#include <unwind.h> - -struct trace_arg -{ - void **array; - int cnt, size; - void *lastebp, *lastesp; -}; - -#ifdef SHARED -static _Unwind_Reason_Code (*unwind_backtrace) (_Unwind_Trace_Fn, void *); -static _Unwind_Ptr (*unwind_getip) (struct _Unwind_Context *); -static _Unwind_Ptr (*unwind_getcfa) (struct _Unwind_Context *); -static _Unwind_Ptr (*unwind_getgr) (struct _Unwind_Context *, int); -static void *libgcc_handle; - -static void -init (void) -{ - libgcc_handle = __libc_dlopen ("libgcc_s.so.1"); - - if (libgcc_handle == NULL) - return; - - unwind_backtrace = __libc_dlsym (libgcc_handle, "_Unwind_Backtrace"); - unwind_getip = __libc_dlsym (libgcc_handle, "_Unwind_GetIP"); - unwind_getcfa = __libc_dlsym (libgcc_handle, "_Unwind_GetCFA"); - unwind_getgr = __libc_dlsym (libgcc_handle, "_Unwind_GetGR"); - if (unwind_getip == NULL || unwind_getgr == NULL || unwind_getcfa == NULL) - { - unwind_backtrace = NULL; - __libc_dlclose (libgcc_handle); - libgcc_handle = NULL; - } -} -#else -# define unwind_backtrace _Unwind_Backtrace -# define unwind_getip _Unwind_GetIP -# define unwind_getcfa _Unwind_GetCFA -# define unwind_getgr _Unwind_GetGR -#endif - -static _Unwind_Reason_Code -backtrace_helper (struct _Unwind_Context *ctx, void *a) -{ - struct trace_arg *arg = a; - - /* We are first called with address in the __backtrace function. - Skip it. 
*/ - if (arg->cnt != -1) - arg->array[arg->cnt] = (void *) unwind_getip (ctx); - if (++arg->cnt == arg->size) - return _URC_END_OF_STACK; - - /* %ebp is DWARF2 register 5 on IA-32. */ - arg->lastebp = (void *) unwind_getgr (ctx, 5); - arg->lastesp = (void *) unwind_getcfa (ctx); - return _URC_NO_REASON; -} - - -/* This is a global variable set at program start time. It marks the - highest used stack address. */ -extern void *__libc_stack_end; - - -/* This is the stack layout we see with every stack frame - if not compiled without frame pointer. - - +-----------------+ +-----------------+ - %ebp -> | %ebp last frame--------> | %ebp last frame--->... - | | | | - | return address | | return address | - +-----------------+ +-----------------+ - - First try as far to get as far as possible using - _Unwind_Backtrace which handles -fomit-frame-pointer - as well, but requires .eh_frame info. Then fall back to - walking the stack manually. */ - -struct layout -{ - struct layout *ebp; - void *ret; -}; - - -int -__backtrace (void **array, int size) -{ - struct trace_arg arg = { .array = array, .size = size, .cnt = -1 }; - - if (size <= 0) - return 0; - -#ifdef SHARED - __libc_once_define (static, once); - - __libc_once (once, init); - if (unwind_backtrace == NULL) - return 0; -#endif - - unwind_backtrace (backtrace_helper, &arg); - - if (arg.cnt > 1 && arg.array[arg.cnt - 1] == NULL) - --arg.cnt; - else if (arg.cnt < size) - { - struct layout *ebp = (struct layout *) arg.lastebp; - - while (arg.cnt < size) - { - /* Check for out of range. */ - if ((void *) ebp < arg.lastesp || (void *) ebp > __libc_stack_end - || ((long) ebp & 3)) - break; - - array[arg.cnt++] = ebp->ret; - ebp = ebp->ebp; - } - } - return arg.cnt != -1 ? arg.cnt : 0; -} -weak_alias (__backtrace, backtrace) -libc_hidden_def (__backtrace) - - -#ifdef SHARED -/* Free all resources if necessary. 
*/ -libc_freeres_fn (free_mem) -{ - unwind_backtrace = NULL; - if (libgcc_handle != NULL) - { - __libc_dlclose (libgcc_handle); - libgcc_handle = NULL; - } -} -#endif diff --git a/sysdeps/i386/bcopy.S b/sysdeps/i386/bcopy.S deleted file mode 100644 index 12b8ddb886..0000000000 --- a/sysdeps/i386/bcopy.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY bcopy -#include "memcpy.S" diff --git a/sysdeps/i386/bsd-_setjmp.S b/sysdeps/i386/bsd-_setjmp.S deleted file mode 100644 index 6496304946..0000000000 --- a/sysdeps/i386/bsd-_setjmp.S +++ /dev/null @@ -1,56 +0,0 @@ -/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. i386 version. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This just does a tail-call to `__sigsetjmp (ARG, 0)'. - We cannot do it in C because it must be a tail-call, so frame-unwinding - in setjmp doesn't clobber the state restored by longjmp. */ - -#include <sysdep.h> -#include <jmpbuf-offsets.h> -#include <stap-probe.h> - -#define PARMS 4 /* no space for saved regs */ -#define JMPBUF PARMS -#define SIGMSK JMPBUF+4 - -ENTRY (_setjmp) - - xorl %eax, %eax - movl JMPBUF(%esp), %edx - - /* Save registers. 
*/ - movl %ebx, (JB_BX*4)(%edx) - movl %esi, (JB_SI*4)(%edx) - movl %edi, (JB_DI*4)(%edx) - leal JMPBUF(%esp), %ecx /* Save SP as it will be after we return. */ -#ifdef PTR_MANGLE - PTR_MANGLE (%ecx) -#endif - movl %ecx, (JB_SP*4)(%edx) - movl 0(%esp), %ecx /* Save PC we are returning to now. */ - LIBC_PROBE (setjmp, 3, 4@%edx, -4@$0, 4@%ecx) -#ifdef PTR_MANGLE - PTR_MANGLE (%ecx) -#endif - movl %ecx, (JB_PC*4)(%edx) - movl %ebp, (JB_BP*4)(%edx) /* Save caller's frame pointer. */ - - movl %eax, JB_SIZE(%edx) /* No signal mask set. */ - ret -END (_setjmp) -libc_hidden_def (_setjmp) diff --git a/sysdeps/i386/bsd-setjmp.S b/sysdeps/i386/bsd-setjmp.S deleted file mode 100644 index 5710e1f42b..0000000000 --- a/sysdeps/i386/bsd-setjmp.S +++ /dev/null @@ -1,66 +0,0 @@ -/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. i386 version. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This just does a tail-call to `__sigsetjmp (ARG, 1)'. - We cannot do it in C because it must be a tail-call, so frame-unwinding - in setjmp doesn't clobber the state restored by longjmp. 
*/ - -#include <sysdep.h> -#include <jmpbuf-offsets.h> -#include <stap-probe.h> - -#define PARMS 4 /* no space for saved regs */ -#define JMPBUF PARMS -#define SIGMSK JMPBUF+4 - -ENTRY (setjmp) - /* Note that we have to use a non-exported symbol in the next - jump since otherwise gas will emit it as a jump through the - PLT which is what we cannot use here. */ - - movl JMPBUF(%esp), %eax - - /* Save registers. */ - movl %ebx, (JB_BX*4)(%eax) - movl %esi, (JB_SI*4)(%eax) - movl %edi, (JB_DI*4)(%eax) - leal JMPBUF(%esp), %ecx /* Save SP as it will be after we return. */ -#ifdef PTR_MANGLE - PTR_MANGLE (%ecx) -#endif - movl %ecx, (JB_SP*4)(%eax) - movl 0(%esp), %ecx /* Save PC we are returning to now. */ - LIBC_PROBE (setjmp, 3, 4@%eax, -4@$1, 4@%ecx) -#ifdef PTR_MANGLE - PTR_MANGLE (%ecx) -#endif - movl %ecx, (JB_PC*4)(%eax) - movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */ - - /* Call __sigjmp_save. */ - pushl $1 - cfi_adjust_cfa_offset (4) - pushl 8(%esp) - cfi_adjust_cfa_offset (4) - call __sigjmp_save - popl %ecx - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - ret -END (setjmp) diff --git a/sysdeps/i386/bzero.S b/sysdeps/i386/bzero.S deleted file mode 100644 index c8dd47b4da..0000000000 --- a/sysdeps/i386/bzero.S +++ /dev/null @@ -1,5 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include "memset.S" - -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/cacheinfo.c b/sysdeps/i386/cacheinfo.c deleted file mode 100644 index f15fe0779a..0000000000 --- a/sysdeps/i386/cacheinfo.c +++ /dev/null @@ -1,3 +0,0 @@ -#define DISABLE_PREFETCHW - -#include <sysdeps/x86/cacheinfo.c> diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure deleted file mode 100644 index 5b55c5affe..0000000000 --- a/sysdeps/i386/configure +++ /dev/null @@ -1,84 +0,0 @@ -# This file is generated from configure.ac by Autoconf. DO NOT EDIT! - # Local configure fragment for sysdeps/i386. 
- -# We no longer support i386 since it lacks the atomic instructions -# required to implement NPTL threading. -if test "$config_machine" = i386; then - as_fn_error $? " -*** ERROR: Support for i386 is deprecated. -*** Please use host i786, i686, i585 or i486. -*** For example: /src/glibc/configure --host=i686-pc-linux-gnu ...\"" "$LINENO" 5 -fi - -# The GNU C Library can't be built for i386. There are several reasons for -# this restriction. The primary reason is that i386 lacks the atomic -# operations required to support the current NPTL implementation. While it is -# possible that such atomic operations could be emulated in the kernel to date -# no such work has been done to enable this. Even with NPTL disabled you still -# have no atomic.h implementation. Given the declining use of i386 we disable -# support for building with `-march=i386' or `-mcpu=i386.' We don't explicitly -# check for i386, instead we make sure the compiler has support for inlining -# the builtin __sync_val_compare_and_swap. If it does then we should have no -# problem building for i386. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for compiler support of inlined builtin function __sync_val_compare_and_swap" >&5 -$as_echo_n "checking for compiler support of inlined builtin function __sync_val_compare_and_swap... " >&6; } -libc_compiler_builtin_inlined=no -cat > conftest.c <<EOF -int _start (void) { int a, b, c; __sync_val_compare_and_swap (&a, b, c); return 0; } -EOF -if ! { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS - -O0 -nostdlib -nostartfiles - -S conftest.c -o - | fgrep "__sync_val_compare_and_swap" - 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } -then - libc_compiler_builtin_inlined=yes -fi -rm -f conftest* -if test $libc_compiler_builtin_inlined = yes; then - libc_cv_unsupported_i386=no -else - as_fn_error $? 
" -*** Building with -march=i386/-mcpu=i386 is not supported. -*** Please use host i786, i686, i586, or i486. -*** For example: /source/glibc/configure CFLAGS='-O2 -march=i686' ..." "$LINENO" 5 -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_compiler_builtin_inlined" >&5 -$as_echo "$libc_compiler_builtin_inlined" >&6; } - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5 -$as_echo_n "checking for Intel MPX support... " >&6; } -if ${libc_cv_asm_mpx+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.s <<\EOF - bndmov %bnd0,(%esp) -EOF -if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - libc_cv_asm_mpx=yes -else - libc_cv_asm_mpx=no -fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5 -$as_echo "$libc_cv_asm_mpx" >&6; } -if test $libc_cv_asm_mpx = yes; then - $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h - -fi - -$as_echo "#define USE_REGPARMS 1" >>confdefs.h - - -$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h - diff --git a/sysdeps/i386/configure.ac b/sysdeps/i386/configure.ac deleted file mode 100644 index 19ef33f34a..0000000000 --- a/sysdeps/i386/configure.ac +++ /dev/null @@ -1,52 +0,0 @@ -GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. -# Local configure fragment for sysdeps/i386. - -# We no longer support i386 since it lacks the atomic instructions -# required to implement NPTL threading. -if test "$config_machine" = i386; then - AC_MSG_ERROR([ -*** ERROR: Support for i386 is deprecated. -*** Please use host i786, i686, i585 or i486. -*** For example: /src/glibc/configure --host=i686-pc-linux-gnu ..."]) -fi - -# The GNU C Library can't be built for i386. There are several reasons for -# this restriction. 
The primary reason is that i386 lacks the atomic -# operations required to support the current NPTL implementation. While it is -# possible that such atomic operations could be emulated in the kernel to date -# no such work has been done to enable this. Even with NPTL disabled you still -# have no atomic.h implementation. Given the declining use of i386 we disable -# support for building with `-march=i386' or `-mcpu=i386.' We don't explicitly -# check for i386, instead we make sure the compiler has support for inlining -# the builtin __sync_val_compare_and_swap. If it does then we should have no -# problem building for i386. -LIBC_COMPILER_BUILTIN_INLINED( - [__sync_val_compare_and_swap], - [int a, b, c; __sync_val_compare_and_swap (&a, b, c);], - [-O0], - [libc_cv_unsupported_i386=no], - [AC_MSG_ERROR([ -*** Building with -march=i386/-mcpu=i386 is not supported. -*** Please use host i786, i686, i586, or i486. -*** For example: /source/glibc/configure CFLAGS='-O2 -march=i686' ...])]) - -dnl Check whether asm supports Intel MPX -AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl -cat > conftest.s <<\EOF - bndmov %bnd0,(%esp) -EOF -if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then - libc_cv_asm_mpx=yes -else - libc_cv_asm_mpx=no -fi -rm -f conftest*]) -if test $libc_cv_asm_mpx = yes; then - AC_DEFINE(HAVE_MPX_SUPPORT) -fi - -AC_DEFINE(USE_REGPARMS) - -dnl It is always possible to access static and hidden symbols in an -dnl position independent way. -AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/i386/crti.S b/sysdeps/i386/crti.S deleted file mode 100644 index f800209990..0000000000 --- a/sysdeps/i386/crti.S +++ /dev/null @@ -1,84 +0,0 @@ -/* Special .init and .fini section support for x86. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - In addition to the permissions in the GNU Lesser General Public - License, the Free Software Foundation gives you unlimited - permission to link the compiled version of this file with other - programs, and to distribute those programs without any restriction - coming from the use of this file. (The GNU Lesser General Public - License restrictions do apply in other respects; for example, they - cover modification of the file, and distribution when not linked - into another program.) - - Note that people who make modified versions of this file are not - obligated to grant this special exception for their modified - versions; it is their choice whether to do so. The GNU Lesser - General Public License gives permission to release a modified - version without this exception; this exception also makes it - possible to release a modified version which carries forward this - exception. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* crti.S puts a function prologue at the beginning of the .init and - .fini sections and defines global symbols for those addresses, so - they can be called as functions. The symbols _init and _fini are - magic and cause the linker to emit DT_INIT and DT_FINI. 
*/ - -#include <libc-symbols.h> -#include <sysdep.h> - -#ifndef PREINIT_FUNCTION -# define PREINIT_FUNCTION __gmon_start__ -#endif - -#ifndef PREINIT_FUNCTION_WEAK -# define PREINIT_FUNCTION_WEAK 1 -#endif - -#if PREINIT_FUNCTION_WEAK - weak_extern (PREINIT_FUNCTION) -#else - .hidden PREINIT_FUNCTION -#endif - - .section .init,"ax",@progbits - .p2align 2 - .globl _init - .type _init, @function -_init: - pushl %ebx - /* Maintain 16-byte stack alignment for called functions. */ - subl $8, %esp - LOAD_PIC_REG (bx) -#if PREINIT_FUNCTION_WEAK - movl PREINIT_FUNCTION@GOT(%ebx), %eax - testl %eax, %eax - je .Lno_weak_fn - call PREINIT_FUNCTION@PLT -.Lno_weak_fn: -#else - call PREINIT_FUNCTION -#endif - - .section .fini,"ax",@progbits - .p2align 2 - .globl _fini - .type _fini, @function -_fini: - pushl %ebx - subl $8, %esp - LOAD_PIC_REG (bx) diff --git a/sysdeps/i386/crtn.S b/sysdeps/i386/crtn.S deleted file mode 100644 index b18b9c171a..0000000000 --- a/sysdeps/i386/crtn.S +++ /dev/null @@ -1,47 +0,0 @@ -/* Special .init and .fini section support for x86. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - In addition to the permissions in the GNU Lesser General Public - License, the Free Software Foundation gives you unlimited - permission to link the compiled version of this file with other - programs, and to distribute those programs without any restriction - coming from the use of this file. (The GNU Lesser General Public - License restrictions do apply in other respects; for example, they - cover modification of the file, and distribution when not linked - into another program.) 
- - Note that people who make modified versions of this file are not - obligated to grant this special exception for their modified - versions; it is their choice whether to do so. The GNU Lesser - General Public License gives permission to release a modified - version without this exception; this exception also makes it - possible to release a modified version which carries forward this - exception. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* crtn.S puts function epilogues in the .init and .fini sections - corresponding to the prologues in crti.S. */ - - .section .init,"ax",@progbits - addl $8, %esp - popl %ebx - ret - - .section .fini,"ax",@progbits - addl $8, %esp - popl %ebx - ret diff --git a/sysdeps/i386/dl-irel.h b/sysdeps/i386/dl-irel.h deleted file mode 100644 index 824e81aed1..0000000000 --- a/sysdeps/i386/dl-irel.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Machine-dependent ELF indirect relocation inline functions. - i386 version. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _DL_IREL_H -#define _DL_IREL_H - -#include <stdio.h> -#include <unistd.h> - -#define ELF_MACHINE_IREL 1 - -static inline Elf32_Addr -__attribute ((always_inline)) -elf_ifunc_invoke (Elf32_Addr addr) -{ - return ((Elf32_Addr (*) (void)) (addr)) (); -} - -static inline void -__attribute ((always_inline)) -elf_irel (const Elf32_Rel *reloc) -{ - Elf32_Addr *const reloc_addr = (void *) reloc->r_offset; - const unsigned long int r_type = ELF32_R_TYPE (reloc->r_info); - - if (__glibc_likely (r_type == R_386_IRELATIVE)) - { - Elf32_Addr value = elf_ifunc_invoke(*reloc_addr); - *reloc_addr = value; - } - else - __libc_fatal ("unexpected reloc type in static binary"); -} - -#endif /* dl-irel.h */ diff --git a/sysdeps/i386/dl-lookupcfg.h b/sysdeps/i386/dl-lookupcfg.h deleted file mode 100644 index 47b534a059..0000000000 --- a/sysdeps/i386/dl-lookupcfg.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Configuration of lookup functions. - Copyright (C) 2005-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define DL_UNMAP_IS_SPECIAL - -#include_next <dl-lookupcfg.h> - -/* Address of protected data defined in the shared library may be - external due to copy relocation. */ -#define DL_EXTERN_PROTECTED_DATA - -struct link_map; - -extern void _dl_unmap (struct link_map *map) - internal_function attribute_hidden; - -#define DL_UNMAP(map) _dl_unmap (map) diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h deleted file mode 100644 index 57d4a0bdbd..0000000000 --- a/sysdeps/i386/dl-machine.h +++ /dev/null @@ -1,757 +0,0 @@ -/* Machine-dependent ELF dynamic relocation inline functions. i386 version. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef dl_machine_h -#define dl_machine_h - -#define ELF_MACHINE_NAME "i386" - -#include <sys/param.h> -#include <sysdep.h> -#include <tls.h> -#include <dl-tlsdesc.h> -#include <cpu-features.c> - -/* Return nonzero iff ELF header is compatible with the running host. */ -static inline int __attribute__ ((unused)) -elf_machine_matches_host (const Elf32_Ehdr *ehdr) -{ - return ehdr->e_machine == EM_386; -} - - -/* Return the link-time address of _DYNAMIC. Conveniently, this is the - first element of the GOT, a special entry that is never relocated. 
*/ -static inline Elf32_Addr __attribute__ ((unused, const)) -elf_machine_dynamic (void) -{ - /* This produces a GOTOFF reloc that resolves to zero at link time, so in - fact just loads from the GOT register directly. By doing it without - an asm we can let the compiler choose any register. */ - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden; - return _GLOBAL_OFFSET_TABLE_[0]; -} - -/* Return the run-time load address of the shared object. */ -static inline Elf32_Addr __attribute__ ((unused)) -elf_machine_load_address (void) -{ - /* Compute the difference between the runtime address of _DYNAMIC as seen - by a GOTOFF reference, and the link-time address found in the special - unrelocated first GOT entry. */ - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden; - return (Elf32_Addr) &bygotoff - elf_machine_dynamic (); -} - -/* Set up the loaded object described by L so its unrelocated PLT - entries will jump to the on-demand fixup code in dl-runtime.c. */ - -static inline int __attribute__ ((unused, always_inline)) -elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) -{ - Elf32_Addr *got; - extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden; - extern void _dl_runtime_profile (Elf32_Word) attribute_hidden; - - if (l->l_info[DT_JMPREL] && lazy) - { - /* The GOT entries for functions in the PLT have not yet been filled - in. Their initial contents will arrange when called to push an - offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1], - and then jump to _GLOBAL_OFFSET_TABLE[2]. */ - got = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]); - /* If a library is prelinked but we have to relocate anyway, - we have to be able to undo the prelinking of .got.plt. - The prelinker saved us here address of .plt + 0x16. */ - if (got[1]) - { - l->l_mach.plt = got[1] + l->l_addr; - l->l_mach.gotplt = (Elf32_Addr) &got[3]; - } - got[1] = (Elf32_Addr) l; /* Identify this shared object. 
*/ - - /* The got[2] entry contains the address of a function which gets - called to get the address of a so far unresolved function and - jump to it. The profiling extension of the dynamic linker allows - to intercept the calls to collect information. In this case we - don't store the address in the GOT so that all future calls also - end in this function. */ - if (__glibc_unlikely (profile)) - { - got[2] = (Elf32_Addr) &_dl_runtime_profile; - - if (GLRO(dl_profile) != NULL - && _dl_name_match_p (GLRO(dl_profile), l)) - /* This is the object we are looking for. Say that we really - want profiling and the timers are started. */ - GL(dl_profile_map) = l; - } - else - /* This function will get called to fix up the GOT entry indicated by - the offset on the stack, and then jump to the resolved address. */ - got[2] = (Elf32_Addr) &_dl_runtime_resolve; - } - - return lazy; -} - -#ifdef IN_DL_RUNTIME - -# ifndef PROF -/* We add a declaration of this function here so that in dl-runtime.c - the ELF_MACHINE_RUNTIME_TRAMPOLINE macro really can pass the parameters - in registers. - - We cannot use this scheme for profiling because the _mcount call - destroys the passed register information. */ -#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused)) - -extern ElfW(Addr) _dl_fixup (struct link_map *l, - ElfW(Word) reloc_offset) - ARCH_FIXUP_ATTRIBUTE; -extern ElfW(Addr) _dl_profile_fixup (struct link_map *l, - ElfW(Word) reloc_offset, - ElfW(Addr) retaddr, void *regs, - long int *framesizep) - ARCH_FIXUP_ATTRIBUTE; -# endif - -#endif - -/* Mask identifying addresses reserved for the user program, - where the dynamic linker should not map anything. */ -#define ELF_MACHINE_USER_ADDRESS_MASK 0xf8000000UL - -/* Initial entry point code for the dynamic linker. - The C function `_dl_start' is the real entry point; - its return value is the user program's entry point. 
*/ - -#define RTLD_START asm ("\n\ - .text\n\ - .align 16\n\ -0: movl (%esp), %ebx\n\ - ret\n\ - .align 16\n\ -.globl _start\n\ -.globl _dl_start_user\n\ -_start:\n\ - # Note that _dl_start gets the parameter in %eax.\n\ - movl %esp, %eax\n\ - call _dl_start\n\ -_dl_start_user:\n\ - # Save the user entry point address in %edi.\n\ - movl %eax, %edi\n\ - # Point %ebx at the GOT.\n\ - call 0b\n\ - addl $_GLOBAL_OFFSET_TABLE_, %ebx\n\ - # See if we were run as a command with the executable file\n\ - # name as an extra leading argument.\n\ - movl _dl_skip_args@GOTOFF(%ebx), %eax\n\ - # Pop the original argument count.\n\ - popl %edx\n\ - # Adjust the stack pointer to skip _dl_skip_args words.\n\ - leal (%esp,%eax,4), %esp\n\ - # Subtract _dl_skip_args from argc.\n\ - subl %eax, %edx\n\ - # Push argc back on the stack.\n\ - push %edx\n\ - # The special initializer gets called with the stack just\n\ - # as the application's entry point will see it; it can\n\ - # switch stacks if it moves these contents over.\n\ -" RTLD_START_SPECIAL_INIT "\n\ - # Load the parameters again.\n\ - # (eax, edx, ecx, *--esp) = (_dl_loaded, argc, argv, envp)\n\ - movl _rtld_local@GOTOFF(%ebx), %eax\n\ - leal 8(%esp,%edx,4), %esi\n\ - leal 4(%esp), %ecx\n\ - movl %esp, %ebp\n\ - # Make sure _dl_init is run with 16 byte aligned stack.\n\ - andl $-16, %esp\n\ - pushl %eax\n\ - pushl %eax\n\ - pushl %ebp\n\ - pushl %esi\n\ - # Clear %ebp, so that even constructors have terminated backchain.\n\ - xorl %ebp, %ebp\n\ - # Call the function to run the initializers.\n\ - call _dl_init\n\ - # Pass our finalizer function to the user in %edx, as per ELF ABI.\n\ - leal _dl_fini@GOTOFF(%ebx), %edx\n\ - # Restore %esp _start expects.\n\ - movl (%esp), %esp\n\ - # Jump to the user's entry point.\n\ - jmp *%edi\n\ - .previous\n\ -"); - -#ifndef RTLD_START_SPECIAL_INIT -# define RTLD_START_SPECIAL_INIT /* nothing */ -#endif - -/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or - TLS variable, 
so undefined references should not be allowed to - define the value. - ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one - of the main executable's symbols, as for a COPY reloc. - ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA iff TYPE describes relocation may - against protected data whose address be external due to copy relocation. - */ -# define elf_machine_type_class(type) \ - ((((type) == R_386_JMP_SLOT || (type) == R_386_TLS_DTPMOD32 \ - || (type) == R_386_TLS_DTPOFF32 || (type) == R_386_TLS_TPOFF32 \ - || (type) == R_386_TLS_TPOFF || (type) == R_386_TLS_DESC) \ - * ELF_RTYPE_CLASS_PLT) \ - | (((type) == R_386_COPY) * ELF_RTYPE_CLASS_COPY) \ - | (((type) == R_386_GLOB_DAT) * ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA)) - -/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ -#define ELF_MACHINE_JMP_SLOT R_386_JMP_SLOT - -/* The i386 never uses Elf32_Rela relocations for the dynamic linker. - Prelinked libraries may use Elf32_Rela though. */ -#define ELF_MACHINE_PLT_REL 1 - -/* We define an initialization functions. This is called very early in - _dl_sysdep_start. */ -#define DL_PLATFORM_INIT dl_platform_init () - -static inline void __attribute__ ((unused)) -dl_platform_init (void) -{ -#if IS_IN (rtld) - /* init_cpu_features has been called early from __libc_start_main in - static executable. */ - init_cpu_features (&GLRO(dl_x86_cpu_features)); -#else - if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0') - /* Avoid an empty string which would disturb us. */ - GLRO(dl_platform) = NULL; -#endif -} - -static inline Elf32_Addr -elf_machine_fixup_plt (struct link_map *map, lookup_t t, - const Elf32_Rel *reloc, - Elf32_Addr *reloc_addr, Elf32_Addr value) -{ - return *reloc_addr = value; -} - -/* Return the final value of a plt relocation. 
*/ -static inline Elf32_Addr -elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, - Elf32_Addr value) -{ - return value; -} - - -/* Names of the architecture-specific auditing callback functions. */ -#define ARCH_LA_PLTENTER i86_gnu_pltenter -#define ARCH_LA_PLTEXIT i86_gnu_pltexit - -#endif /* !dl_machine_h */ - -/* The i386 never uses Elf32_Rela relocations for the dynamic linker. - Prelinked libraries may use Elf32_Rela though. */ -#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP -#define ELF_MACHINE_NO_REL 0 - -#ifdef RESOLVE_MAP - -/* Perform the relocation specified by RELOC and SYM (which is fully resolved). - MAP is the object containing the reloc. */ - -auto inline void -__attribute ((always_inline)) -elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, - const Elf32_Sym *sym, const struct r_found_version *version, - void *const reloc_addr_arg, int skip_ifunc) -{ - Elf32_Addr *const reloc_addr = reloc_addr_arg; - const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); - -# if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC - if (__glibc_unlikely (r_type == R_386_RELATIVE)) - { -# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC - /* This is defined in rtld.c, but nowhere in the static libc.a; - make the reference weak so static programs can still link. - This declaration cannot be done when compiling rtld.c - (i.e. #ifdef RTLD_BOOTSTRAP) because rtld.c contains the - common defn for _dl_rtld_map, which is incompatible with a - weak decl in the same file. */ -# ifndef SHARED - weak_extern (_dl_rtld_map); -# endif - if (map != &GL(dl_rtld_map)) /* Already done in rtld itself. 
*/ -# endif - *reloc_addr += map->l_addr; - } -# ifndef RTLD_BOOTSTRAP - else if (__glibc_unlikely (r_type == R_386_NONE)) - return; -# endif - else -# endif /* !RTLD_BOOTSTRAP and have no -z combreloc */ - { -# ifndef RTLD_BOOTSTRAP - const Elf32_Sym *const refsym = sym; -# endif - struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); - Elf32_Addr value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value; - - if (sym != NULL - && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, - 0) - && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) - && __builtin_expect (!skip_ifunc, 1)) - { -# ifndef RTLD_BOOTSTRAP - if (sym_map != map - && sym_map->l_type != lt_executable - && !sym_map->l_relocated) - { - const char *strtab - = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ -%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", - RTLD_PROGNAME, map->l_name, - sym_map->l_name, - strtab + refsym->st_name); - } -# endif - value = ((Elf32_Addr (*) (void)) value) (); - } - - switch (r_type) - { -# ifndef RTLD_BOOTSTRAP - case R_386_SIZE32: - /* Set to symbol size plus addend. */ - *reloc_addr += sym->st_size; - break; -# endif - case R_386_GLOB_DAT: - case R_386_JMP_SLOT: - *reloc_addr = value; - break; - - case R_386_TLS_DTPMOD32: -# ifdef RTLD_BOOTSTRAP - /* During startup the dynamic linker is always the module - with index 1. - XXX If this relocation is necessary move before RESOLVE - call. */ - *reloc_addr = 1; -# else - /* Get the information from the link map returned by the - resolv function. */ - if (sym_map != NULL) - *reloc_addr = sym_map->l_tls_modid; -# endif - break; - case R_386_TLS_DTPOFF32: -# ifndef RTLD_BOOTSTRAP - /* During relocation all TLS symbols are defined and used. - Therefore the offset is already correct. 
*/ - if (sym != NULL) - *reloc_addr = sym->st_value; -# endif - break; - case R_386_TLS_DESC: - { - struct tlsdesc volatile *td = - (struct tlsdesc volatile *)reloc_addr; - -# ifndef RTLD_BOOTSTRAP - if (! sym) - td->entry = _dl_tlsdesc_undefweak; - else -# endif - { -# ifndef RTLD_BOOTSTRAP -# ifndef SHARED - CHECK_STATIC_TLS (map, sym_map); -# else - if (!TRY_STATIC_TLS (map, sym_map)) - { - td->arg = _dl_make_tlsdesc_dynamic - (sym_map, sym->st_value + (ElfW(Word))td->arg); - td->entry = _dl_tlsdesc_dynamic; - } - else -# endif -# endif - { - td->arg = (void*)(sym->st_value - sym_map->l_tls_offset - + (ElfW(Word))td->arg); - td->entry = _dl_tlsdesc_return; - } - } - break; - } - case R_386_TLS_TPOFF32: - /* The offset is positive, backward from the thread pointer. */ -# ifdef RTLD_BOOTSTRAP - *reloc_addr += map->l_tls_offset - sym->st_value; -# else - /* We know the offset of object the symbol is contained in. - It is a positive value which will be subtracted from the - thread pointer. To get the variable position in the TLS - block we subtract the offset from that of the TLS block. */ - if (sym != NULL) - { - CHECK_STATIC_TLS (map, sym_map); - *reloc_addr += sym_map->l_tls_offset - sym->st_value; - } -# endif - break; - case R_386_TLS_TPOFF: - /* The offset is negative, forward from the thread pointer. */ -# ifdef RTLD_BOOTSTRAP - *reloc_addr += sym->st_value - map->l_tls_offset; -# else - /* We know the offset of object the symbol is contained in. - It is a negative value which will be added to the - thread pointer. */ - if (sym != NULL) - { - CHECK_STATIC_TLS (map, sym_map); - *reloc_addr += sym->st_value - sym_map->l_tls_offset; - } -# endif - break; - -# ifndef RTLD_BOOTSTRAP - case R_386_32: - *reloc_addr += value; - break; - case R_386_PC32: - *reloc_addr += (value - (Elf32_Addr) reloc_addr); - break; - case R_386_COPY: - if (sym == NULL) - /* This can happen in trace mode if an object could not be - found. 
*/ - break; - if (__builtin_expect (sym->st_size > refsym->st_size, 0) - || (__builtin_expect (sym->st_size < refsym->st_size, 0) - && GLRO(dl_verbose))) - { - const char *strtab; - - strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ -%s: Symbol `%s' has different size in shared object, consider re-linking\n", - RTLD_PROGNAME, strtab + refsym->st_name); - } - memcpy (reloc_addr_arg, (void *) value, - MIN (sym->st_size, refsym->st_size)); - break; - case R_386_IRELATIVE: - value = map->l_addr + *reloc_addr; - value = ((Elf32_Addr (*) (void)) value) (); - *reloc_addr = value; - break; - default: - _dl_reloc_bad_type (map, r_type, 0); - break; -# endif /* !RTLD_BOOTSTRAP */ - } - } -} - -# ifndef RTLD_BOOTSTRAP -auto inline void -__attribute__ ((always_inline)) -elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, - const Elf32_Sym *sym, const struct r_found_version *version, - void *const reloc_addr_arg, int skip_ifunc) -{ - Elf32_Addr *const reloc_addr = reloc_addr_arg; - const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); - - if (ELF32_R_TYPE (reloc->r_info) == R_386_RELATIVE) - *reloc_addr = map->l_addr + reloc->r_addend; - else if (r_type != R_386_NONE) - { -# ifndef RESOLVE_CONFLICT_FIND_MAP - const Elf32_Sym *const refsym = sym; -# endif - struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); - Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value; - - if (sym != NULL - && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) - && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) - && __builtin_expect (!skip_ifunc, 1)) - value = ((Elf32_Addr (*) (void)) value) (); - - switch (ELF32_R_TYPE (reloc->r_info)) - { - case R_386_SIZE32: - /* Set to symbol size plus addend. */ - value = sym->st_size; - case R_386_GLOB_DAT: - case R_386_JMP_SLOT: - case R_386_32: - *reloc_addr = value + reloc->r_addend; - break; -# ifndef RESOLVE_CONFLICT_FIND_MAP - /* Not needed for dl-conflict.c. 
*/ - case R_386_PC32: - *reloc_addr = (value + reloc->r_addend - (Elf32_Addr) reloc_addr); - break; - - case R_386_TLS_DTPMOD32: - /* Get the information from the link map returned by the - resolv function. */ - if (sym_map != NULL) - *reloc_addr = sym_map->l_tls_modid; - break; - case R_386_TLS_DTPOFF32: - /* During relocation all TLS symbols are defined and used. - Therefore the offset is already correct. */ - *reloc_addr = (sym == NULL ? 0 : sym->st_value) + reloc->r_addend; - break; - case R_386_TLS_DESC: - { - struct tlsdesc volatile *td = - (struct tlsdesc volatile *)reloc_addr; - -# ifndef RTLD_BOOTSTRAP - if (!sym) - { - td->arg = (void*)reloc->r_addend; - td->entry = _dl_tlsdesc_undefweak; - } - else -# endif - { -# ifndef RTLD_BOOTSTRAP -# ifndef SHARED - CHECK_STATIC_TLS (map, sym_map); -# else - if (!TRY_STATIC_TLS (map, sym_map)) - { - td->arg = _dl_make_tlsdesc_dynamic - (sym_map, sym->st_value + reloc->r_addend); - td->entry = _dl_tlsdesc_dynamic; - } - else -# endif -# endif - { - td->arg = (void*)(sym->st_value - sym_map->l_tls_offset - + reloc->r_addend); - td->entry = _dl_tlsdesc_return; - } - } - } - break; - case R_386_TLS_TPOFF32: - /* The offset is positive, backward from the thread pointer. */ - /* We know the offset of object the symbol is contained in. - It is a positive value which will be subtracted from the - thread pointer. To get the variable position in the TLS - block we subtract the offset from that of the TLS block. */ - if (sym != NULL) - { - CHECK_STATIC_TLS (map, sym_map); - *reloc_addr = sym_map->l_tls_offset - sym->st_value - + reloc->r_addend; - } - break; - case R_386_TLS_TPOFF: - /* The offset is negative, forward from the thread pointer. */ - /* We know the offset of object the symbol is contained in. - It is a negative value which will be added to the - thread pointer. 
*/ - if (sym != NULL) - { - CHECK_STATIC_TLS (map, sym_map); - *reloc_addr = sym->st_value - sym_map->l_tls_offset - + reloc->r_addend; - } - break; - case R_386_COPY: - if (sym == NULL) - /* This can happen in trace mode if an object could not be - found. */ - break; - if (__builtin_expect (sym->st_size > refsym->st_size, 0) - || (__builtin_expect (sym->st_size < refsym->st_size, 0) - && GLRO(dl_verbose))) - { - const char *strtab; - - strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ -%s: Symbol `%s' has different size in shared object, consider re-linking\n", - RTLD_PROGNAME, strtab + refsym->st_name); - } - memcpy (reloc_addr_arg, (void *) value, - MIN (sym->st_size, refsym->st_size)); - break; -# endif /* !RESOLVE_CONFLICT_FIND_MAP */ - case R_386_IRELATIVE: - value = map->l_addr + reloc->r_addend; - value = ((Elf32_Addr (*) (void)) value) (); - *reloc_addr = value; - break; - default: - /* We add these checks in the version to relocate ld.so only - if we are still debugging. 
*/ - _dl_reloc_bad_type (map, r_type, 0); - break; - } - } -} -# endif /* !RTLD_BOOTSTRAP */ - -auto inline void -__attribute ((always_inline)) -elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, - void *const reloc_addr_arg) -{ - Elf32_Addr *const reloc_addr = reloc_addr_arg; - assert (ELF32_R_TYPE (reloc->r_info) == R_386_RELATIVE); - *reloc_addr += l_addr; -} - -# ifndef RTLD_BOOTSTRAP -auto inline void -__attribute__ ((always_inline)) -elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, - void *const reloc_addr_arg) -{ - Elf32_Addr *const reloc_addr = reloc_addr_arg; - *reloc_addr = l_addr + reloc->r_addend; -} -# endif /* !RTLD_BOOTSTRAP */ - -auto inline void -__attribute__ ((always_inline)) -elf_machine_lazy_rel (struct link_map *map, - Elf32_Addr l_addr, const Elf32_Rel *reloc, - int skip_ifunc) -{ - Elf32_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset); - const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); - /* Check for unexpected PLT reloc type. */ - if (__glibc_likely (r_type == R_386_JMP_SLOT)) - { - if (__builtin_expect (map->l_mach.plt, 0) == 0) - *reloc_addr += l_addr; - else - *reloc_addr = (map->l_mach.plt - + (((Elf32_Addr) reloc_addr) - map->l_mach.gotplt) * 4); - } - else if (__glibc_likely (r_type == R_386_TLS_DESC)) - { - struct tlsdesc volatile * __attribute__((__unused__)) td = - (struct tlsdesc volatile *)reloc_addr; - - /* Handle relocations that reference the local *ABS* in a simple - way, so as to preserve a potential addend. */ - if (ELF32_R_SYM (reloc->r_info) == 0) - td->entry = _dl_tlsdesc_resolve_abs_plus_addend; - /* Given a known-zero addend, we can store a pointer to the - reloc in the arg position. */ - else if (td->arg == 0) - { - td->arg = (void*)reloc; - td->entry = _dl_tlsdesc_resolve_rel; - } - else - { - /* We could handle non-*ABS* relocations with non-zero addends - by allocating dynamically an arg to hold a pointer to the - reloc, but that sounds pointless. 
*/ - const Elf32_Rel *const r = reloc; - /* The code below was borrowed from elf_dynamic_do_rel(). */ - const ElfW(Sym) *const symtab = - (const void *) D_PTR (map, l_info[DT_SYMTAB]); - -# ifdef RTLD_BOOTSTRAP - /* The dynamic linker always uses versioning. */ - assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL); -# else - if (map->l_info[VERSYMIDX (DT_VERSYM)]) -# endif - { - const ElfW(Half) *const version = - (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); - ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; - elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], - &map->l_versions[ndx], - (void *) (l_addr + r->r_offset), skip_ifunc); - } -# ifndef RTLD_BOOTSTRAP - else - elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, - (void *) (l_addr + r->r_offset), skip_ifunc); -# endif - } - } - else if (__glibc_unlikely (r_type == R_386_IRELATIVE)) - { - Elf32_Addr value = map->l_addr + *reloc_addr; - if (__glibc_likely (!skip_ifunc)) - value = ((Elf32_Addr (*) (void)) value) (); - *reloc_addr = value; - } - else - _dl_reloc_bad_type (map, r_type, 1); -} - -# ifndef RTLD_BOOTSTRAP - -auto inline void -__attribute__ ((always_inline)) -elf_machine_lazy_rela (struct link_map *map, - Elf32_Addr l_addr, const Elf32_Rela *reloc, - int skip_ifunc) -{ - Elf32_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset); - const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); - if (__glibc_likely (r_type == R_386_JMP_SLOT)) - ; - else if (__glibc_likely (r_type == R_386_TLS_DESC)) - { - struct tlsdesc volatile * __attribute__((__unused__)) td = - (struct tlsdesc volatile *)reloc_addr; - - td->arg = (void*)reloc; - td->entry = _dl_tlsdesc_resolve_rela; - } - else if (__glibc_unlikely (r_type == R_386_IRELATIVE)) - { - Elf32_Addr value = map->l_addr + reloc->r_addend; - if (__glibc_likely (!skip_ifunc)) - value = ((Elf32_Addr (*) (void)) value) (); - *reloc_addr = value; - } - else - _dl_reloc_bad_type (map, r_type, 1); -} - -# endif 
/* !RTLD_BOOTSTRAP */ - -#endif /* RESOLVE_MAP */ diff --git a/sysdeps/i386/dl-procinfo.c b/sysdeps/i386/dl-procinfo.c deleted file mode 100644 index 7237f778b2..0000000000 --- a/sysdeps/i386/dl-procinfo.c +++ /dev/null @@ -1,65 +0,0 @@ -/* Data for i386 version of processor capability information. - Copyright (C) 2001-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* If anything should be added here check whether the size of each string - is still ok with the given array size. - - All the #ifdefs in the definitions are quite irritating but - necessary if we want to avoid duplicating the information. There - are three different modes: - - - PROCINFO_DECL is defined. This means we are only interested in - declarations. - - - PROCINFO_DECL is not defined: - - + if SHARED is defined the file is included in an array - initializer. The .element = { ... } syntax is needed. - - + if SHARED is not defined a normal array initialization is - needed. 
- */ - -#ifndef PROCINFO_CLASS -# define PROCINFO_CLASS -#endif - -#include <sysdeps/x86/dl-procinfo.c> - -#if !defined PROCINFO_DECL && defined SHARED - ._dl_x86_cap_flags -#else -PROCINFO_CLASS const char _dl_x86_cap_flags[32][8] -#endif -#ifndef PROCINFO_DECL -= { - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", "20", "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe" - } -#endif -#if !defined SHARED || defined PROCINFO_DECL -; -#else -, -#endif - -#undef PROCINFO_DECL -#undef PROCINFO_CLASS diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h deleted file mode 100644 index 525ebab992..0000000000 --- a/sysdeps/i386/dl-tls.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Thread-local storage handling in the ELF dynamic linker. i386 version. - Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -/* Type used for the representation of TLS information in the GOT. */ -typedef struct dl_tls_index -{ - unsigned long int ti_module; - unsigned long int ti_offset; -} tls_index; - - -#ifdef SHARED -/* This is the prototype for the GNU version. 
*/ -extern void *___tls_get_addr (tls_index *ti) - __attribute__ ((__regparm__ (1))); -extern void *___tls_get_addr_internal (tls_index *ti) - __attribute__ ((__regparm__ (1))) attribute_hidden; - -# if IS_IN (rtld) -/* The special thing about the x86 TLS ABI is that we have two - variants of the __tls_get_addr function with different calling - conventions. The GNU version, which we are mostly concerned here, - takes the parameter in a register. The name is changed by adding - an additional underscore at the beginning. The Sun version uses - the normal calling convention. */ -void * -__tls_get_addr (tls_index *ti) -{ - return ___tls_get_addr_internal (ti); -} - - -/* Prepare using the definition of __tls_get_addr in the generic - version of this file. */ -# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr -strong_alias (___tls_get_addr, ___tls_get_addr_internal) -rtld_hidden_proto (___tls_get_addr) -rtld_hidden_def (___tls_get_addr) -#else - -/* Users should get the better interface. */ -# define __tls_get_addr ___tls_get_addr - -# endif -#endif diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S deleted file mode 100644 index 8befdc2b39..0000000000 --- a/sysdeps/i386/dl-tlsdesc.S +++ /dev/null @@ -1,285 +0,0 @@ -/* Thread-local storage handling in the ELF dynamic linker. i386 version. - Copyright (C) 2004-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <tls.h> -#include "tlsdesc.h" - - .text - - /* This function is used to compute the TP offset for symbols in - Static TLS, i.e., whose TP offset is the same for all - threads. - - The incoming %eax points to the TLS descriptor, such that - 0(%eax) points to _dl_tlsdesc_return itself, and 4(%eax) holds - the TP offset of the symbol corresponding to the object - denoted by the argument. */ - - .hidden _dl_tlsdesc_return - .global _dl_tlsdesc_return - .type _dl_tlsdesc_return,@function - cfi_startproc - .align 16 -_dl_tlsdesc_return: - movl 4(%eax), %eax - ret - cfi_endproc - .size _dl_tlsdesc_return, .-_dl_tlsdesc_return - - /* This function is used for undefined weak TLS symbols, for - which the base address (i.e., disregarding any addend) should - resolve to NULL. - - %eax points to the TLS descriptor, such that 0(%eax) points to - _dl_tlsdesc_undefweak itself, and 4(%eax) holds the addend. - We return the addend minus the TP, such that, when the caller - adds TP, it gets the addend back. If that's zero, as usual, - that's most likely a NULL pointer. */ - - .hidden _dl_tlsdesc_undefweak - .global _dl_tlsdesc_undefweak - .type _dl_tlsdesc_undefweak,@function - cfi_startproc - .align 16 -_dl_tlsdesc_undefweak: - movl 4(%eax), %eax - subl %gs:0, %eax - ret - cfi_endproc - .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak - -#ifdef SHARED - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,@function - - /* This function is used for symbols that need dynamic TLS. - - %eax points to the TLS descriptor, such that 0(%eax) points to - _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct - tlsdesc_dynamic_arg object. 
It must return in %eax the offset - between the thread pointer and the object denoted by the - argument, without clobbering any registers. - - The assembly code that follows is a rendition of the following - C code, hand-optimized a little bit. - -ptrdiff_t -__attribute__ ((__regparm__ (1))) -_dl_tlsdesc_dynamic (struct tlsdesc *tdp) -{ - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); - if (__builtin_expect (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset - - __thread_pointer; - - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; -} -*/ - cfi_startproc - .align 16 -_dl_tlsdesc_dynamic: - /* Like all TLS resolvers, preserve call-clobbered registers. - We need two scratch regs anyway. */ - subl $28, %esp - cfi_adjust_cfa_offset (28) - movl %ecx, 20(%esp) - movl %edx, 24(%esp) - movl TLSDESC_ARG(%eax), %eax - movl %gs:DTV_OFFSET, %edx - movl TLSDESC_GEN_COUNT(%eax), %ecx - cmpl (%edx), %ecx - ja .Lslow - movl TLSDESC_MODID(%eax), %ecx - movl (%edx,%ecx,8), %edx - cmpl $-1, %edx - je .Lslow - movl TLSDESC_MODOFF(%eax), %eax - addl %edx, %eax -.Lret: - movl 20(%esp), %ecx - subl %gs:0, %eax - movl 24(%esp), %edx - addl $28, %esp - cfi_adjust_cfa_offset (-28) - ret - .p2align 4,,7 -.Lslow: - cfi_adjust_cfa_offset (28) - call HIDDEN_JUMPTARGET (___tls_get_addr) - jmp .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic -#endif /* SHARED */ - - /* This function is a wrapper for a lazy resolver for TLS_DESC - REL relocations that reference the *ABS* segment in their own - link maps. %ebx points to the caller's GOT. %eax points to a - TLS descriptor, such that 0(%eax) holds the address of the - resolver wrapper itself (unless some other thread beat us to - it) and 4(%eax) holds the addend in the relocation. 
- - When the actual resolver returns, it will have adjusted the - TLS descriptor such that we can tail-call it for it to return - the TP offset of the symbol. */ - - .hidden _dl_tlsdesc_resolve_abs_plus_addend - .global _dl_tlsdesc_resolve_abs_plus_addend - .type _dl_tlsdesc_resolve_abs_plus_addend,@function - cfi_startproc - .align 16 -_dl_tlsdesc_resolve_abs_plus_addend: -0: - pushl %eax - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %edx - cfi_adjust_cfa_offset (4) - movl $1f - 0b, %ecx - movl 4(%ebx), %edx - call _dl_tlsdesc_resolve_abs_plus_addend_fixup -1: - popl %edx - cfi_adjust_cfa_offset (-4) - popl %ecx - cfi_adjust_cfa_offset (-4) - popl %eax - cfi_adjust_cfa_offset (-4) - jmp *(%eax) - cfi_endproc - .size _dl_tlsdesc_resolve_abs_plus_addend, .-_dl_tlsdesc_resolve_abs_plus_addend - - /* This function is a wrapper for a lazy resolver for TLS_DESC - REL relocations that had zero addends. %ebx points to the - caller's GOT. %eax points to a TLS descriptor, such that - 0(%eax) holds the address of the resolver wrapper itself - (unless some other thread beat us to it) and 4(%eax) holds a - pointer to the relocation. - - When the actual resolver returns, it will have adjusted the - TLS descriptor such that we can tail-call it for it to return - the TP offset of the symbol. 
*/ - - .hidden _dl_tlsdesc_resolve_rel - .global _dl_tlsdesc_resolve_rel - .type _dl_tlsdesc_resolve_rel,@function - cfi_startproc - .align 16 -_dl_tlsdesc_resolve_rel: -0: - pushl %eax - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %edx - cfi_adjust_cfa_offset (4) - movl $1f - 0b, %ecx - movl 4(%ebx), %edx - call _dl_tlsdesc_resolve_rel_fixup -1: - popl %edx - cfi_adjust_cfa_offset (-4) - popl %ecx - cfi_adjust_cfa_offset (-4) - popl %eax - cfi_adjust_cfa_offset (-4) - jmp *(%eax) - cfi_endproc - .size _dl_tlsdesc_resolve_rel, .-_dl_tlsdesc_resolve_rel - - /* This function is a wrapper for a lazy resolver for TLS_DESC - RELA relocations. %ebx points to the caller's GOT. %eax - points to a TLS descriptor, such that 0(%eax) holds the - address of the resolver wrapper itself (unless some other - thread beat us to it) and 4(%eax) holds a pointer to the - relocation. - - When the actual resolver returns, it will have adjusted the - TLS descriptor such that we can tail-call it for it to return - the TP offset of the symbol. */ - - .hidden _dl_tlsdesc_resolve_rela - .global _dl_tlsdesc_resolve_rela - .type _dl_tlsdesc_resolve_rela,@function - cfi_startproc - .align 16 -_dl_tlsdesc_resolve_rela: -0: - pushl %eax - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %edx - cfi_adjust_cfa_offset (4) - movl $1f - 0b, %ecx - movl 4(%ebx), %edx - call _dl_tlsdesc_resolve_rela_fixup -1: - popl %edx - cfi_adjust_cfa_offset (-4) - popl %ecx - cfi_adjust_cfa_offset (-4) - popl %eax - cfi_adjust_cfa_offset (-4) - jmp *(%eax) - cfi_endproc - .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela - - /* This function is a placeholder for lazy resolving of TLS - relocations. 
Once some thread starts resolving a TLS - relocation, it sets up the TLS descriptor to use this - resolver, such that other threads that would attempt to - resolve it concurrently may skip the call to the original lazy - resolver and go straight to a condition wait. - - When the actual resolver returns, it will have adjusted the - TLS descriptor such that we can tail-call it for it to return - the TP offset of the symbol. */ - - .hidden _dl_tlsdesc_resolve_hold - .global _dl_tlsdesc_resolve_hold - .type _dl_tlsdesc_resolve_hold,@function - cfi_startproc - .align 16 -_dl_tlsdesc_resolve_hold: -0: - pushl %eax - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %edx - cfi_adjust_cfa_offset (4) - movl $1f - 0b, %ecx - movl 4(%ebx), %edx - call _dl_tlsdesc_resolve_hold_fixup -1: - popl %edx - cfi_adjust_cfa_offset (-4) - popl %ecx - cfi_adjust_cfa_offset (-4) - popl %eax - cfi_adjust_cfa_offset (-4) - jmp *(%eax) - cfi_endproc - .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold diff --git a/sysdeps/i386/dl-tlsdesc.h b/sysdeps/i386/dl-tlsdesc.h deleted file mode 100644 index 242bebfc8e..0000000000 --- a/sysdeps/i386/dl-tlsdesc.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Thread-local storage descriptor handling in the ELF dynamic linker. - i386 version. - Copyright (C) 2005-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _I386_DL_TLSDESC_H -# define _I386_DL_TLSDESC_H 1 - -/* Type used to represent a TLS descriptor in the GOT. */ -struct tlsdesc -{ - ptrdiff_t __attribute__ ((regparm (1))) (*entry) (struct tlsdesc *); - void *arg; -}; - -typedef struct dl_tls_index -{ - unsigned long int ti_module; - unsigned long int ti_offset; -} tls_index; - -/* Type used as the argument in a TLS descriptor for a symbol that - needs dynamic TLS offsets. */ -struct tlsdesc_dynamic_arg -{ - tls_index tlsinfo; - size_t gen_count; -}; - -extern ptrdiff_t attribute_hidden __attribute__ ((regparm (1))) - _dl_tlsdesc_return (struct tlsdesc *), - _dl_tlsdesc_undefweak (struct tlsdesc *), - _dl_tlsdesc_resolve_abs_plus_addend (struct tlsdesc *), - _dl_tlsdesc_resolve_rel (struct tlsdesc *), - _dl_tlsdesc_resolve_rela (struct tlsdesc *), - _dl_tlsdesc_resolve_hold (struct tlsdesc *); - -# ifdef SHARED -extern void *_dl_make_tlsdesc_dynamic (struct link_map *map, - size_t ti_offset) - internal_function attribute_hidden; - -extern ptrdiff_t attribute_hidden __attribute__ ((regparm (1))) - _dl_tlsdesc_dynamic (struct tlsdesc *); -# endif - -#endif diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S deleted file mode 100644 index 6e7f3aef92..0000000000 --- a/sysdeps/i386/dl-trampoline.S +++ /dev/null @@ -1,215 +0,0 @@ -/* PLT trampolines. i386 version. - Copyright (C) 2004-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <link-defines.h> - -#ifdef HAVE_MPX_SUPPORT -# define PRESERVE_BND_REGS_PREFIX bnd -#else -# define PRESERVE_BND_REGS_PREFIX .byte 0xf2 -#endif - - .text - .globl _dl_runtime_resolve - .type _dl_runtime_resolve, @function - cfi_startproc - .align 16 -_dl_runtime_resolve: - cfi_adjust_cfa_offset (8) - pushl %eax # Preserve registers otherwise clobbered. - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %edx - cfi_adjust_cfa_offset (4) - movl 16(%esp), %edx # Copy args pushed by PLT in register. Note - movl 12(%esp), %eax # that `fixup' takes its parameters in regs. - call _dl_fixup # Call resolver. - popl %edx # Get register content back. - cfi_adjust_cfa_offset (-4) - movl (%esp), %ecx - movl %eax, (%esp) # Store the function address. - movl 4(%esp), %eax - ret $12 # Jump to function address. - cfi_endproc - .size _dl_runtime_resolve, .-_dl_runtime_resolve - - -#ifndef PROF - .globl _dl_runtime_profile - .type _dl_runtime_profile, @function - cfi_startproc - .align 16 -_dl_runtime_profile: - cfi_adjust_cfa_offset (8) - pushl %esp - cfi_adjust_cfa_offset (4) - addl $8, (%esp) # Account for the pushed PLT data - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %eax # Preserve registers otherwise clobbered. 
- cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %edx - cfi_adjust_cfa_offset (4) - movl %esp, %ecx - subl $8, %esp - cfi_adjust_cfa_offset (8) - movl $-1, 4(%esp) - leal 4(%esp), %edx - movl %edx, (%esp) - pushl %ecx # Address of the register structure - cfi_adjust_cfa_offset (4) - movl 40(%esp), %ecx # Load return address - movl 36(%esp), %edx # Copy args pushed by PLT in register. Note - movl 32(%esp), %eax # that `fixup' takes its parameters in regs. - call _dl_profile_fixup # Call resolver. - cfi_adjust_cfa_offset (-8) - movl (%esp), %edx - testl %edx, %edx - jns 1f - popl %edx - cfi_adjust_cfa_offset (-4) - popl %edx # Get register content back. - cfi_adjust_cfa_offset (-4) - movl (%esp), %ecx - movl %eax, (%esp) # Store the function address. - movl 4(%esp), %eax - ret $20 # Jump to function address. - - /* - +32 return address - +28 PLT1 - +24 PLT2 - +20 %esp - +16 %ebp - +12 %eax - +8 %ecx - +4 %edx - %esp free - */ - cfi_adjust_cfa_offset (8) -1: movl %ebx, (%esp) - cfi_rel_offset (ebx, 0) - movl %edx, %ebx # This is the frame buffer size - pushl %edi - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - pushl %esi - cfi_adjust_cfa_offset (4) - cfi_rel_offset (esi, 0) - leal 44(%esp), %esi - movl %ebx, %ecx - orl $4, %ebx # Increase frame size if necessary to align - # stack for the function call - andl $~3, %ebx - movl %esp, %edi - subl %ebx, %edi - movl %esp, %ebx - cfi_def_cfa_register (ebx) - movl %edi, %esp - shrl $2, %ecx - rep - movsl - movl (%ebx), %esi - cfi_restore (esi) - movl 4(%ebx), %edi - cfi_restore (edi) - /* - %ebx+40 return address - %ebx+36 PLT1 - %ebx+32 PLT2 - %ebx+28 %esp - %ebx+24 %ebp - %ebx+20 %eax - %ebx+16 %ecx - %ebx+12 %edx - %ebx+8 %ebx - %ebx+4 free - %ebx free - %esp copied stack frame - */ - movl %eax, (%ebx) - movl 12(%ebx), %edx - movl 16(%ebx), %ecx - movl 20(%ebx), %eax - call *(%ebx) - movl %ebx, %esp - cfi_def_cfa_register (esp) - movl 8(%esp), %ebx - cfi_restore (ebx) - /* - +40 return 
address - +36 PLT1 - +32 PLT2 - +28 %esp - +24 %ebp - +20 %eax - +16 %ecx - +12 %edx - +8 free - +4 free - %esp free - */ -#if LONG_DOUBLE_SIZE != 12 -# error "long double size must be 12 bytes" -#endif - # Allocate space for La_i86_retval and subtract 12 free bytes. - subl $(LRV_SIZE - 12), %esp - cfi_adjust_cfa_offset (LRV_SIZE - 12) - movl %eax, LRV_EAX_OFFSET(%esp) - movl %edx, LRV_EDX_OFFSET(%esp) - fstpt LRV_ST0_OFFSET(%esp) - fstpt LRV_ST1_OFFSET(%esp) -#ifdef HAVE_MPX_SUPPORT - bndmov %bnd0, LRV_BND0_OFFSET(%esp) - bndmov %bnd1, LRV_BND1_OFFSET(%esp) -#else - .byte 0x66,0x0f,0x1b,0x44,0x24,LRV_BND0_OFFSET - .byte 0x66,0x0f,0x1b,0x4c,0x24,LRV_BND1_OFFSET -#endif - pushl %esp - cfi_adjust_cfa_offset (4) - # Address of La_i86_regs area. - leal (LRV_SIZE + 4)(%esp), %ecx - # PLT2 - movl (LRV_SIZE + 4 + LR_SIZE)(%esp), %eax - # PLT1 - movl (LRV_SIZE + 4 + LR_SIZE + 4)(%esp), %edx - call _dl_call_pltexit - movl LRV_EAX_OFFSET(%esp), %eax - movl LRV_EDX_OFFSET(%esp), %edx - fldt LRV_ST1_OFFSET(%esp) - fldt LRV_ST0_OFFSET(%esp) -#ifdef HAVE_MPX_SUPPORT - bndmov LRV_BND0_OFFSET(%esp), %bnd0 - bndmov LRV_BND1_OFFSET(%esp), %bnd1 -#else - .byte 0x66,0x0f,0x1a,0x44,0x24,LRV_BND0_OFFSET - .byte 0x66,0x0f,0x1a,0x4c,0x24,LRV_BND1_OFFSET -#endif - # Restore stack before return. - addl $(LRV_SIZE + 4 + LR_SIZE + 4), %esp - cfi_adjust_cfa_offset (-(LRV_SIZE + 4 + LR_SIZE + 4)) - PRESERVE_BND_REGS_PREFIX - ret - cfi_endproc - .size _dl_runtime_profile, .-_dl_runtime_profile -#endif diff --git a/sysdeps/i386/ffs.c b/sysdeps/i386/ffs.c deleted file mode 100644 index c229c8166e..0000000000 --- a/sysdeps/i386/ffs.c +++ /dev/null @@ -1,50 +0,0 @@ -/* ffs -- find first set bit in a word, counted from least significant end. - For Intel 80x86, x>=3. - This file is part of the GNU C Library. - Copyright (C) 1991-2017 Free Software Foundation, Inc. - Contributed by Torbjorn Granlund (tege@sics.se). 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define ffsl __something_else -#include <string.h> - -#undef ffs - -#ifdef __GNUC__ - -int -__ffs (int x) -{ - int cnt; - int tmp; - - asm ("xorl %0,%0\n" /* Set CNT to zero. */ - "bsfl %2,%1\n" /* Count low bits in X and store in %1. */ - "jz 1f\n" /* Jump if OK, i.e. X was non-zero. */ - "leal 1(%1),%0\n" /* Return bsfl-result plus one on %0. */ - "1:" : "=&a" (cnt), "=r" (tmp) : "rm" (x)); - - return cnt; -} -weak_alias (__ffs, ffs) -libc_hidden_def (__ffs) -libc_hidden_builtin_def (ffs) -#undef ffsl -weak_alias (__ffs, ffsl) - -#else -#include <string/ffs.c> -#endif diff --git a/sysdeps/i386/fpu/Implies b/sysdeps/i386/fpu/Implies deleted file mode 100644 index 2b745a34fb..0000000000 --- a/sysdeps/i386/fpu/Implies +++ /dev/null @@ -1 +0,0 @@ -x86/fpu diff --git a/sysdeps/i386/fpu/Versions b/sysdeps/i386/fpu/Versions deleted file mode 100644 index a2eec371f1..0000000000 --- a/sysdeps/i386/fpu/Versions +++ /dev/null @@ -1,6 +0,0 @@ -libm { - GLIBC_2.2 { - # functions used in inline functions or macros - __expl; __expm1l; - } -} diff --git a/sysdeps/i386/fpu/doasin.c b/sysdeps/i386/fpu/doasin.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/doasin.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. 
*/ diff --git a/sysdeps/i386/fpu/e_acos.S b/sysdeps/i386/fpu/e_acos.S deleted file mode 100644 index 586c7fc406..0000000000 --- a/sysdeps/i386/fpu/e_acos.S +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: e_acos.S,v 1.4 1995/05/08 23:44:37 jtc Exp $") - -/* acos = atan (sqrt((1-x) (1+x)) / x) */ -ENTRY(__ieee754_acos) - fldl 4(%esp) /* x */ - fld %st /* x : x */ - fld1 /* 1 : x : x */ - fsubp /* 1 - x : x */ - fld1 /* 1 : 1 - x : x */ - fadd %st(2) /* 1 + x : 1 - x : x */ - fmulp /* 1 - x^2 : x */ - fsqrt /* sqrt (1 - x^2) : x */ - fabs - fxch %st(1) /* x : sqrt (1 - x^2) */ - fpatan /* atan (sqrt(1 - x^2) / x) */ - ret -END (__ieee754_acos) -strong_alias (__ieee754_acos, __acos_finite) diff --git a/sysdeps/i386/fpu/e_acosf.S b/sysdeps/i386/fpu/e_acosf.S deleted file mode 100644 index 54930af8b2..0000000000 --- a/sysdeps/i386/fpu/e_acosf.S +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -/* acos = atan (sqrt(1 - x^2) / x) */ -ENTRY(__ieee754_acosf) - flds 4(%esp) /* x */ - fld %st - fmul %st(0) /* x^2 */ - fld1 - fsubp /* 1 - x^2 */ - fsqrt /* sqrt (1 - x^2) */ - fabs - fxch %st(1) - fpatan - ret -END (__ieee754_acosf) -strong_alias (__ieee754_acosf, __acosf_finite) diff --git a/sysdeps/i386/fpu/e_acosh.S b/sysdeps/i386/fpu/e_acosh.S deleted file mode 100644 index 9555ef8078..0000000000 --- a/sysdeps/i386/fpu/e_acosh.S +++ /dev/null @@ -1,101 +0,0 @@ -/* ix87 specific implementation of arcsinh. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__ieee754_acosh) - movl 8(%esp), %ecx - cmpl $0x3ff00000, %ecx - jl 5f // < 1 => invalid - fldln2 // log(2) - fldl 4(%esp) // x : log(2) - cmpl $0x41b00000, %ecx - ja 3f // x > 2^28 -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpl $0x40000000, %ecx - ja 4f // x > 2 - - // 1 <= x <= 2 => y = log1p(x-1+sqrt(2*(x-1)+(x-1)^2)) - fsubl MO(one) // x-1 : log(2) - fabs // acosh(1) is +0 in all rounding modes - fld %st // x-1 : x-1 : log(2) - fmul %st(1) // (x-1)^2 : x-1 : log(2) - fadd %st(1) // x-1+(x-1)^2 : x-1 : log(2) - fadd %st(1) // 2*(x-1)+(x-1)^2 : x-1 : log(2) - fsqrt // sqrt(2*(x-1)+(x-1)^2) : x-1 : log(2) - faddp // x-1+sqrt(2*(x-1)+(x-1)^2) : log(2) - fcoml MO(limit) - fnstsw - sahf - ja 2f - fyl2xp1 // log1p(x-1+sqrt(2*(x-1)+(x-1)^2)) - ret - -2: faddl MO(one) // x+sqrt(2*(x-1)+(x-1)^2) : log(2) - fyl2x // log(x+sqrt(2*(x-1)+(x-1)^2)) - ret - - // x > 2^28 => y = log(x) + log(2) - .align ALIGNARG(4) -3: fyl2x // log(x) - fldln2 // log(2) : log(x) - faddp // log(x)+log(2) - ret - - // 
2^28 > x > 2 => y = log(2*x - 1/(x+sqrt(x*x-1))) - .align ALIGNARG(4) -4: fld %st // x : x : log(2) - fadd %st, %st(1) // x : 2*x : log(2) - fld %st // x : x : 2*x : log(2) - fmul %st(1) // x^2 : x : 2*x : log(2) - fsubl MO(one) // x^2-1 : x : 2*x : log(2) - fsqrt // sqrt(x^2-1) : x : 2*x : log(2) - faddp // x+sqrt(x^2-1) : 2*x : log(2) - fdivrl MO(one) // 1/(x+sqrt(x^2-1)) : 2*x : log(2) - fsubrp // 2*x+1/(x+sqrt(x^2)-1) : log(2) - fyl2x // log(2*x+1/(x+sqrt(x^2-1))) - ret - - // x < 1 (or -NaN) => NaN - .align ALIGNARG(4) -5: fldl 4(%esp) - fsub %st - fdiv %st, %st(0) - ret -END(__ieee754_acosh) -strong_alias (__ieee754_acosh, __acosh_finite) diff --git a/sysdeps/i386/fpu/e_acoshf.S b/sysdeps/i386/fpu/e_acoshf.S deleted file mode 100644 index 662fda3c06..0000000000 --- a/sysdeps/i386/fpu/e_acoshf.S +++ /dev/null @@ -1,101 +0,0 @@ -/* ix87 specific implementation of arcsinh. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__ieee754_acoshf) - movl 4(%esp), %ecx - cmpl $0x3f800000, %ecx - jl 5f // < 1 => invalid - fldln2 // log(2) - flds 4(%esp) // x : log(2) - cmpl $0x47000000, %ecx - ja 3f // x > 2^14 -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpl $0x40000000, %ecx - ja 4f // x > 2 - - // 1 <= x <= 2 => y = log1p(x-1+sqrt(2*(x-1)+(x-1)^2)) - fsubl MO(one) // x-1 : log(2) - fabs // acosh(1) is +0 in all rounding modes - fld %st // x-1 : x-1 : log(2) - fmul %st(1) // (x-1)^2 : x-1 : log(2) - fadd %st(1) // x-1+(x-1)^2 : x-1 : log(2) - fadd %st(1) // 2*(x-1)+(x-1)^2 : x-1 : log(2) - fsqrt // sqrt(2*(x-1)+(x-1)^2) : x-1 : log(2) - faddp // x-1+sqrt(2*(x-1)+(x-1)^2) : log(2) - fcoml MO(limit) - fnstsw - sahf - ja 2f - fyl2xp1 // log1p(x-1+sqrt(2*(x-1)+(x-1)^2)) - ret - -2: faddl MO(one) // x+sqrt(2*(x-1)+(x-1)^2) : log(2) - fyl2x // log(x+sqrt(2*(x-1)+(x-1)^2)) - ret - - // x > 2^14 => y = log(x) + log(2) - .align ALIGNARG(4) -3: fyl2x // log(x) - fldln2 // log(2) : log(x) - faddp // log(x)+log(2) - ret - - // 2^28 > x > 2 => y = log(2*x - 1/(x+sqrt(x*x-1))) - .align ALIGNARG(4) -4: fld %st // x : x : log(2) - fadd %st, %st(1) // x : 2*x : log(2) - fld %st // x : x : 2*x : log(2) - fmul %st(1) // x^2 : x : 2*x : log(2) - fsubl MO(one) // x^2-1 : x : 2*x : log(2) - fsqrt // sqrt(x^2-1) : x : 2*x : log(2) - faddp // x+sqrt(x^2-1) : 2*x : log(2) - fdivrl MO(one) // 1/(x+sqrt(x^2-1)) : 2*x : log(2) - fsubrp // 2*x+1/(x+sqrt(x^2)-1) : log(2) - fyl2x // log(2*x+1/(x+sqrt(x^2-1))) - ret - - // x < 1 (or -NaN) => NaN - .align ALIGNARG(4) -5: flds 4(%esp) - fsub %st - fdiv %st, %st(0) - ret -END(__ieee754_acoshf) -strong_alias (__ieee754_acoshf, __acoshf_finite) diff --git 
a/sysdeps/i386/fpu/e_acoshl.S b/sysdeps/i386/fpu/e_acoshl.S deleted file mode 100644 index e0d6466aac..0000000000 --- a/sysdeps/i386/fpu/e_acoshl.S +++ /dev/null @@ -1,107 +0,0 @@ -/* ix87 specific implementation of arcsinh. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - /* Please note that we use double value for 1.0. This number - has an exact representation and so we don't get accuracy - problems. The advantage is that the code is simpler. */ - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__ieee754_acoshl) - movl 12(%esp), %ecx - andl $0xffff, %ecx - cmpl $0x3fff, %ecx - jl 5f // < 1 => invalid - fldln2 // log(2) - fldt 4(%esp) // x : log(2) - cmpl $0x4020, %ecx - ja 3f // x > 2^34 -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpl $0x4000, %ecx - ja 4f // x > 2 - - // 1 <= x <= 2 => y = log1p(x-1+sqrt(2*(x-1)+(x-1)^2)) - fsubl MO(one) // x-1 : log(2) - fabs // acosh(1) is +0 in all rounding modes - fld %st // x-1 : x-1 : log(2) - fmul %st(1) // (x-1)^2 : x-1 : log(2) - fadd %st(1) // x-1+(x-1)^2 : x-1 : log(2) - fadd %st(1) // 2*(x-1)+(x-1)^2 : x-1 : log(2) - fsqrt // sqrt(2*(x-1)+(x-1)^2) : x-1 : log(2) - faddp // x-1+sqrt(2*(x-1)+(x-1)^2) : log(2) - fcoml MO(limit) - fnstsw - sahf - ja 2f - fyl2xp1 // log1p(x-1+sqrt(2*(x-1)+(x-1)^2)) - ret - -2: faddl MO(one) // x+sqrt(2*(x-1)+(x-1)^2) : log(2) - fyl2x // log(x+sqrt(2*(x-1)+(x-1)^2)) - ret - - // x > 2^34 => y = log(x) + log(2) - .align ALIGNARG(4) -3: fyl2x // log(x) - fldln2 // log(2) : log(x) - faddp // log(x)+log(2) - ret - - // 2^34 > x > 2 => y = log(2*x - 1/(x+sqrt(x*x-1))) - .align ALIGNARG(4) -4: fld %st // x : x : log(2) - fadd %st, %st(1) // x : 2*x : log(2) - fld %st // x : x : 2*x : log(2) - fmul %st(1) // x^2 : x : 2*x : log(2) - fsubl MO(one) // x^2-1 : x : 2*x : log(2) - fsqrt // sqrt(x^2-1) : x : 2*x : log(2) - faddp // x+sqrt(x^2-1) : 2*x : log(2) - fdivrl MO(one) // 1/(x+sqrt(x^2-1)) : 2*x : log(2) - fsubrp // 2*x+1/(x+sqrt(x^2)-1) : log(2) - fyl2x // log(2*x+1/(x+sqrt(x^2-1))) - ret - - // x < 1 => NaN - .align ALIGNARG(4) -5: fldz - fdiv %st, %st(0) - ret -END(__ieee754_acoshl) -strong_alias (__ieee754_acoshl, __acoshl_finite) diff --git a/sysdeps/i386/fpu/e_acosl.c b/sysdeps/i386/fpu/e_acosl.c deleted file mode 100644 index ab08931924..0000000000 --- a/sysdeps/i386/fpu/e_acosl.c +++ /dev/null @@ -1,29 +0,0 @@ 
-/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <math_private.h> - -long double -__ieee754_acosl (long double x) -{ - long double res; - - /* acosl = atanl (sqrtl((1-x) (1+x)) / x) */ - asm ( "fld %%st\n" - "fld1\n" - "fsubp\n" - "fld1\n" - "fadd %%st(2)\n" - "fmulp\n" /* 1 - x^2 */ - "fsqrt\n" /* sqrtl (1 - x^2) */ - "fabs\n" - "fxch %%st(1)\n" - "fpatan" - : "=t" (res) : "0" (x) : "st(1)"); - return res; -} -strong_alias (__ieee754_acosl, __acosl_finite) diff --git a/sysdeps/i386/fpu/e_asin.S b/sysdeps/i386/fpu/e_asin.S deleted file mode 100644 index 39c8b47da4..0000000000 --- a/sysdeps/i386/fpu/e_asin.S +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: e_asin.S,v 1.4 1995/05/08 23:45:40 jtc Exp $") - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text - -/* asin = atan (x / sqrt((1-x) (1+x))) */ -ENTRY(__ieee754_asin) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl 4(%esp) /* x */ - fld %st - fld1 /* 1 : x : x */ - fsubp /* 1 - x : x */ - fld1 /* 1 : 1 - x : x */ - fadd %st(2) /* 1 + x : 1 - x : x */ - fmulp /* 1 - x^2 */ - fsqrt /* sqrt (1 - x^2) */ - fpatan - DBL_CHECK_FORCE_UFLOW - ret -END (__ieee754_asin) -strong_alias (__ieee754_asin, __asin_finite) diff --git a/sysdeps/i386/fpu/e_asinf.S b/sysdeps/i386/fpu/e_asinf.S deleted file mode 100644 index 1102bdedfd..0000000000 --- a/sysdeps/i386/fpu/e_asinf.S +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>. 
- */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: $") - - .section .rodata.cst4,"aM",@progbits,4 - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text - -/* asin = atan (x / sqrt(1 - x^2)) */ -ENTRY(__ieee754_asinf) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - flds 4(%esp) /* x */ - fld %st - fmul %st(0) /* x^2 */ - fld1 - fsubp /* 1 - x^2 */ - fsqrt /* sqrt (1 - x^2) */ - fpatan - FLT_CHECK_FORCE_UFLOW - ret -END (__ieee754_asinf) -strong_alias (__ieee754_asinf, __asinf_finite) diff --git a/sysdeps/i386/fpu/e_atan2.S b/sysdeps/i386/fpu/e_atan2.S deleted file mode 100644 index 25f43bb5a1..0000000000 --- a/sysdeps/i386/fpu/e_atan2.S +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: e_atan2.S,v 1.4 1995/05/08 23:46:28 jtc Exp $") - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_atan2) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl 4(%esp) - fldl 12(%esp) - fpatan - DBL_CHECK_FORCE_UFLOW_NARROW - ret -END (__ieee754_atan2) -strong_alias (__ieee754_atan2, __atan2_finite) diff --git a/sysdeps/i386/fpu/e_atan2f.S b/sysdeps/i386/fpu/e_atan2f.S deleted file mode 100644 index 2bc909a762..0000000000 --- a/sysdeps/i386/fpu/e_atan2f.S +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: e_atan2f.S,v 1.1 1995/05/08 23:35:10 jtc Exp $") - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_atan2f) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - flds 4(%esp) - flds 8(%esp) - fpatan - FLT_CHECK_FORCE_UFLOW_NARROW - ret -END (__ieee754_atan2f) -strong_alias (__ieee754_atan2f, __atan2f_finite) diff --git a/sysdeps/i386/fpu/e_atan2l.c b/sysdeps/i386/fpu/e_atan2l.c deleted file mode 100644 index 9f88bfcc08..0000000000 --- a/sysdeps/i386/fpu/e_atan2l.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <math_private.h> - -long double -__ieee754_atan2l (long double y, long double x) -{ - long double res; - - asm ("fpatan" : "=t" (res) : "u" (y), "0" (x) : "st(1)"); - - return res; -} -strong_alias (__ieee754_atan2l, __atan2l_finite) diff --git a/sysdeps/i386/fpu/e_atanh.S b/sysdeps/i386/fpu/e_atanh.S deleted file mode 100644 index cbc93d5da2..0000000000 --- a/sysdeps/i386/fpu/e_atanh.S +++ /dev/null @@ -1,112 +0,0 @@ -/* ix87 specific implementation of arctanh function. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type half,@object -half: .double 0.5 - ASM_SIZE_DIRECTIVE(half) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - .type ln2_2,@object -ln2_2: .tfloat 0.3465735902799726547086160 - ASM_SIZE_DIRECTIVE(ln2_2) - -DEFINE_DBL_MIN - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__ieee754_atanh) - movl 8(%esp), %ecx - - movl %ecx, %eax - andl $0x7fffffff, %eax - cmpl $0x7ff00000, %eax - jae 5f -7: - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - andl $0x80000000, %ecx // ECX == 0 iff X >= 0 - - fldt MO(ln2_2) // 0.5*ln2 - xorl %ecx, 8(%esp) - fldl 4(%esp) // |x| : 0.5*ln2 - fcoml MO(half) // |x| : 0.5*ln2 - fld %st // |x| : |x| : 0.5*ln2 - fnstsw // |x| : |x| : 0.5*ln2 - sahf - jae 2f - fadd %st, %st(1) // |x| : 2*|x| : 0.5*ln2 - fld %st // |x| : |x| : 2*|x| : 0.5*ln2 - fsubrl MO(one) // 1-|x| : |x| : 2*|x| : 0.5*ln2 - fxch // |x| : 1-|x| : 2*|x| : 0.5*ln2 - fmul %st(2) // 2*|x|^2 : 1-|x| : 2*|x| : 0.5*ln2 - fdivp // (2*|x|^2)/(1-|x|) : 2*|x| : 0.5*ln2 - faddp // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fcoml MO(limit) // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fnstsw // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - sahf - jae 4f - fyl2xp1 // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|)) - DBL_CHECK_FORCE_UFLOW_NONNEG - jecxz 3f - fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x)) -3: ret - - .align ALIGNARG(4) -4: faddl MO(one) // 1+2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fyl2x // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x)) -3: ret - - .align ALIGNARG(4) -2: faddl MO(one) // 1+|x| : |x| : 0.5*ln2 - fxch // |x| : 1+|x| : 0.5*ln2 - fsubrl MO(one) // 1-|x| : 1+|x| : 0.5*ln2 - 
fdivrp // (1+|x|)/(1-|x|) : 0.5*ln2 - fyl2x // 0.5*ln2*ld((1+|x|)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld((1+x)/(1-x)) -3: ret - - // x == NaN or ±Inf -5: ja 6f - cmpl $0, 4(%esp) - je 7b -6: fldl 4(%esp) - ret -END(__ieee754_atanh) -strong_alias (__ieee754_atanh, __atanh_finite) diff --git a/sysdeps/i386/fpu/e_atanhf.S b/sysdeps/i386/fpu/e_atanhf.S deleted file mode 100644 index 92fda3fd82..0000000000 --- a/sysdeps/i386/fpu/e_atanhf.S +++ /dev/null @@ -1,109 +0,0 @@ -/* ix87 specific implementation of arctanh function. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type half,@object -half: .double 0.5 - ASM_SIZE_DIRECTIVE(half) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - .align ALIGNARG(4) - .type ln2_2,@object -ln2_2: .tfloat 0.3465735902799726547086160 - ASM_SIZE_DIRECTIVE(ln2_2) - -DEFINE_FLT_MIN - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__ieee754_atanhf) - movl 4(%esp), %ecx - - movl %ecx, %eax - andl $0x7fffffff, %eax - cmpl $0x7f800000, %eax - ja 5f - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - andl $0x80000000, %ecx // ECX == 0 iff X >= 0 - - fldt MO(ln2_2) // 0.5*ln2 - xorl %ecx, 4(%esp) - flds 4(%esp) // |x| : 0.5*ln2 - fcoml MO(half) // |x| : 0.5*ln2 - fld %st(0) // |x| : |x| : 0.5*ln2 - fnstsw // |x| : |x| : 0.5*ln2 - sahf - jae 2f - fadd %st, %st(1) // |x| : 2*|x| : 0.5*ln2 - fld %st // |x| : |x| : 2*|x| : 0.5*ln2 - fsubrl MO(one) // 1-|x| : |x| : 2*|x| : 0.5*ln2 - fxch // |x| : 1-|x| : 2*|x| : 0.5*ln2 - fmul %st(2) // 2*|x|^2 : 1-|x| : 2*|x| : 0.5*ln2 - fdivp // (2*|x|^2)/(1-|x|) : 2*|x| : 0.5*ln2 - faddp // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fcoml MO(limit) // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fnstsw // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - sahf - jae 4f - fyl2xp1 // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|)) - FLT_CHECK_FORCE_UFLOW_NONNEG - jecxz 3f - fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x)) -3: ret - - .align ALIGNARG(4) -4: faddl MO(one) // 1+2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fyl2x // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x)) -3: ret - - .align ALIGNARG(4) -2: faddl MO(one) // 1+|x| : |x| : 0.5*ln2 - fxch // |x| : 1+|x| : 0.5*ln2 - fsubrl MO(one) // 1-|x| : 1+|x| : 0.5*ln2 - fdivrp // (1+|x|)/(1-|x|) : 0.5*ln2 - fyl2x // 0.5*ln2*ld((1+|x|)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld((1+x)/(1-x)) -3: ret - - // x == 
NaN -5: flds 4(%esp) - ret -END(__ieee754_atanhf) -strong_alias (__ieee754_atanhf, __atanhf_finite) diff --git a/sysdeps/i386/fpu/e_atanhl.S b/sysdeps/i386/fpu/e_atanhl.S deleted file mode 100644 index 31ff7e5182..0000000000 --- a/sysdeps/i386/fpu/e_atanhl.S +++ /dev/null @@ -1,127 +0,0 @@ -/* ix87 specific implementation of arctanh function. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - /* Please note that we use double values for 0.5 and 1.0. These - numbers have exact representations and so we don't get accuracy - problems. The advantage is that the code is simpler. */ - .type half,@object -half: .double 0.5 - ASM_SIZE_DIRECTIVE(half) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - .align ALIGNARG(4) - .type ln2_2,@object -ln2_2: .tfloat 0.3465735902799726547086160 - ASM_SIZE_DIRECTIVE(ln2_2) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__ieee754_atanhl) - movl 12(%esp), %ecx - - movl %ecx, %eax - andl $0x7fff, %eax - cmpl $0x7fff, %eax - je 5f - cmpl $0x3fdf, %eax - jge 7f - // Exponent below -32; return x, with underflow if subnormal. - fldt 4(%esp) - cmpl $0, %eax - jne 8f - fld %st(0) - fmul %st(0) - fstp %st(0) -8: ret -7: - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - andl $0x8000, %ecx // ECX == 0 iff X >= 0 - - fldt MO(ln2_2) // 0.5*ln2 - xorl %ecx, 12(%esp) - fldt 4(%esp) // |x| : 0.5*ln2 - fcoml MO(half) // |x| : 0.5*ln2 - fld %st(0) // |x| : |x| : 0.5*ln2 - fnstsw // |x| : |x| : 0.5*ln2 - sahf - jae 2f - fadd %st, %st(1) // |x| : 2*|x| : 0.5*ln2 - fld %st // |x| : |x| : 2*|x| : 0.5*ln2 - fsubrl MO(one) // 1-|x| : |x| : 2*|x| : 0.5*ln2 - fxch // |x| : 1-|x| : 2*|x| : 0.5*ln2 - fmul %st(2) // 2*|x|^2 : 1-|x| : 2*|x| : 0.5*ln2 - fdivp // (2*|x|^2)/(1-|x|) : 2*|x| : 0.5*ln2 - faddp // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fcoml MO(limit) // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fnstsw // 2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - sahf - jae 4f - fyl2xp1 // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x)) -3: ret - - .align ALIGNARG(4) -4: faddl MO(one) // 1+2*|x|+(2*|x|^2)/(1-|x|) : 0.5*ln2 - fyl2x // 0.5*ln2*ld(1+2*|x|+(2*|x|^2)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld(1+2*x+(2*x^2)/(1-x)) -3: ret - - .align ALIGNARG(4) -2: faddl MO(one) // 1+|x| : |x| : 0.5*ln2 - fxch // |x| : 1+|x| : 0.5*ln2 - fsubrl MO(one) // 1-|x| : 1+|x| : 0.5*ln2 - fdivrp // (1+|x|)/(1-|x|) : 0.5*ln2 - fyl2x // 0.5*ln2*ld((1+|x|)/(1-|x|)) - jecxz 3f - fchs // 0.5*ln2*ld((1+x)/(1-x)) -3: ret - - // x == NaN or ±Inf -5: cmpl $0x80000000, 8(%esp) - ja 6f - cmpl $0, 4(%esp) - je 7b -6: fldt 4(%esp) - fadd 
%st(0) - ret -END(__ieee754_atanhl) -strong_alias (__ieee754_atanhl, __atanhl_finite) diff --git a/sysdeps/i386/fpu/e_exp.S b/sysdeps/i386/fpu/e_exp.S deleted file mode 100644 index a7e7f13f6f..0000000000 --- a/sysdeps/i386/fpu/e_exp.S +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -/* e^x = 2^(x * log2(e)) */ -ENTRY(__ieee754_exp) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - fldl2e - fmulp /* x * log2(e) */ - fld %st - frndint /* int(x * log2(e)) */ - fsubr %st,%st(1) /* fract(x * log2(e)) */ - fxch - f2xm1 /* 2^(fract(x * log2(e))) - 1 */ - fld1 - faddp /* 2^(fract(x * log2(e))) */ - fscale /* e^x */ - fstp %st(1) - DBL_NARROW_EVAL_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -2: ret -END (__ieee754_exp) - - -ENTRY(__exp_finite) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl2e - fmull 4(%esp) /* x * log2(e) */ - fld %st - frndint /* int(x * log2(e)) */ - fsubr %st,%st(1) /* fract(x * log2(e)) */ - fxch - f2xm1 /* 2^(fract(x * log2(e))) - 1 */ - fld1 - faddp /* 2^(fract(x * log2(e))) */ - fscale /* e^x */ - fstp %st(1) - DBL_NARROW_EVAL_UFLOW_NONNEG - ret -END(__exp_finite) diff --git a/sysdeps/i386/fpu/e_exp10.S b/sysdeps/i386/fpu/e_exp10.S deleted file mode 100644 index acb5160a3f..0000000000 --- a/sysdeps/i386/fpu/e_exp10.S +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Written by Ulrich Drepper <drepper@cygnus.com>. 
- */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -/* 10^x = 2^(x * log2(10)) */ -ENTRY(__ieee754_exp10) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - fldl2t - fmulp /* x * log2(10) */ - fld %st - frndint /* int(x * log2(10)) */ - fsubr %st,%st(1) /* fract(x * log2(10)) */ - fxch - f2xm1 /* 2^(fract(x * log2(10))) - 1 */ - fld1 - faddp /* 2^(fract(x * log2(10))) */ - fscale /* e^x */ - fstp %st(1) - DBL_NARROW_EVAL_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -2: ret -END (__ieee754_exp10) -strong_alias (__ieee754_exp10, __exp10_finite) diff --git a/sysdeps/i386/fpu/e_exp10f.S b/sysdeps/i386/fpu/e_exp10f.S deleted file mode 100644 index 1812b34398..0000000000 --- a/sysdeps/i386/fpu/e_exp10f.S +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Written by Ulrich Drepper. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -/* 10^x = 2^(x * log2(10)) */ -ENTRY(__ieee754_exp10f) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - flds 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. 
*/ - fldl2t - fmulp /* x * log2(10) */ - fld %st - frndint /* int(x * log2(10)) */ - fsubr %st,%st(1) /* fract(x * log2(10)) */ - fxch - f2xm1 /* 2^(fract(x * log2(10))) - 1 */ - fld1 - faddp /* 2^(fract(x * log2(10))) */ - fscale /* e^x */ - fstp %st(1) - FLT_NARROW_EVAL_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -2: ret -END (__ieee754_exp10f) -strong_alias (__ieee754_exp10f, __exp10f_finite) diff --git a/sysdeps/i386/fpu/e_exp10l.S b/sysdeps/i386/fpu/e_exp10l.S deleted file mode 100644 index d843e2b5e8..0000000000 --- a/sysdeps/i386/fpu/e_exp10l.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_EXP10L -#include <e_expl.S> diff --git a/sysdeps/i386/fpu/e_exp2.S b/sysdeps/i386/fpu/e_exp2.S deleted file mode 100644 index fc16a96053..0000000000 --- a/sysdeps/i386/fpu/e_exp2.S +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Adapted for exp2 by Ulrich Drepper <drepper@cygnus.com>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_exp2) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - fld %st - frndint /* int(x) */ - fsubr %st,%st(1) /* fract(x) */ - fxch - f2xm1 /* 2^(fract(x)) - 1 */ - fld1 - faddp /* 2^(fract(x)) */ - fscale /* e^x */ - fstp %st(1) - DBL_NARROW_EVAL_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. 
*/ -2: ret -END (__ieee754_exp2) -strong_alias (__ieee754_exp2, __exp2_finite) diff --git a/sysdeps/i386/fpu/e_exp2f.S b/sysdeps/i386/fpu/e_exp2f.S deleted file mode 100644 index 30623cd850..0000000000 --- a/sysdeps/i386/fpu/e_exp2f.S +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Adapted for exp2 by Ulrich Drepper <drepper@cygnus.com>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_exp2f) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - flds 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - fld %st - frndint /* int(x) */ - fsubr %st,%st(1) /* fract(x) */ - fxch - f2xm1 /* 2^(fract(x)) - 1 */ - fld1 - faddp /* 2^(fract(x)) */ - fscale /* e^x */ - fstp %st(1) - FLT_NARROW_EVAL_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -2: ret -END (__ieee754_exp2f) -strong_alias (__ieee754_exp2f, __exp2f_finite) diff --git a/sysdeps/i386/fpu/e_exp2l.S b/sysdeps/i386/fpu/e_exp2l.S deleted file mode 100644 index c4cb73d589..0000000000 --- a/sysdeps/i386/fpu/e_exp2l.S +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Adapted for exp2 by Ulrich Drepper <drepper@cygnus.com>. - * Public domain. 
- */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_LDBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_exp2l) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldt 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - movzwl 4+8(%esp), %eax - andl $0x7fff, %eax - cmpl $0x3fbe, %eax - jge 3f - /* Argument's exponent below -65, result rounds to 1. */ - fld1 - faddp - ret -3: fld %st - frndint /* int(x) */ - fsubr %st,%st(1) /* fract(x) */ - fxch - f2xm1 /* 2^(fract(x)) - 1 */ - fld1 - faddp /* 2^(fract(x)) */ - fscale /* e^x */ - fstp %st(1) - LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -2: ret -END (__ieee754_exp2l) -strong_alias (__ieee754_exp2l, __exp2l_finite) diff --git a/sysdeps/i386/fpu/e_expf.S b/sysdeps/i386/fpu/e_expf.S deleted file mode 100644 index 65cb4ec204..0000000000 --- a/sysdeps/i386/fpu/e_expf.S +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -/* e^x = 2^(x * log2(e)) */ -ENTRY(__ieee754_expf) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - flds 4(%esp) -/* I added the following ugly construct because exp(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? 
*/ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - fldl2e - fmulp /* x * log2(e) */ - fld %st - frndint /* int(x * log2(e)) */ - fsubr %st,%st(1) /* fract(x * log2(e)) */ - fxch - f2xm1 /* 2^(fract(x * log2(e))) - 1 */ - fld1 - faddp /* 2^(fract(x * log2(e))) */ - fscale /* e^x */ - fstp %st(1) - FLT_NARROW_EVAL_UFLOW_NONNEG_NAN - ret - -1: testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -2: ret -END (__ieee754_expf) - - -ENTRY(__expf_finite) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl2e - fmuls 4(%esp) /* x * log2(e) */ - fld %st - frndint /* int(x * log2(e)) */ - fsubr %st,%st(1) /* fract(x * log2(e)) */ - fxch - f2xm1 /* 2^(fract(x * log2(e))) - 1 */ - fld1 - faddp /* 2^(fract(x * log2(e))) */ - fscale /* e^x */ - fstp %st(1) - FLT_NARROW_EVAL_UFLOW_NONNEG - ret -END(__expf_finite) diff --git a/sysdeps/i386/fpu/e_expl.S b/sysdeps/i386/fpu/e_expl.S deleted file mode 100644 index 7d75fe22a1..0000000000 --- a/sysdeps/i386/fpu/e_expl.S +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -/* - * The 8087 method for the exponential function is to calculate - * exp(x) = 2^(x log2(e)) - * after separating integer and fractional parts - * x log2(e) = i + f, |f| <= .5 - * 2^i is immediate but f needs to be precise for long double accuracy. - * Suppress range reduction error in computing f by the following. - * Separate x into integer and fractional parts - * x = xi + xf, |xf| <= .5 - * Separate log2(e) into the sum of an exact number c0 and small part c1. - * c0 + c1 = log2(e) to extra precision - * Then - * f = (c0 xi - i) + c0 xf + c1 x - * where c0 xi is exact and so also is (c0 xi - i). 
- * -- moshier@na-net.ornl.gov - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -#ifdef USE_AS_EXP10L -# define IEEE754_EXPL __ieee754_exp10l -# define EXPL_FINITE __exp10l_finite -# define FLDLOG fldl2t -#elif defined USE_AS_EXPM1L -# define IEEE754_EXPL __expm1l -# undef EXPL_FINITE -# define FLDLOG fldl2e -#else -# define IEEE754_EXPL __ieee754_expl -# define EXPL_FINITE __expl_finite -# define FLDLOG fldl2e -#endif - - .section .rodata.cst16,"aM",@progbits,16 - - .p2align 4 -#ifdef USE_AS_EXP10L - .type c0,@object -c0: .byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40 - .byte 0, 0, 0, 0, 0, 0 - ASM_SIZE_DIRECTIVE(c0) - .type c1,@object -c1: .byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f - .byte 0, 0, 0, 0, 0, 0 - ASM_SIZE_DIRECTIVE(c1) -#else - .type c0,@object -c0: .byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f - .byte 0, 0, 0, 0, 0, 0 - ASM_SIZE_DIRECTIVE(c0) - .type c1,@object -c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f - .byte 0, 0, 0, 0, 0, 0 - ASM_SIZE_DIRECTIVE(c1) -#endif -#ifndef USE_AS_EXPM1L - .type csat,@object -csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40 - .byte 0, 0, 0, 0, 0, 0 - ASM_SIZE_DIRECTIVE(csat) -DEFINE_LDBL_MIN -#endif - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(IEEE754_EXPL) -#ifdef USE_AS_EXPM1L - movzwl 4+8(%esp), %eax - xorb $0x80, %ah // invert sign bit (now 1 is "positive") - cmpl $0xc006, %eax // is num positive and exp >= 6 (number is >= 128.0)? - jae HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0) -#endif - fldt 4(%esp) -/* I added the following ugly construct because expl(+-Inf) resulted - in NaN. The ugliness results from the bright minds at Intel. - For the i686 the code can be written better. - -- drepper@cygnus.com. */ - fxam /* Is NaN or +-Inf? 
*/ -#ifdef PIC - LOAD_PIC_REG (cx) -#endif -#ifdef USE_AS_EXPM1L - xorb $0x80, %ah - cmpl $0xc006, %eax - fstsw %ax - movb $0x45, %dh - jb 4f - - /* Below -64.0 (may be -NaN or -Inf). */ - andb %ah, %dh - cmpb $0x01, %dh - je 6f /* Is +-NaN, jump. */ - jmp 1f /* -large, possibly -Inf. */ - -4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */ - /* Test for +-0 as argument. */ - andb %ah, %dh - cmpb $0x40, %dh - je 2f - - /* Test for arguments that are small but not subnormal. */ - movzwl 4+8(%esp), %eax - andl $0x7fff, %eax - cmpl $0x3fbf, %eax - jge 3f - /* Argument's exponent below -64; avoid spurious underflow if - normal. */ - cmpl $0x0001, %eax - jge 2f - /* Force underflow and return the argument, to avoid wrong signs - of zero results from the code below in some rounding modes. */ - fld %st - fmul %st - fstp %st - jmp 2f -#else - movzwl 4+8(%esp), %eax - andl $0x7fff, %eax - cmpl $0x400d, %eax - jg 5f - cmpl $0x3fbc, %eax - jge 3f - /* Argument's exponent below -67, result rounds to 1. */ - fld1 - faddp - jmp 2f -5: /* Overflow, underflow or infinity or NaN as argument. */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x01, %dh - je 6f /* Is +-NaN, jump. */ - /* Overflow or underflow; saturate. */ - fstp %st - fldt MO(csat) - andb $2, %ah - jz 3f - fchs -#endif -3: FLDLOG /* 1 log2(base) */ - fmul %st(1), %st /* 1 x log2(base) */ - /* Set round-to-nearest temporarily. 
*/ - subl $8, %esp - cfi_adjust_cfa_offset (8) - fstcw 4(%esp) - movl $0xf3ff, %edx - andl 4(%esp), %edx - movl %edx, (%esp) - fldcw (%esp) - frndint /* 1 i */ - fld %st(1) /* 2 x */ - frndint /* 2 xi */ - fldcw 4(%esp) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - fld %st(1) /* 3 i */ - fldt MO(c0) /* 4 c0 */ - fld %st(2) /* 5 xi */ - fmul %st(1), %st /* 5 c0 xi */ - fsubp %st, %st(2) /* 4 f = c0 xi - i */ - fld %st(4) /* 5 x */ - fsub %st(3), %st /* 5 xf = x - xi */ - fmulp %st, %st(1) /* 4 c0 xf */ - faddp %st, %st(1) /* 3 f = f + c0 xf */ - fldt MO(c1) /* 4 */ - fmul %st(4), %st /* 4 c1 * x */ - faddp %st, %st(1) /* 3 f = f + c1 * x */ - f2xm1 /* 3 2^(fract(x * log2(base))) - 1 */ -#ifdef USE_AS_EXPM1L - fstp %st(1) /* 2 */ - fscale /* 2 scale factor is st(1); base^x - 2^i */ - fxch /* 2 i */ - fld1 /* 3 1.0 */ - fscale /* 3 2^i */ - fld1 /* 4 1.0 */ - fsubrp %st, %st(1) /* 3 2^i - 1.0 */ - fstp %st(1) /* 2 */ - faddp %st, %st(1) /* 1 base^x - 1.0 */ -#else - fld1 /* 4 1.0 */ - faddp /* 3 2^(fract(x * log2(base))) */ - fstp %st(1) /* 2 */ - fscale /* 2 scale factor is st(1); base^x */ - fstp %st(1) /* 1 */ - LDBL_CHECK_FORCE_UFLOW_NONNEG -#endif - fstp %st(1) /* 0 */ - jmp 2f -1: -#ifdef USE_AS_EXPM1L - /* For expm1l, only negative sign gets here. */ - fstp %st - fld1 - fchs -#else - testl $0x200, %eax /* Test sign. */ - jz 2f /* If positive, jump. */ - fstp %st - fldz /* Set result to 0. */ -#endif -2: ret -6: /* NaN argument. */ - fadd %st - ret -END(IEEE754_EXPL) -#ifdef USE_AS_EXPM1L -libm_hidden_def (__expm1l) -weak_alias (__expm1l, expm1l) -#else -strong_alias (IEEE754_EXPL, EXPL_FINITE) -#endif diff --git a/sysdeps/i386/fpu/e_fmod.S b/sysdeps/i386/fpu/e_fmod.S deleted file mode 100644 index 26b3acc392..0000000000 --- a/sysdeps/i386/fpu/e_fmod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -ENTRY(__ieee754_fmod) - fldl 12(%esp) - fldl 4(%esp) -1: fprem - fstsw %ax - sahf - jp 1b - fstp %st(1) - ret -END (__ieee754_fmod) -strong_alias (__ieee754_fmod, __fmod_finite) diff --git a/sysdeps/i386/fpu/e_fmodf.S b/sysdeps/i386/fpu/e_fmodf.S deleted file mode 100644 index ece4d98427..0000000000 --- a/sysdeps/i386/fpu/e_fmodf.S +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - -ENTRY(__ieee754_fmodf) - flds 8(%esp) - flds 4(%esp) -1: fprem - fstsw %ax - sahf - jp 1b - fstp %st(1) - ret -END(__ieee754_fmodf) -strong_alias (__ieee754_fmodf, __fmodf_finite) diff --git a/sysdeps/i386/fpu/e_fmodl.c b/sysdeps/i386/fpu/e_fmodl.c deleted file mode 100644 index 49700ae8f6..0000000000 --- a/sysdeps/i386/fpu/e_fmodl.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <math_private.h> - -long double -__ieee754_fmodl (long double x, long double y) -{ - long double res; - - asm ("1:\tfprem\n" - "fstsw %%ax\n" - "sahf\n" - "jp 1b\n" - "fstp %%st(1)" - : "=t" (res) : "0" (x), "u" (y) : "ax", "st(1)"); - return res; -} -strong_alias (__ieee754_fmodl, __fmodl_finite) diff --git a/sysdeps/i386/fpu/e_hypot.S b/sysdeps/i386/fpu/e_hypot.S deleted file mode 100644 index 7403566fd7..0000000000 --- a/sysdeps/i386/fpu/e_hypot.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Compute the hypothenuse of X and Y. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <i386-math-asm.h> - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_hypot) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fldl 4(%esp) // x - fxam - fnstsw - fldl 12(%esp) // y : x - movb %ah, %ch - fxam - fnstsw - movb %ah, %al - orb %ch, %ah - sahf - jc 1f - fmul %st(0) // y * y : x - fxch // x : y * y - fmul %st(0) // x * x : y * y - faddp // x * x + y * y - fsqrt - DBL_NARROW_EVAL_UFLOW_NONNEG -2: ret - - // We have to test whether any of the parameters is Inf. - // In this case the result is infinity. -1: andb $0x45, %al - cmpb $5, %al - je 3f // jump if y is Inf - andb $0x45, %ch - cmpb $5, %ch - jne 4f // jump if x is not Inf - fxch -3: fstp %st(1) - fabs - jmp 2b - -4: testb $1, %al - jnz 5f // y is NaN - fxch -5: fstp %st(1) - jmp 2b - -END(__ieee754_hypot) -strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/sysdeps/i386/fpu/e_hypotf.S b/sysdeps/i386/fpu/e_hypotf.S deleted file mode 100644 index 6a2c7052b2..0000000000 --- a/sysdeps/i386/fpu/e_hypotf.S +++ /dev/null @@ -1,64 +0,0 @@ -/* Compute the hypothenuse of X and Y. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <i386-math-asm.h> - - .text -ENTRY(__ieee754_hypotf) - flds 4(%esp) // x - fxam - fnstsw - flds 8(%esp) // y : x - movb %ah, %ch - fxam - fnstsw - movb %ah, %al - orb %ch, %ah - sahf - jc 1f - fmul %st(0) // y * y : x - fxch // x : y * y - fmul %st(0) // x * x : y * y - faddp // x * x + y * y - fsqrt - FLT_NARROW_EVAL -2: ret - - // We have to test whether any of the parameters is Inf. - // In this case the result is infinity. -1: andb $0x45, %al - cmpb $5, %al - je 3f // jump if y is Inf - andb $0x45, %ch - cmpb $5, %ch - jne 4f // jump if x is not Inf - fxch -3: fstp %st(1) - fabs - jmp 2b - -4: testb $1, %al - jnz 5f // y is NaN - fxch -5: fstp %st(1) - jmp 2b - -END(__ieee754_hypotf) -strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/sysdeps/i386/fpu/e_ilogb.S b/sysdeps/i386/fpu/e_ilogb.S deleted file mode 100644 index 29ef2214e6..0000000000 --- a/sysdeps/i386/fpu/e_ilogb.S +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ilogb.S,v 1.5 1995/10/12 15:53:09 jtc Exp $") - -ENTRY(__ieee754_ilogb) - fldl 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. 
*/ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogb) diff --git a/sysdeps/i386/fpu/e_ilogbf.S b/sysdeps/i386/fpu/e_ilogbf.S deleted file mode 100644 index d72de6c84a..0000000000 --- a/sysdeps/i386/fpu/e_ilogbf.S +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ilogbf.S,v 1.4 1995/10/22 20:32:43 pk Exp $") - -ENTRY(__ieee754_ilogbf) - flds 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogbf) diff --git a/sysdeps/i386/fpu/e_ilogbl.S b/sysdeps/i386/fpu/e_ilogbl.S deleted file mode 100644 index 60761dfa38..0000000000 --- a/sysdeps/i386/fpu/e_ilogbl.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -ENTRY(__ieee754_ilogbl) - fldt 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. */ - fxam /* Is NaN or +-Inf? 
*/ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogbl) diff --git a/sysdeps/i386/fpu/e_log.S b/sysdeps/i386/fpu/e_log.S deleted file mode 100644 index 335df22577..0000000000 --- a/sysdeps/i386/fpu/e_log.S +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. */ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log) - fldln2 // log(2) - fldl 4(%esp) // x : log(2) - fxam - fnstsw -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - sahf - jc 3f // in case x is NaN or +-Inf -4: fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fcompl MO(limit) // x-1 : x : log(2) - fnstsw // x-1 : x : log(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log(1) is +0 in all rounding modes. 
-5: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : log(2) - fyl2x // log(x) - ret - -3: jp 4b // in case x is +-Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_log) - -ENTRY(__log_finite) - fldln2 // log(2) - fldl 4(%esp) // x : log(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fcompl MO(limit) // x-1 : x : log(2) - fnstsw // x-1 : x : log(2) - andb $0x45, %ah - jz 2b - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 6f - fabs // log(1) is +0 in all rounding modes. -6: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret -END(__log_finite) diff --git a/sysdeps/i386/fpu/e_log10.S b/sysdeps/i386/fpu/e_log10.S deleted file mode 100644 index 17277084ca..0000000000 --- a/sysdeps/i386/fpu/e_log10.S +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log10) - fldlg2 // log10(2) - fldl 4(%esp) // x : log10(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fxam - fnstsw - fld %st // x : x : log10(2) - sahf - jc 3f // in case x is NaN or ±Inf -4: fsubl MO(one) // x-1 : x : log10(2) - fld %st // x-1 : x-1 : x : log10(2) - fabs // |x-1| : x-1 : x : log10(2) - fcompl MO(limit) // x-1 : x : log10(2) - fnstsw // x-1 : x : log10(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log10(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : log10(2) - fyl2xp1 // log10(x) - ret - -2: fstp %st(0) // x : log10(2) - fyl2x // log10(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_log10) -strong_alias (__ieee754_log10, __log10_finite) diff --git a/sysdeps/i386/fpu/e_log10f.S b/sysdeps/i386/fpu/e_log10f.S deleted file mode 100644 index 72a3b88251..0000000000 --- a/sysdeps/i386/fpu/e_log10f.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log10f) - fldlg2 // log10(2) - flds 4(%esp) // x : log10(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fxam - fnstsw - fld %st // x : x : log10(2) - sahf - jc 3f // in case x is NaN or ±Inf -4: fsubl MO(one) // x-1 : x : log10(2) - fld %st // x-1 : x-1 : x : log10(2) - fabs // |x-1| : x-1 : x : log10(2) - fcompl MO(limit) // x-1 : x : log10(2) - fnstsw // x-1 : x : log10(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log10(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : log10(2) - fyl2xp1 // log10(x) - ret - -2: fstp %st(0) // x : log10(2) - fyl2x // log10(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_log10f) -strong_alias (__ieee754_log10f, __log10f_finite) diff --git a/sysdeps/i386/fpu/e_log10l.S b/sysdeps/i386/fpu/e_log10l.S deleted file mode 100644 index 9326b19796..0000000000 --- a/sysdeps/i386/fpu/e_log10l.S +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log10l) - fldlg2 // log10(2) - fldt 4(%esp) // x : log10(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fxam - fnstsw - fld %st // x : x : log10(2) - sahf - jc 3f // in case x is NaN or ±Inf -4: fsubl MO(one) // x-1 : x : log10(2) - fld %st // x-1 : x-1 : x : log10(2) - fabs // |x-1| : x-1 : x : log10(2) - fcompl MO(limit) // x-1 : x : log10(2) - fnstsw // x-1 : x : log10(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log10(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : log10(2) - fyl2xp1 // log10(x) - ret - -2: fstp %st(0) // x : log10(2) - fyl2x // log10(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - fadd %st(0) - ret -END(__ieee754_log10l) -strong_alias (__ieee754_log10l, __log10l_finite) diff --git a/sysdeps/i386/fpu/e_log2.S b/sysdeps/i386/fpu/e_log2.S deleted file mode 100644 index 73ff0fffd3..0000000000 --- a/sysdeps/i386/fpu/e_log2.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>. - * Public domain. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fldl MO(one) - fldl 4(%esp) // x : 1 - fxam - fnstsw - fld %st // x : x : 1 - sahf - jc 3f // in case x is NaN or ±Inf -4: fsub %st(2), %st // x-1 : x : 1 - fld %st // x-1 : x-1 : x : 1 - fabs // |x-1| : x-1 : x : 1 - fcompl MO(limit) // x-1 : x : 1 - fnstsw // x-1 : x : 1 - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log2(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : 1 - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : 1 - fyl2x // log(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_log2) -strong_alias (__ieee754_log2, __log2_finite) diff --git a/sysdeps/i386/fpu/e_log2f.S b/sysdeps/i386/fpu/e_log2f.S deleted file mode 100644 index 344eeb495e..0000000000 --- a/sysdeps/i386/fpu/e_log2f.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>. - * Public domain. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log2f) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fldl MO(one) - flds 4(%esp) // x : 1 - fxam - fnstsw - fld %st // x : x : 1 - sahf - jc 3f // in case x is NaN or ±Inf -4: fsub %st(2), %st // x-1 : x : 1 - fld %st // x-1 : x-1 : x : 1 - fabs // |x-1| : x-1 : x : 1 - fcompl MO(limit) // x-1 : x : 1 - fnstsw // x-1 : x : 1 - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log2(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : 1 - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : 1 - fyl2x // log(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_log2f) -strong_alias (__ieee754_log2f, __log2f_finite) diff --git a/sysdeps/i386/fpu/e_log2l.S b/sysdeps/i386/fpu/e_log2l.S deleted file mode 100644 index 73e62ea908..0000000000 --- a/sysdeps/i386/fpu/e_log2l.S +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Adapted for use as log2 by Ulrich Drepper <drepper@cygnus.com>. - * Public domain. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_log2l) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fldl MO(one) - fldt 4(%esp) // x : 1 - fxam - fnstsw - fld %st // x : x : 1 - sahf - jc 3f // in case x is NaN or ±Inf -4: fsub %st(2), %st // x-1 : x : 1 - fld %st // x-1 : x-1 : x : 1 - fabs // |x-1| : x-1 : x : 1 - fcompl MO(limit) // x-1 : x : 1 - fnstsw // x-1 : x : 1 - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log2(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : 1 - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : 1 - fyl2x // log(x) - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - fadd %st(0) - ret -END (__ieee754_log2l) -strong_alias (__ieee754_log2l, __log2l_finite) diff --git a/sysdeps/i386/fpu/e_logf.S b/sysdeps/i386/fpu/e_logf.S deleted file mode 100644 index de967a31f5..0000000000 --- a/sysdeps/i386/fpu/e_logf.S +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float by Ulrich Drepper <drepper@cygnus.com>. - * - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_logf) - fldln2 // log(2) - flds 4(%esp) // x : log(2) - fxam - fnstsw -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - sahf - jc 3f // in case x is NaN or +-Inf -4: fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fcompl MO(limit) // x-1 : x : log(2) - fnstsw // x-1 : x : log(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : log(2) - fyl2x // log(x) - ret - -3: jp 4b // in case x is +-Inf - fstp %st(1) - fstp %st(1) - ret -END (__ieee754_logf) - -ENTRY(__logf_finite) - fldln2 // log(2) - flds 4(%esp) // x : log(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fcompl MO(limit) // x-1 : x : log(2) - fnstsw // x-1 : x : log(2) - andb $0x45, %ah - jz 2b - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 6f - fabs // log(1) is +0 in all rounding modes. -6: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret -END(__logf_finite) diff --git a/sysdeps/i386/fpu/e_logl.S b/sysdeps/i386/fpu/e_logl.S deleted file mode 100644 index 53127d704e..0000000000 --- a/sysdeps/i386/fpu/e_logl.S +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. 
It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. */ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_logl) - fldln2 // log(2) - fldt 4(%esp) // x : log(2) - fxam - fnstsw -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - sahf - jc 3f // in case x is NaN or +-Inf - movzwl 4+8(%esp), %eax - cmpl $0xc000, %eax - jae 6f // x <= -2, avoid overflow from -LDBL_MAX - 1. -4: fsubl MO(one) // x-1 : x : log(2) -6: fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fcompl MO(limit) // x-1 : x : log(2) - fnstsw // x-1 : x : log(2) - andb $0x45, %ah - jz 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 5f - fabs // log(1) is +0 in all rounding modes. -5: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : log(2) - fyl2x // log(x) - ret - -3: jp 4b // in case x is +-Inf - fstp %st(1) - fstp %st(1) - fadd %st(0) - ret -END (__ieee754_logl) - -ENTRY(__logl_finite) - fldln2 // log(2) - fldt 4(%esp) // x : log(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fcompl MO(limit) // x-1 : x : log(2) - fnstsw // x-1 : x : log(2) - andb $0x45, %ah - jz 2b - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 7f - fabs // log(1) is +0 in all rounding modes. -7: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret -END(__logl_finite) diff --git a/sysdeps/i386/fpu/e_pow.S b/sysdeps/i386/fpu/e_pow.S deleted file mode 100644 index 2edb9a9fbc..0000000000 --- a/sysdeps/i386/fpu/e_pow.S +++ /dev/null @@ -1,456 +0,0 @@ -/* ix87 specific implementation of pow function. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - .type p63,@object -p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 - ASM_SIZE_DIRECTIVE(p63) - .type p10,@object -p10: .byte 0, 0, 0, 0, 0, 0, 0x90, 0x40 - ASM_SIZE_DIRECTIVE(p10) - - .section .rodata.cst16,"aM",@progbits,16 - - .p2align 3 - .type infinity,@object -inf_zero: -infinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f - ASM_SIZE_DIRECTIVE(infinity) - .type zero,@object -zero: .double 0.0 - ASM_SIZE_DIRECTIVE(zero) - .type minf_mzero,@object -minf_mzero: -minfinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff -mzero: - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - ASM_SIZE_DIRECTIVE(minf_mzero) -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define MOX(op,x,f) op(,x,f) -#endif - - .text -ENTRY(__ieee754_pow) - fldl 12(%esp) // y - fxam - -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - - fnstsw - movb %ah, %dl - andb $0x45, %ah - cmpb $0x40, %ah // is y == 0 ? - je 11f - - cmpb $0x05, %ah // is y == ±inf ? 
- je 12f - - cmpb $0x01, %ah // is y == NaN ? - je 30f - - fldl 4(%esp) // x : y - - subl $8,%esp - cfi_adjust_cfa_offset (8) - - fxam - fnstsw - movb %ah, %dh - andb $0x45, %ah - cmpb $0x40, %ah - je 20f // x is ±0 - - cmpb $0x05, %ah - je 15f // x is ±inf - - cmpb $0x01, %ah - je 32f // x is NaN - - fxch // y : x - - /* fistpll raises invalid exception for |y| >= 1L<<63. */ - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p63) // y : x - fnstsw - sahf - jnc 2f - - /* First see whether `y' is a natural number. In this case we - can use a more precise algorithm. */ - fld %st // y : y : x - fistpll (%esp) // y : x - fildll (%esp) // int(y) : y : x - fucomp %st(1) // y : x - fnstsw - sahf - jne 3f - - /* OK, we have an integer value for y. If large enough that - errors may propagate out of the 11 bits excess precision, use - the algorithm for real exponent instead. */ - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p10) // y : x - fnstsw - sahf - jnc 2f - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - orl $0, %edx - fstp %st(0) // x - jns 4f // y >= 0, jump - fdivrl MO(one) // 1/x (now referred to as x) - negl %eax - adcl $0, %edx - negl %edx -4: fldl MO(one) // 1 : x - fxch - - /* If y is even, take the absolute value of x. Otherwise, - ensure all intermediate values that might overflow have the - sign of x. 
*/ - testb $1, %al - jnz 6f - fabs - -6: shrdl $1, %edx, %eax - jnc 5f - fxch - fabs - fmul %st(1) // x : ST*x - fxch -5: fld %st // x : x : ST*x - fabs // |x| : x : ST*x - fmulp // |x|*x : ST*x - shrl $1, %edx - movl %eax, %ecx - orl %edx, %ecx - jnz 6b - fstp %st(0) // ST*x -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - DBL_NARROW_EVAL_UFLOW_NONNAN - ret - - /* y is ±NAN */ -30: fldl 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y - fucomp %st(1) // x : y - fnstsw - sahf - je 31f - fxch // y : x -31: fstp %st(1) - ret - - cfi_adjust_cfa_offset (8) -32: addl $8, %esp - cfi_adjust_cfa_offset (-8) - fstp %st(1) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) -2: // y is a large integer (absolute value at least 1L<<10), but - // may be odd unless at least 1L<<64. So it may be necessary - // to adjust the sign of a negative result afterwards. - fxch // x : y - fabs // |x| : y - fxch // y : x - .align ALIGNARG(4) -3: /* y is a real number. */ - fxch // x : y - fldl MO(one) // 1.0 : x : y - fldl MO(limit) // 0.29 : 1.0 : x : y - fld %st(2) // x : 0.29 : 1.0 : x : y - fsub %st(2) // x-1 : 0.29 : 1.0 : x : y - fabs // |x-1| : 0.29 : 1.0 : x : y - fucompp // 1.0 : x : y - fnstsw - fxch // x : 1.0 : y - sahf - ja 7f - fsub %st(1) // x-1 : 1.0 : y - fyl2xp1 // log2(x) : y - jmp 8f - -7: fyl2x // log2(x) : y -8: fmul %st(1) // y*log2(x) : y - fst %st(1) // y*log2(x) : y*log2(x) - frndint // int(y*log2(x)) : y*log2(x) - fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) - fxch // fract(y*log2(x)) : int(y*log2(x)) - f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) - faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) - - // Before scaling, we must negate if x is negative and y is an - // odd integer. - testb $2, %dh - jz 291f - // x is negative. If y is an odd integer, negate the result. 
- fldl 20(%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x)) - fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x)) - fabs // |y| : y : 2^fract(y*log2(x)) : int(y*log2(x)) - fcompl MO(p63) // y : 2^fract(y*log2(x)) : int(y*log2(x)) - fnstsw - sahf - jnc 290f - - // We must find out whether y is an odd integer. - fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x)) - fistpll (%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x)) - fildll (%esp) // int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x)) - fucompp // 2^fract(y*log2(x)) : int(y*log2(x)) - fnstsw - sahf - jne 291f - - // OK, the value is an integer, but is it odd? - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 292f // jump if not odd - // It's an odd integer. - fchs - jmp 292f - - cfi_adjust_cfa_offset (8) -290: fstp %st(0) // 2^fract(y*log2(x)) : int(y*log2(x)) -291: addl $8, %esp - cfi_adjust_cfa_offset (-8) -292: fscale // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) - fstp %st(1) // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) - DBL_NARROW_EVAL_UFLOW_NONNAN - ret - - - // pow(x,±0) = 1 - .align ALIGNARG(4) -11: fstp %st(0) // pop y - fldl MO(one) - ret - - // y == ±inf - .align ALIGNARG(4) -12: fstp %st(0) // pop y - fldl MO(one) // 1 - fldl 4(%esp) // x : 1 - fabs // abs(x) : 1 - fucompp // < 1, == 1, or > 1 - fnstsw - andb $0x45, %ah - cmpb $0x45, %ah - je 13f // jump if x is NaN - - cmpb $0x40, %ah - je 14f // jump if |x| == 1 - - shlb $1, %ah - xorb %ah, %dl - andl $2, %edx - fldl MOX(inf_zero, %edx, 4) - ret - - .align ALIGNARG(4) -14: fldl MO(one) - ret - - .align ALIGNARG(4) -13: fldl 4(%esp) // load x == NaN - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) - // x is ±inf -15: fstp %st(0) // y - testb $2, %dh - jz 16f // jump if x == +inf - - // fistpll raises invalid exception for |y| >= 1L<<63, so test - // that (in which case y is certainly even) before testing - // whether y is odd. 
- fld %st // y : y - fabs // |y| : y - fcompl MO(p63) // y - fnstsw - sahf - jnc 16f - - // We must find out whether y is an odd integer. - fld %st // y : y - fistpll (%esp) // y - fildll (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 17f - - // OK, the value is an integer. - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 18f // jump if not odd - // It's an odd integer. - shrl $31, %edx - fldl MOX(minf_mzero, %edx, 8) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) -16: fcompl MO(zero) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - fnstsw - shrl $5, %eax - andl $8, %eax - fldl MOX(inf_zero, %eax, 1) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) -17: shll $30, %edx // sign bit for y in right position - addl $8, %esp - cfi_adjust_cfa_offset (-8) -18: shrl $31, %edx - fldl MOX(inf_zero, %edx, 8) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) - // x is ±0 -20: fstp %st(0) // y - testb $2, %dl - jz 21f // y > 0 - - // x is ±0 and y is < 0. We must find out whether y is an odd integer. - testb $2, %dh - jz 25f - - // fistpll raises invalid exception for |y| >= 1L<<63, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fld %st // y : y - fabs // |y| : y - fcompl MO(p63) // y - fnstsw - sahf - jnc 25f - - fld %st // y : y - fistpll (%esp) // y - fildll (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 26f - - // OK, the value is an integer. - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 27f // jump if not odd - // It's an odd integer. - // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) - fdivl MO(zero) - fchs - ret - - cfi_adjust_cfa_offset (8) -25: fstp %st(0) -26: addl $8, %esp - cfi_adjust_cfa_offset (-8) -27: // Raise divide-by-zero exception and get infinity value. 
- fldl MO(one) - fdivl MO(zero) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) - // x is ±0 and y is > 0. We must find out whether y is an odd integer. -21: testb $2, %dh - jz 22f - - // fistpll raises invalid exception for |y| >= 1L<<63, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fcoml MO(p63) // y - fnstsw - sahf - jnc 22f - - fld %st // y : y - fistpll (%esp) // y - fildll (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 23f - - // OK, the value is an integer. - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 24f // jump if not odd - // It's an odd integer. - fldl MO(mzero) - ret - - cfi_adjust_cfa_offset (8) -22: fstp %st(0) -23: addl $8, %esp // Don't use 2 x pop - cfi_adjust_cfa_offset (-8) -24: fldl MO(zero) - ret - -END(__ieee754_pow) -strong_alias (__ieee754_pow, __pow_finite) diff --git a/sysdeps/i386/fpu/e_powf.S b/sysdeps/i386/fpu/e_powf.S deleted file mode 100644 index 467ef2380b..0000000000 --- a/sysdeps/i386/fpu/e_powf.S +++ /dev/null @@ -1,392 +0,0 @@ -/* ix87 specific implementation of pow function. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - .type p31,@object -p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41 - ASM_SIZE_DIRECTIVE(p31) - - .section .rodata.cst16,"aM",@progbits,16 - - .p2align 3 - .type infinity,@object -inf_zero: -infinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f - ASM_SIZE_DIRECTIVE(infinity) - .type zero,@object -zero: .double 0.0 - ASM_SIZE_DIRECTIVE(zero) - .type minf_mzero,@object -minf_mzero: -minfinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff -mzero: - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - ASM_SIZE_DIRECTIVE(minf_mzero) -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define MOX(op,x,f) op(,x,f) -#endif - - .text -ENTRY(__ieee754_powf) - flds 8(%esp) // y - fxam - -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - - fnstsw - movb %ah, %dl - andb $0x45, %ah - cmpb $0x40, %ah // is y == 0 ? - je 11f - - cmpb $0x05, %ah // is y == ±inf ? - je 12f - - cmpb $0x01, %ah // is y == NaN ? - je 30f - - flds 4(%esp) // x : y - - subl $4, %esp - cfi_adjust_cfa_offset (4) - - fxam - fnstsw - movb %ah, %dh - andb $0x45, %ah - cmpb $0x40, %ah - je 20f // x is ±0 - - cmpb $0x05, %ah - je 15f // x is ±inf - - cmpb $0x01, %ah - je 33f // x is NaN - - fxch // y : x - - /* fistpl raises invalid exception for |y| >= 1L<<31. */ - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p31) // y : x - fnstsw - sahf - jnc 2f - - /* First see whether `y' is a natural number. In this case we - can use a more precise algorithm. */ - fld %st // y : y : x - fistpl (%esp) // y : x - fildl (%esp) // int(y) : y : x - fucomp %st(1) // y : x - fnstsw - sahf - jne 3f - - /* OK, we have an integer value for y. 
*/ - popl %edx - cfi_adjust_cfa_offset (-4) - orl $0, %edx - fstp %st(0) // x - jns 4f // y >= 0, jump - fdivrl MO(one) // 1/x (now referred to as x) - negl %edx -4: fldl MO(one) // 1 : x - fxch - - /* If y is even, take the absolute value of x. Otherwise, - ensure all intermediate values that might overflow have the - sign of x. */ - testb $1, %dl - jnz 6f - fabs - -6: shrl $1, %edx - jnc 5f - fxch - fabs - fmul %st(1) // x : ST*x - fxch -5: fld %st // x : x : ST*x - fabs // |x| : x : ST*x - fmulp // |x|*x : ST*x - testl %edx, %edx - jnz 6b - fstp %st(0) // ST*x - FLT_NARROW_EVAL_UFLOW_NONNAN - ret - - /* y is ±NAN */ -30: flds 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y - fucomp %st(1) // x : y - fnstsw - sahf - je 31f - fxch // y : x -31: fstp %st(1) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) -2: /* y is a large integer (so even). */ - fxch // x : y - fabs // |x| : y - fxch // y : x - .align ALIGNARG(4) -3: /* y is a real number. */ - fxch // x : y - fldl MO(one) // 1.0 : x : y - fldl MO(limit) // 0.29 : 1.0 : x : y - fld %st(2) // x : 0.29 : 1.0 : x : y - fsub %st(2) // x-1 : 0.29 : 1.0 : x : y - fabs // |x-1| : 0.29 : 1.0 : x : y - fucompp // 1.0 : x : y - fnstsw - fxch // x : 1.0 : y - sahf - ja 7f - fsub %st(1) // x-1 : 1.0 : y - fyl2xp1 // log2(x) : y - jmp 8f - -7: fyl2x // log2(x) : y -8: fmul %st(1) // y*log2(x) : y - fst %st(1) // y*log2(x) : y*log2(x) - frndint // int(y*log2(x)) : y*log2(x) - fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) - fxch // fract(y*log2(x)) : int(y*log2(x)) - f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) - faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) - fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) -32: addl $4, %esp - cfi_adjust_cfa_offset (-4) - fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) - FLT_NARROW_EVAL_UFLOW_NONNAN - ret - - /* x is NaN. 
*/ - cfi_adjust_cfa_offset (4) -33: addl $4, %esp - cfi_adjust_cfa_offset (-4) - fstp %st(1) - ret - - // pow(x,±0) = 1 - .align ALIGNARG(4) -11: fstp %st(0) // pop y - fldl MO(one) - ret - - // y == ±inf - .align ALIGNARG(4) -12: fstp %st(0) // pop y - fldl MO(one) // 1 - flds 4(%esp) // x : 1 - fabs // abs(x) : 1 - fucompp // < 1, == 1, or > 1 - fnstsw - andb $0x45, %ah - cmpb $0x45, %ah - je 13f // jump if x is NaN - - cmpb $0x40, %ah - je 14f // jump if |x| == 1 - - shlb $1, %ah - xorb %ah, %dl - andl $2, %edx - fldl MOX(inf_zero, %edx, 4) - ret - - .align ALIGNARG(4) -14: fldl MO(one) - ret - - .align ALIGNARG(4) -13: flds 4(%esp) // load x == NaN - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) - // x is ±inf -15: fstp %st(0) // y - testb $2, %dh - jz 16f // jump if x == +inf - - // fistpl raises invalid exception for |y| >= 1L<<31, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fld %st // y : y - fabs // |y| : y - fcompl MO(p31) // y - fnstsw - sahf - jnc 16f - - // We must find out whether y is an odd integer. - fld %st // y : y - fistpl (%esp) // y - fildl (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 17f - - // OK, the value is an integer. - popl %edx - cfi_adjust_cfa_offset (-4) - testb $1, %dl - jz 18f // jump if not odd - // It's an odd integer. - shrl $31, %edx - fldl MOX(minf_mzero, %edx, 8) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) -16: fcompl MO(zero) - addl $4, %esp - cfi_adjust_cfa_offset (-4) - fnstsw - shrl $5, %eax - andl $8, %eax - fldl MOX(inf_zero, %eax, 1) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) -17: shll $30, %edx // sign bit for y in right position - addl $4, %esp - cfi_adjust_cfa_offset (-4) -18: shrl $31, %edx - fldl MOX(inf_zero, %edx, 8) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) - // x is ±0 -20: fstp %st(0) // y - testb $2, %dl - jz 21f // y > 0 - - // x is ±0 and y is < 0. 
We must find out whether y is an odd integer. - testb $2, %dh - jz 25f - - // fistpl raises invalid exception for |y| >= 1L<<31, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fld %st // y : y - fabs // |y| : y - fcompl MO(p31) // y - fnstsw - sahf - jnc 25f - - fld %st // y : y - fistpl (%esp) // y - fildl (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 26f - - // OK, the value is an integer. - popl %edx - cfi_adjust_cfa_offset (-4) - testb $1, %dl - jz 27f // jump if not odd - // It's an odd integer. - // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) - fdivl MO(zero) - fchs - ret - - cfi_adjust_cfa_offset (4) -25: fstp %st(0) -26: addl $4, %esp - cfi_adjust_cfa_offset (-4) -27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) - fdivl MO(zero) - ret - - cfi_adjust_cfa_offset (4) - .align ALIGNARG(4) - // x is ±0 and y is > 0. We must find out whether y is an odd integer. -21: testb $2, %dh - jz 22f - - // fistpl raises invalid exception for |y| >= 1L<<31, so test - // that (in which case y is certainly even) before testing - // whether y is odd. - fcoml MO(p31) // y - fnstsw - sahf - jnc 22f - - fld %st // y : y - fistpl (%esp) // y - fildl (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 23f - - // OK, the value is an integer. - popl %edx - cfi_adjust_cfa_offset (-4) - testb $1, %dl - jz 24f // jump if not odd - // It's an odd integer. - fldl MO(mzero) - ret - - cfi_adjust_cfa_offset (4) -22: fstp %st(0) -23: addl $4, %esp // Don't use pop. - cfi_adjust_cfa_offset (-4) -24: fldl MO(zero) - ret - -END(__ieee754_powf) -strong_alias (__ieee754_powf, __powf_finite) diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S deleted file mode 100644 index 9e162848e4..0000000000 --- a/sysdeps/i386/fpu/e_powl.S +++ /dev/null @@ -1,459 +0,0 @@ -/* ix87 specific implementation of pow function. 
- Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type p2,@object -p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40 - ASM_SIZE_DIRECTIVE(p2) - .type p63,@object -p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 - ASM_SIZE_DIRECTIVE(p63) - .type p64,@object -p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 - ASM_SIZE_DIRECTIVE(p64) - .type p78,@object -p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 - ASM_SIZE_DIRECTIVE(p78) - .type pm79,@object -pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b - ASM_SIZE_DIRECTIVE(pm79) - - .section .rodata.cst16,"aM",@progbits,16 - - .p2align 3 - .type infinity,@object -inf_zero: -infinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f - ASM_SIZE_DIRECTIVE(infinity) - .type zero,@object -zero: .double 0.0 - ASM_SIZE_DIRECTIVE(zero) - .type minf_mzero,@object -minf_mzero: -minfinity: - .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff -mzero: - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - ASM_SIZE_DIRECTIVE(minf_mzero) -DEFINE_LDBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define 
MOX(op,x,f) op(,x,f) -#endif - - .text -ENTRY(__ieee754_powl) - fldt 16(%esp) // y - fxam - -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - - fnstsw - movb %ah, %dl - andb $0x45, %ah - cmpb $0x40, %ah // is y == 0 ? - je 11f - - cmpb $0x05, %ah // is y == ±inf ? - je 12f - - cmpb $0x01, %ah // is y == NaN ? - je 30f - - fldt 4(%esp) // x : y - - subl $8,%esp - cfi_adjust_cfa_offset (8) - - fxam - fnstsw - movb %ah, %dh - andb $0x45, %ah - cmpb $0x40, %ah - je 20f // x is ±0 - - cmpb $0x05, %ah - je 15f // x is ±inf - - cmpb $0x01, %ah - je 32f // x is NaN - - fxch // y : x - - /* fistpll raises invalid exception for |y| >= 1L<<63. */ - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p63) // y : x - fnstsw - sahf - jnc 2f - - /* First see whether `y' is a natural number. In this case we - can use a more precise algorithm. */ - fld %st // y : y : x - fistpll (%esp) // y : x - fildll (%esp) // int(y) : y : x - fucomp %st(1) // y : x - fnstsw - sahf - je 9f - - // If y has absolute value at most 0x1p-79, then any finite - // nonzero x will result in 1. Saturate y to those bounds to - // avoid underflow in the calculation of y*log2(x). - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(pm79) // y : x - fnstsw - sahf - jnc 3f - fstp %st(0) // pop y - fldl MO(pm79) // 0x1p-79 : x - testb $2, %dl - jnz 3f // y > 0 - fchs // -0x1p-79 : x - jmp 3f - -9: /* OK, we have an integer value for y. Unless very small - (we use < 4), use the algorithm for real exponent to avoid - accumulation of errors. */ - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p2) // y : x - fnstsw - sahf - jnc 3f - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - orl $0, %edx - fstp %st(0) // x - jns 4f // y >= 0, jump - fdivrl MO(one) // 1/x (now referred to as x) - negl %eax - adcl $0, %edx - negl %edx -4: fldl MO(one) // 1 : x - fxch - - /* If y is even, take the absolute value of x. 
Otherwise, - ensure all intermediate values that might overflow have the - sign of x. */ - testb $1, %al - jnz 6f - fabs - -6: shrdl $1, %edx, %eax - jnc 5f - fxch - fabs - fmul %st(1) // x : ST*x - fxch -5: fld %st // x : x : ST*x - fabs // |x| : x : ST*x - fmulp // |x|*x : ST*x - shrl $1, %edx - movl %eax, %ecx - orl %edx, %ecx - jnz 6b - fstp %st(0) // ST*x -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - LDBL_CHECK_FORCE_UFLOW_NONNAN - ret - - /* y is ±NAN */ -30: fldt 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y - fucomp %st(1) // x : y - fnstsw - sahf - je 33f -31: /* At least one argument NaN, and result should be NaN. */ - faddp - ret -33: jp 31b - /* pow (1, NaN); check if the NaN signaling. */ - testb $0x40, 23(%esp) - jz 31b - fstp %st(1) - ret - - cfi_adjust_cfa_offset (8) -32: addl $8, %esp - cfi_adjust_cfa_offset (-8) - faddp - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) -2: // y is a large integer (absolute value at least 1L<<63). - // If y has absolute value at least 1L<<78, then any finite - // nonzero x will result in 0 (underflow), 1 or infinity (overflow). - // Saturate y to those bounds to avoid overflow in the calculation - // of y*log2(x). - fld %st // y : y : x - fabs // |y| : y : x - fcompl MO(p78) // y : x - fnstsw - sahf - jc 3f - fstp %st(0) // pop y - fldl MO(p78) // 1L<<78 : x - testb $2, %dl - jz 3f // y > 0 - fchs // -(1L<<78) : x - .align ALIGNARG(4) -3: /* y is a real number. 
*/ - subl $28, %esp - cfi_adjust_cfa_offset (28) - fstpt 12(%esp) // x - fstpt (%esp) // <empty> - call HIDDEN_JUMPTARGET (__powl_helper) // <result> - addl $36, %esp - cfi_adjust_cfa_offset (-36) - ret - - // pow(x,±0) = 1, unless x is sNaN - .align ALIGNARG(4) -11: fstp %st(0) // pop y - fldt 4(%esp) // x - fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 112f // x is NaN -111: fstp %st(0) - fldl MO(one) - ret - -112: testb $0x40, 11(%esp) - jnz 111b - fadd %st(0) - ret - - // y == ±inf - .align ALIGNARG(4) -12: fstp %st(0) // pop y - fldl MO(one) // 1 - fldt 4(%esp) // x : 1 - fabs // abs(x) : 1 - fucompp // < 1, == 1, or > 1 - fnstsw - andb $0x45, %ah - cmpb $0x45, %ah - je 13f // jump if x is NaN - - cmpb $0x40, %ah - je 14f // jump if |x| == 1 - - shlb $1, %ah - xorb %ah, %dl - andl $2, %edx - fldl MOX(inf_zero, %edx, 4) - ret - - .align ALIGNARG(4) -14: fldl MO(one) - ret - - .align ALIGNARG(4) -13: fldt 4(%esp) // load x == NaN - fadd %st(0) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) - // x is ±inf -15: fstp %st(0) // y - testb $2, %dh - jz 16f // jump if x == +inf - - // fistpll raises invalid exception for |y| >= 1L<<63, but y - // may be odd unless we know |y| >= 1L<<64. - fld %st // y : y - fabs // |y| : y - fcompl MO(p64) // y - fnstsw - sahf - jnc 16f - fldl MO(p63) // p63 : y - fxch // y : p63 - fprem // y%p63 : p63 - fstp %st(1) // y%p63 - - // We must find out whether y is an odd integer. - fld %st // y : y - fistpll (%esp) // y - fildll (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 17f - - // OK, the value is an integer, but is it odd? - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 18f // jump if not odd - // It's an odd integer. 
- shrl $31, %edx - fldl MOX(minf_mzero, %edx, 8) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) -16: fcompl MO(zero) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - fnstsw - shrl $5, %eax - andl $8, %eax - fldl MOX(inf_zero, %eax, 1) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) -17: shll $30, %edx // sign bit for y in right position - addl $8, %esp - cfi_adjust_cfa_offset (-8) -18: shrl $31, %edx - fldl MOX(inf_zero, %edx, 8) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) - // x is ±0 -20: fstp %st(0) // y - testb $2, %dl - jz 21f // y > 0 - - // x is ±0 and y is < 0. We must find out whether y is an odd integer. - testb $2, %dh - jz 25f - - // fistpll raises invalid exception for |y| >= 1L<<63, but y - // may be odd unless we know |y| >= 1L<<64. - fld %st // y : y - fabs // |y| : y - fcompl MO(p64) // y - fnstsw - sahf - jnc 25f - fldl MO(p63) // p63 : y - fxch // y : p63 - fprem // y%p63 : p63 - fstp %st(1) // y%p63 - - fld %st // y : y - fistpll (%esp) // y - fildll (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 26f - - // OK, the value is an integer, but is it odd? - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 27f // jump if not odd - // It's an odd integer. - // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) - fdivl MO(zero) - fchs - ret - - cfi_adjust_cfa_offset (8) -25: fstp %st(0) -26: addl $8, %esp - cfi_adjust_cfa_offset (-8) -27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) - fdivl MO(zero) - ret - - cfi_adjust_cfa_offset (8) - .align ALIGNARG(4) - // x is ±0 and y is > 0. We must find out whether y is an odd integer. -21: testb $2, %dh - jz 22f - - // fistpll raises invalid exception for |y| >= 1L<<63, but y - // may be odd unless we know |y| >= 1L<<64. 
- fld %st // y : y - fcompl MO(p64) // y - fnstsw - sahf - jnc 22f - fldl MO(p63) // p63 : y - fxch // y : p63 - fprem // y%p63 : p63 - fstp %st(1) // y%p63 - - fld %st // y : y - fistpll (%esp) // y - fildll (%esp) // int(y) : y - fucompp // <empty> - fnstsw - sahf - jne 23f - - // OK, the value is an integer, but is it odd? - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - andb $1, %al - jz 24f // jump if not odd - // It's an odd integer. - fldl MO(mzero) - ret - - cfi_adjust_cfa_offset (8) -22: fstp %st(0) -23: addl $8, %esp // Don't use 2 x pop - cfi_adjust_cfa_offset (-8) -24: fldl MO(zero) - ret - -END(__ieee754_powl) -strong_alias (__ieee754_powl, __powl_finite) diff --git a/sysdeps/i386/fpu/e_rem_pio2.c b/sysdeps/i386/fpu/e_rem_pio2.c deleted file mode 100644 index 1347b0468c..0000000000 --- a/sysdeps/i386/fpu/e_rem_pio2.c +++ /dev/null @@ -1,3 +0,0 @@ -/* Empty. This file is only meant to avoid compiling the file with the - same name in the libm-ieee754 directory. The code is not used since - there is an assembler version for all users of this file. */ diff --git a/sysdeps/i386/fpu/e_remainder.S b/sysdeps/i386/fpu/e_remainder.S deleted file mode 100644 index f7867aa90b..0000000000 --- a/sysdeps/i386/fpu/e_remainder.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -ENTRY(__ieee754_remainder) - fldl 12(%esp) - fldl 4(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - ret -END (__ieee754_remainder) -strong_alias (__ieee754_remainder, __remainder_finite) diff --git a/sysdeps/i386/fpu/e_remainderf.S b/sysdeps/i386/fpu/e_remainderf.S deleted file mode 100644 index cfd390bc69..0000000000 --- a/sysdeps/i386/fpu/e_remainderf.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -ENTRY(__ieee754_remainderf) - flds 8(%esp) - flds 4(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - ret -END (__ieee754_remainderf) -strong_alias (__ieee754_remainderf, __remainderf_finite) diff --git a/sysdeps/i386/fpu/e_remainderl.S b/sysdeps/i386/fpu/e_remainderl.S deleted file mode 100644 index 5ec23a37a3..0000000000 --- a/sysdeps/i386/fpu/e_remainderl.S +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <machine/asm.h> - -ENTRY(__ieee754_remainderl) - fldt 16(%esp) - fldt 4(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - ret -END (__ieee754_remainderl) -strong_alias (__ieee754_remainderl, __remainderl_finite) diff --git a/sysdeps/i386/fpu/e_scalb.S b/sysdeps/i386/fpu/e_scalb.S deleted file mode 100644 index 370924c29f..0000000000 --- a/sysdeps/i386/fpu/e_scalb.S +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- * - * Correct handling of y==-inf <drepper@gnu> - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type zero_nan,@object -zero_nan: - .double 0.0 -nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f - ASM_SIZE_DIRECTIVE(zero_nan) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define MOX(op,x,f) op(,x,f) -#endif - - .text -ENTRY(__ieee754_scalb) - fldl 12(%esp) - fxam - fnstsw - fldl 4(%esp) - andl $0x4700, %eax - cmpl $0x0700, %eax - je 1f - andl $0x4500, %eax - cmpl $0x0100, %eax - je 2f - fxam - fnstsw - andl $0x4500, %eax - cmpl $0x0100, %eax - je 3f - fld %st(1) - frndint - fcomp %st(2) - fnstsw - sahf - jne 4f - fscale - fstp %st(1) - DBL_NARROW_EVAL - ret - - /* y is -inf */ -1: fxam -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fnstsw - movl 8(%esp), %edx - shrl $5, %eax - fstp %st - fstp %st - andl $0x80000000, %edx - andl $0x0228, %eax - cmpl $0x0028, %eax - je 4f - andl $8, %eax - shrl $27, %edx - addl %edx, %eax - fldl MOX(zero_nan, %eax, 1) - ret - - /* The result is NaN, but we must not raise an exception. - So use a variable. */ -2: fstp %st - fstp %st -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl MO(nan) - ret - - /* The first parameter is a NaN. Return it. */ -3: fstp %st(1) - ret - - /* Return NaN and raise the invalid exception. */ -4: fstp %st - fstp %st - fldz - fdiv %st - ret -END(__ieee754_scalb) -strong_alias (__ieee754_scalb, __scalb_finite) diff --git a/sysdeps/i386/fpu/e_scalbf.S b/sysdeps/i386/fpu/e_scalbf.S deleted file mode 100644 index 4f2dfa3acf..0000000000 --- a/sysdeps/i386/fpu/e_scalbf.S +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float type by Ulrich Drepper <drepper@cygnus.com>. 
- * - * Correct handling of y==-inf <drepper@gnu> - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type zero_nan,@object -zero_nan: - .double 0.0 -nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f - ASM_SIZE_DIRECTIVE(zero_nan) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define MOX(op,x,f) op(,x,f) -#endif - - - .text -ENTRY(__ieee754_scalbf) - flds 8(%esp) - fxam - fnstsw - flds 4(%esp) - andl $0x4700, %eax - cmpl $0x0700, %eax - je 1f - andl $0x4500, %eax - cmpl $0x0100, %eax - je 2f - fxam - fnstsw - andl $0x4500, %eax - cmpl $0x0100, %eax - je 3f - fld %st(1) - frndint - fcomp %st(2) - fnstsw - sahf - jne 4f - fscale - fstp %st(1) - FLT_NARROW_EVAL - ret - - /* y is -inf */ -1: fxam -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fnstsw - movl 4(%esp), %edx - shrl $5, %eax - fstp %st - fstp %st - andl $0x80000000, %edx - andl $0x0228, %eax - cmpl $0x0028, %eax - je 4f - andl $8, %eax - shrl $27, %edx - addl %edx, %eax - fldl MOX(zero_nan, %eax, 1) - ret - - /* The result is NaN, but we must not raise an exception. - So use a variable. */ -2: fstp %st - fstp %st -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl MO(nan) - ret - - /* The first parameter is a NaN. Return it. */ -3: fstp %st(1) - ret - - /* Return NaN and raise the invalid exception. */ -4: fstp %st - fstp %st - fldz - fdiv %st - ret -END(__ieee754_scalbf) -strong_alias (__ieee754_scalbf, __scalbf_finite) diff --git a/sysdeps/i386/fpu/e_scalbl.S b/sysdeps/i386/fpu/e_scalbl.S deleted file mode 100644 index 896f599cb0..0000000000 --- a/sysdeps/i386/fpu/e_scalbl.S +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. 
- * - * Correct handling of y==-inf <drepper@gnu> - */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type zero_nan,@object -zero_nan: - .double 0.0 -nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f - .byte 0, 0, 0, 0, 0, 0, 0, 0x80 - .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f - ASM_SIZE_DIRECTIVE(zero_nan) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) -#else -# define MO(op) op -# define MOX(op,x,f) op(,x,f) -#endif - - .text -ENTRY(__ieee754_scalbl) - fldt 16(%esp) - fxam - fnstsw - fldt 4(%esp) - andl $0x4700, %eax - cmpl $0x0700, %eax - je 1f - andl $0x4500, %eax - cmpl $0x0100, %eax - je 2f - fxam - fnstsw - andl $0x4500, %eax - cmpl $0x0100, %eax - je 2f - fld %st(1) - frndint - fcomp %st(2) - fnstsw - sahf - jne 4f - fscale - fstp %st(1) - ret - - /* y is -inf */ -1: fxam -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fnstsw - movl 12(%esp), %edx - shrl $5, %eax - fstp %st - fstp %st - andl $0x8000, %edx - andl $0x0228, %eax - cmpl $0x0028, %eax - je 4f - andl $8, %eax - shrl $11, %edx - addl %edx, %eax - fldl MOX(zero_nan, %eax, 1) - ret - - /* The result is NaN; raise an exception for sNaN arguments. */ -2: faddp - ret - - /* Return NaN and raise the invalid exception. */ -4: fstp %st - fstp %st - fldz - fdiv %st - ret -END(__ieee754_scalbl) -strong_alias (__ieee754_scalbl, __scalbl_finite) diff --git a/sysdeps/i386/fpu/e_sqrt.S b/sysdeps/i386/fpu/e_sqrt.S deleted file mode 100644 index fba5833a9a..0000000000 --- a/sysdeps/i386/fpu/e_sqrt.S +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -ENTRY(__ieee754_sqrt) - fldl 4(%esp) - subl $8, %esp - cfi_adjust_cfa_offset (8) - fstcw 4(%esp) - movl $0xfeff, %edx - andl 4(%esp), %edx - movl %edx, (%esp) - fldcw (%esp) - fsqrt - fldcw 4(%esp) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - ret -END (__ieee754_sqrt) -strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/sysdeps/i386/fpu/e_sqrtf.S b/sysdeps/i386/fpu/e_sqrtf.S deleted file mode 100644 index 6f7e4b015f..0000000000 --- a/sysdeps/i386/fpu/e_sqrtf.S +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -ENTRY(__ieee754_sqrtf) - flds 4(%esp) - fsqrt - ret -END (__ieee754_sqrtf) -strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/sysdeps/i386/fpu/e_sqrtl.c b/sysdeps/i386/fpu/e_sqrtl.c deleted file mode 100644 index 41bcd7eeb7..0000000000 --- a/sysdeps/i386/fpu/e_sqrtl.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <math_private.h> - -#undef __ieee754_sqrtl -long double -__ieee754_sqrtl (long double x) -{ - long double res; - - asm ("fsqrt" : "=t" (res) : "0" (x)); - - return res; -} -strong_alias (__ieee754_sqrtl, __sqrtl_finite) diff --git a/sysdeps/i386/fpu/fclrexcpt.c b/sysdeps/i386/fpu/fclrexcpt.c deleted file mode 100644 index 5d8596964b..0000000000 --- a/sysdeps/i386/fpu/fclrexcpt.c +++ /dev/null @@ -1,69 +0,0 @@ -/* Clear given exceptions in current floating-point environment. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -__feclearexcept (int excepts) -{ - fenv_t temp; - - /* Mask out unsupported bits/exceptions. */ - excepts &= FE_ALL_EXCEPT; - - /* Bah, we have to clear selected exceptions. Since there is no - `fldsw' instruction we have to do it the hard way. */ - __asm__ ("fnstenv %0" : "=m" (*&temp)); - - /* Clear the relevant bits. */ - temp.__status_word &= excepts ^ FE_ALL_EXCEPT; - - /* Put the new data in effect. */ - __asm__ ("fldenv %0" : : "m" (*&temp)); - - /* If the CPU supports SSE, we clear the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xnew_exc; - - /* Get the current MXCSR. */ - __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - - /* Clear the relevant bits. */ - xnew_exc &= ~excepts; - - /* Put the new data in effect. */ - __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); - } - - /* Success. 
*/ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__feclearexcept, __old_feclearexcept) -compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); -#endif - -libm_hidden_ver (__feclearexcept, feclearexcept) -versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fedisblxcpt.c b/sysdeps/i386/fpu/fedisblxcpt.c deleted file mode 100644 index f8db665425..0000000000 --- a/sysdeps/i386/fpu/fedisblxcpt.c +++ /dev/null @@ -1,54 +0,0 @@ -/* Disable floating-point exceptions. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Andreas Jaeger <aj@suse.de>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -fedisableexcept (int excepts) -{ - unsigned short int new_exc, old_exc; - - /* Get the current control word. */ - __asm__ ("fstcw %0" : "=m" (*&new_exc)); - - old_exc = (~new_exc) & FE_ALL_EXCEPT; - - excepts &= FE_ALL_EXCEPT; - - new_exc |= excepts; - __asm__ ("fldcw %0" : : "m" (*&new_exc)); - - /* If the CPU supports SSE we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xnew_exc; - - /* Get the current control word. 
*/ - __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - - xnew_exc |= excepts << 7; - - __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); - } - - return old_exc; -} diff --git a/sysdeps/i386/fpu/feenablxcpt.c b/sysdeps/i386/fpu/feenablxcpt.c deleted file mode 100644 index f1c42d7c27..0000000000 --- a/sysdeps/i386/fpu/feenablxcpt.c +++ /dev/null @@ -1,54 +0,0 @@ -/* Enable floating-point exceptions. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Andreas Jaeger <aj@suse.de>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -feenableexcept (int excepts) -{ - unsigned short int new_exc; - unsigned short int old_exc; - - /* Get the current control word. */ - __asm__ ("fstcw %0" : "=m" (*&new_exc)); - - excepts &= FE_ALL_EXCEPT; - old_exc = (~new_exc) & FE_ALL_EXCEPT; - - new_exc &= ~excepts; - __asm__ ("fldcw %0" : : "m" (*&new_exc)); - - /* If the CPU supports SSE we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xnew_exc; - - /* Get the current control word. 
*/ - __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - - xnew_exc &= ~(excepts << 7); - - __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); - } - - return old_exc; -} diff --git a/sysdeps/i386/fpu/fegetenv.c b/sysdeps/i386/fpu/fegetenv.c deleted file mode 100644 index 983f6af25e..0000000000 --- a/sysdeps/i386/fpu/fegetenv.c +++ /dev/null @@ -1,49 +0,0 @@ -/* Store current floating-point environment. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -__fegetenv (fenv_t *envp) -{ - __asm__ ("fnstenv %0" : "=m" (*envp)); - /* And load it right back since the processor changes the mask. - Intel thought this opcode to be used in interrupt handlers which - would block all exceptions. */ - __asm__ ("fldenv %0" : : "m" (*envp)); - - if (HAS_CPU_FEATURE (SSE)) - __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); - - /* Success. 
*/ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__fegetenv, __old_fegetenv) -compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); -#endif - -libm_hidden_def (__fegetenv) -libm_hidden_ver (__fegetenv, fegetenv) -versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fegetexcept.c b/sysdeps/i386/fpu/fegetexcept.c deleted file mode 100644 index dc87b7a470..0000000000 --- a/sysdeps/i386/fpu/fegetexcept.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Get enabled floating-point exceptions. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Andreas Jaeger <aj@suse.de>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> - -int -fegetexcept (void) -{ - unsigned short int exc; - - /* Get the current control word. */ - __asm__ ("fstcw %0" : "=m" (*&exc)); - - return (~exc) & FE_ALL_EXCEPT; -} diff --git a/sysdeps/i386/fpu/fegetmode.c b/sysdeps/i386/fpu/fegetmode.c deleted file mode 100644 index abbce3075f..0000000000 --- a/sysdeps/i386/fpu/fegetmode.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Store current floating-point control modes. i386 version. - Copyright (C) 2016-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <fpu_control.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -fegetmode (femode_t *modep) -{ - _FPU_GETCW (modep->__control_word); - if (HAS_CPU_FEATURE (SSE)) - __asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr)); - return 0; -} diff --git a/sysdeps/i386/fpu/fegetround.c b/sysdeps/i386/fpu/fegetround.c deleted file mode 100644 index 8ce8b859d8..0000000000 --- a/sysdeps/i386/fpu/fegetround.c +++ /dev/null @@ -1,33 +0,0 @@ -/* Return current rounding direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> - -int -__fegetround (void) -{ - int cw; - - __asm__ ("fnstcw %0" : "=m" (*&cw)); - - return cw & 0xc00; -} -libm_hidden_def (__fegetround) -weak_alias (__fegetround, fegetround) -libm_hidden_weak (fegetround) diff --git a/sysdeps/i386/fpu/feholdexcpt.c b/sysdeps/i386/fpu/feholdexcpt.c deleted file mode 100644 index d327358913..0000000000 --- a/sysdeps/i386/fpu/feholdexcpt.c +++ /dev/null @@ -1,50 +0,0 @@ -/* Store current floating-point environment and clear exceptions. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -__feholdexcept (fenv_t *envp) -{ - /* Store the environment. Recall that fnstenv has a side effect of - masking all exceptions. Then clear all exceptions. */ - __asm__ volatile ("fnstenv %0; fnclex" : "=m" (*envp)); - - /* If the CPU supports SSE we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xwork; - - /* Get the current control word. 
*/ - __asm__ ("stmxcsr %0" : "=m" (envp->__eip)); - - /* Set all exceptions to non-stop and clear them. */ - xwork = (envp->__eip | 0x1f80) & ~0x3f; - - __asm__ ("ldmxcsr %0" : : "m" (*&xwork)); - } - - return 0; -} -libm_hidden_def (__feholdexcept) -weak_alias (__feholdexcept, feholdexcept) -libm_hidden_weak (feholdexcept) diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h deleted file mode 100644 index e20e1f1662..0000000000 --- a/sysdeps/i386/fpu/fenv_private.h +++ /dev/null @@ -1,501 +0,0 @@ -#ifndef FENV_PRIVATE_H -#define FENV_PRIVATE_H 1 - -#include <fenv.h> -#include <fpu_control.h> - -#ifdef __SSE2_MATH__ -# define math_opt_barrier(x) \ - ({ __typeof(x) __x; \ - if (sizeof (x) <= sizeof (double)) \ - __asm ("" : "=x" (__x) : "0" (x)); \ - else \ - __asm ("" : "=t" (__x) : "0" (x)); \ - __x; }) -# define math_force_eval(x) \ - do { \ - if (sizeof (x) <= sizeof (double)) \ - __asm __volatile ("" : : "x" (x)); \ - else \ - __asm __volatile ("" : : "f" (x)); \ - } while (0) -#else -# define math_opt_barrier(x) \ - ({ __typeof (x) __x; \ - __asm ("" : "=t" (__x) : "0" (x)); \ - __x; }) -# define math_force_eval(x) \ - do { \ - __typeof (x) __x = (x); \ - if (sizeof (x) <= sizeof (double)) \ - __asm __volatile ("" : : "m" (__x)); \ - else \ - __asm __volatile ("" : : "f" (__x)); \ - } while (0) -#endif - -/* This file is used by both the 32- and 64-bit ports. The 64-bit port - has a field in the fenv_t for the mxcsr; the 32-bit port does not. - Instead, we (ab)use the only 32-bit field extant in the struct. */ -#ifndef __x86_64__ -# define __mxcsr __eip -#endif - - -/* All of these functions are private to libm, and are all used in pairs - to save+change the fp state and restore the original state. Thus we - need not care for both the 387 and the sse unit, only the one we're - actually using. 
*/ - -#if defined __AVX__ || defined SSE2AVX -# define STMXCSR "vstmxcsr" -# define LDMXCSR "vldmxcsr" -#else -# define STMXCSR "stmxcsr" -# define LDMXCSR "ldmxcsr" -#endif - -static __always_inline void -libc_feholdexcept_sse (fenv_t *e) -{ - unsigned int mxcsr; - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - e->__mxcsr = mxcsr; - mxcsr = (mxcsr | 0x1f80) & ~0x3f; - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); -} - -static __always_inline void -libc_feholdexcept_387 (fenv_t *e) -{ - /* Recall that fnstenv has a side-effect of masking exceptions. - Clobber all of the fp registers so that the TOS field is 0. */ - asm volatile ("fnstenv %0; fnclex" - : "=m"(*e) - : : "st", "st(1)", "st(2)", "st(3)", - "st(4)", "st(5)", "st(6)", "st(7)"); -} - -static __always_inline void -libc_fesetround_sse (int r) -{ - unsigned int mxcsr; - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - mxcsr = (mxcsr & ~0x6000) | (r << 3); - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); -} - -static __always_inline void -libc_fesetround_387 (int r) -{ - fpu_control_t cw; - _FPU_GETCW (cw); - cw = (cw & ~0xc00) | r; - _FPU_SETCW (cw); -} - -static __always_inline void -libc_feholdexcept_setround_sse (fenv_t *e, int r) -{ - unsigned int mxcsr; - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - e->__mxcsr = mxcsr; - mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); -} - -/* Set both rounding mode and precision. A convenience function for use - by libc_feholdexcept_setround and libc_feholdexcept_setround_53bit. 
*/ -static __always_inline void -libc_feholdexcept_setround_387_prec (fenv_t *e, int r) -{ - libc_feholdexcept_387 (e); - - fpu_control_t cw = e->__control_word; - cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); - cw |= r | 0x3f; - _FPU_SETCW (cw); -} - -static __always_inline void -libc_feholdexcept_setround_387 (fenv_t *e, int r) -{ - libc_feholdexcept_setround_387_prec (e, r | _FPU_EXTENDED); -} - -static __always_inline void -libc_feholdexcept_setround_387_53bit (fenv_t *e, int r) -{ - libc_feholdexcept_setround_387_prec (e, r | _FPU_DOUBLE); -} - -static __always_inline int -libc_fetestexcept_sse (int e) -{ - unsigned int mxcsr; - asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); - return mxcsr & e & FE_ALL_EXCEPT; -} - -static __always_inline int -libc_fetestexcept_387 (int ex) -{ - fexcept_t temp; - asm volatile ("fnstsw %0" : "=a" (temp)); - return temp & ex & FE_ALL_EXCEPT; -} - -static __always_inline void -libc_fesetenv_sse (fenv_t *e) -{ - asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr)); -} - -static __always_inline void -libc_fesetenv_387 (fenv_t *e) -{ - /* Clobber all fp registers so that the TOS value we saved earlier is - compatible with the current state of the compiler. */ - asm volatile ("fldenv %0" - : : "m" (*e) - : "st", "st(1)", "st(2)", "st(3)", - "st(4)", "st(5)", "st(6)", "st(7)"); -} - -static __always_inline int -libc_feupdateenv_test_sse (fenv_t *e, int ex) -{ - unsigned int mxcsr, old_mxcsr, cur_ex; - asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); - cur_ex = mxcsr & FE_ALL_EXCEPT; - - /* Merge current exceptions with the old environment. */ - old_mxcsr = e->__mxcsr; - mxcsr = old_mxcsr | cur_ex; - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); - - /* Raise SIGFPE for any new exceptions since the hold. Expect that - the normal environment has all exceptions masked. */ - if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex)) - __feraiseexcept (cur_ex); - - /* Test for exceptions raised since the hold. 
*/ - return cur_ex & ex; -} - -static __always_inline int -libc_feupdateenv_test_387 (fenv_t *e, int ex) -{ - fexcept_t cur_ex; - - /* Save current exceptions. */ - asm volatile ("fnstsw %0" : "=a" (cur_ex)); - cur_ex &= FE_ALL_EXCEPT; - - /* Reload original environment. */ - libc_fesetenv_387 (e); - - /* Merge current exceptions. */ - __feraiseexcept (cur_ex); - - /* Test for exceptions raised since the hold. */ - return cur_ex & ex; -} - -static __always_inline void -libc_feupdateenv_sse (fenv_t *e) -{ - libc_feupdateenv_test_sse (e, 0); -} - -static __always_inline void -libc_feupdateenv_387 (fenv_t *e) -{ - libc_feupdateenv_test_387 (e, 0); -} - -static __always_inline void -libc_feholdsetround_sse (fenv_t *e, int r) -{ - unsigned int mxcsr; - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - e->__mxcsr = mxcsr; - mxcsr = (mxcsr & ~0x6000) | (r << 3); - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); -} - -static __always_inline void -libc_feholdsetround_387_prec (fenv_t *e, int r) -{ - fpu_control_t cw; - - _FPU_GETCW (cw); - e->__control_word = cw; - cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); - cw |= r; - _FPU_SETCW (cw); -} - -static __always_inline void -libc_feholdsetround_387 (fenv_t *e, int r) -{ - libc_feholdsetround_387_prec (e, r | _FPU_EXTENDED); -} - -static __always_inline void -libc_feholdsetround_387_53bit (fenv_t *e, int r) -{ - libc_feholdsetround_387_prec (e, r | _FPU_DOUBLE); -} - -static __always_inline void -libc_feresetround_sse (fenv_t *e) -{ - unsigned int mxcsr; - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000); - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); -} - -static __always_inline void -libc_feresetround_387 (fenv_t *e) -{ - _FPU_SETCW (e->__control_word); -} - -#ifdef __SSE_MATH__ -# define libc_feholdexceptf libc_feholdexcept_sse -# define libc_fesetroundf libc_fesetround_sse -# define libc_feholdexcept_setroundf libc_feholdexcept_setround_sse -# define libc_fetestexceptf libc_fetestexcept_sse -# 
define libc_fesetenvf libc_fesetenv_sse -# define libc_feupdateenv_testf libc_feupdateenv_test_sse -# define libc_feupdateenvf libc_feupdateenv_sse -# define libc_feholdsetroundf libc_feholdsetround_sse -# define libc_feresetroundf libc_feresetround_sse -#else -# define libc_feholdexceptf libc_feholdexcept_387 -# define libc_fesetroundf libc_fesetround_387 -# define libc_feholdexcept_setroundf libc_feholdexcept_setround_387 -# define libc_fetestexceptf libc_fetestexcept_387 -# define libc_fesetenvf libc_fesetenv_387 -# define libc_feupdateenv_testf libc_feupdateenv_test_387 -# define libc_feupdateenvf libc_feupdateenv_387 -# define libc_feholdsetroundf libc_feholdsetround_387 -# define libc_feresetroundf libc_feresetround_387 -#endif /* __SSE_MATH__ */ - -#ifdef __SSE2_MATH__ -# define libc_feholdexcept libc_feholdexcept_sse -# define libc_fesetround libc_fesetround_sse -# define libc_feholdexcept_setround libc_feholdexcept_setround_sse -# define libc_fetestexcept libc_fetestexcept_sse -# define libc_fesetenv libc_fesetenv_sse -# define libc_feupdateenv_test libc_feupdateenv_test_sse -# define libc_feupdateenv libc_feupdateenv_sse -# define libc_feholdsetround libc_feholdsetround_sse -# define libc_feresetround libc_feresetround_sse -#else -# define libc_feholdexcept libc_feholdexcept_387 -# define libc_fesetround libc_fesetround_387 -# define libc_feholdexcept_setround libc_feholdexcept_setround_387 -# define libc_fetestexcept libc_fetestexcept_387 -# define libc_fesetenv libc_fesetenv_387 -# define libc_feupdateenv_test libc_feupdateenv_test_387 -# define libc_feupdateenv libc_feupdateenv_387 -# define libc_feholdsetround libc_feholdsetround_387 -# define libc_feresetround libc_feresetround_387 -#endif /* __SSE2_MATH__ */ - -#define libc_feholdexceptl libc_feholdexcept_387 -#define libc_fesetroundl libc_fesetround_387 -#define libc_feholdexcept_setroundl libc_feholdexcept_setround_387 -#define libc_fetestexceptl libc_fetestexcept_387 -#define libc_fesetenvl 
libc_fesetenv_387 -#define libc_feupdateenv_testl libc_feupdateenv_test_387 -#define libc_feupdateenvl libc_feupdateenv_387 -#define libc_feholdsetroundl libc_feholdsetround_387 -#define libc_feresetroundl libc_feresetround_387 - -#ifndef __SSE2_MATH__ -# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_387_53bit -# define libc_feholdsetround_53bit libc_feholdsetround_387_53bit -#endif - -/* We have support for rounding mode context. */ -#define HAVE_RM_CTX 1 - -static __always_inline void -libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r) -{ - unsigned int mxcsr, new_mxcsr; - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3); - - ctx->env.__mxcsr = mxcsr; - if (__glibc_unlikely (mxcsr != new_mxcsr)) - { - asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); - ctx->updated_status = true; - } - else - ctx->updated_status = false; -} - -/* Unconditional since we want to overwrite any exceptions that occurred in the - context. This is also why all fehold* functions unconditionally write into - ctx->env. 
*/ -static __always_inline void -libc_fesetenv_sse_ctx (struct rm_ctx *ctx) -{ - libc_fesetenv_sse (&ctx->env); -} - -static __always_inline void -libc_feupdateenv_sse_ctx (struct rm_ctx *ctx) -{ - if (__glibc_unlikely (ctx->updated_status)) - libc_feupdateenv_test_sse (&ctx->env, 0); -} - -static __always_inline void -libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r) -{ - libc_feholdexcept_387 (&ctx->env); - - fpu_control_t cw = ctx->env.__control_word; - fpu_control_t old_cw = cw; - cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); - cw |= r | 0x3f; - - if (__glibc_unlikely (old_cw != cw)) - { - _FPU_SETCW (cw); - ctx->updated_status = true; - } - else - ctx->updated_status = false; -} - -static __always_inline void -libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r) -{ - libc_feholdexcept_setround_387_prec_ctx (ctx, r | _FPU_EXTENDED); -} - -static __always_inline void -libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r) -{ - libc_feholdexcept_setround_387_prec_ctx (ctx, r | _FPU_DOUBLE); -} - -static __always_inline void -libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r) -{ - fpu_control_t cw, new_cw; - - _FPU_GETCW (cw); - new_cw = cw; - new_cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED); - new_cw |= r; - - ctx->env.__control_word = cw; - if (__glibc_unlikely (new_cw != cw)) - { - _FPU_SETCW (new_cw); - ctx->updated_status = true; - } - else - ctx->updated_status = false; -} - -static __always_inline void -libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r) -{ - libc_feholdsetround_387_prec_ctx (ctx, r | _FPU_EXTENDED); -} - -static __always_inline void -libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r) -{ - libc_feholdsetround_387_prec_ctx (ctx, r | _FPU_DOUBLE); -} - -static __always_inline void -libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r) -{ - unsigned int mxcsr, new_mxcsr; - - asm (STMXCSR " %0" : "=m" (*&mxcsr)); - new_mxcsr = (mxcsr & ~0x6000) | (r << 3); - - ctx->env.__mxcsr = mxcsr; - if 
(__glibc_unlikely (new_mxcsr != mxcsr)) - { - asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr)); - ctx->updated_status = true; - } - else - ctx->updated_status = false; -} - -static __always_inline void -libc_feresetround_sse_ctx (struct rm_ctx *ctx) -{ - if (__glibc_unlikely (ctx->updated_status)) - libc_feresetround_sse (&ctx->env); -} - -static __always_inline void -libc_feresetround_387_ctx (struct rm_ctx *ctx) -{ - if (__glibc_unlikely (ctx->updated_status)) - _FPU_SETCW (ctx->env.__control_word); -} - -static __always_inline void -libc_feupdateenv_387_ctx (struct rm_ctx *ctx) -{ - if (__glibc_unlikely (ctx->updated_status)) - libc_feupdateenv_test_387 (&ctx->env, 0); -} - -#ifdef __SSE_MATH__ -# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_sse_ctx -# define libc_fesetenvf_ctx libc_fesetenv_sse_ctx -# define libc_feupdateenvf_ctx libc_feupdateenv_sse_ctx -# define libc_feholdsetroundf_ctx libc_feholdsetround_sse_ctx -# define libc_feresetroundf_ctx libc_feresetround_sse_ctx -#else -# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_387_ctx -# define libc_feupdateenvf_ctx libc_feupdateenv_387_ctx -# define libc_feholdsetroundf_ctx libc_feholdsetround_387_ctx -# define libc_feresetroundf_ctx libc_feresetround_387_ctx -#endif /* __SSE_MATH__ */ - -#ifdef __SSE2_MATH__ -# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_sse_ctx -# define libc_fesetenv_ctx libc_fesetenv_sse_ctx -# define libc_feupdateenv_ctx libc_feupdateenv_sse_ctx -# define libc_feholdsetround_ctx libc_feholdsetround_sse_ctx -# define libc_feresetround_ctx libc_feresetround_sse_ctx -#else -# define libc_feholdexcept_setround_ctx libc_feholdexcept_setround_387_ctx -# define libc_feupdateenv_ctx libc_feupdateenv_387_ctx -# define libc_feholdsetround_ctx libc_feholdsetround_387_ctx -# define libc_feresetround_ctx libc_feresetround_387_ctx -#endif /* __SSE2_MATH__ */ - -#define libc_feholdexcept_setroundl_ctx libc_feholdexcept_setround_387_ctx 
-#define libc_feupdateenvl_ctx libc_feupdateenv_387_ctx -#define libc_feholdsetroundl_ctx libc_feholdsetround_387_ctx -#define libc_feresetroundl_ctx libc_feresetround_387_ctx - -#ifndef __SSE2_MATH__ -# define libc_feholdsetround_53bit_ctx libc_feholdsetround_387_53bit_ctx -# define libc_feresetround_53bit_ctx libc_feresetround_387_ctx -#endif - -#undef __mxcsr - -#endif /* FENV_PRIVATE_H */ diff --git a/sysdeps/i386/fpu/fesetenv.c b/sysdeps/i386/fpu/fesetenv.c deleted file mode 100644 index a338e5d555..0000000000 --- a/sysdeps/i386/fpu/fesetenv.c +++ /dev/null @@ -1,131 +0,0 @@ -/* Install given floating-point environment. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <fpu_control.h> -#include <assert.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - - -/* All exceptions, including the x86-specific "denormal operand" - exception. */ -#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM) - - -int -__fesetenv (const fenv_t *envp) -{ - fenv_t temp; - - /* The memory block used by fstenv/fldenv has a size of 28 bytes. */ - assert (sizeof (fenv_t) == 28); - - /* Install the environment specified by ENVP. 
But there are a few - values which we do not want to come from the saved environment. - Therefore, we get the current environment and replace the values - we want to use from the environment specified by the parameter. */ - __asm__ ("fnstenv %0" : "=m" (*&temp)); - - if (envp == FE_DFL_ENV) - { - temp.__control_word |= FE_ALL_EXCEPT_X86; - temp.__control_word &= ~FE_TOWARDZERO; - temp.__control_word |= _FPU_EXTENDED; - temp.__status_word &= ~FE_ALL_EXCEPT_X86; - } - else if (envp == FE_NOMASK_ENV) - { - temp.__control_word &= ~(FE_ALL_EXCEPT | FE_TOWARDZERO); - /* Keep the "denormal operand" exception masked. */ - temp.__control_word |= __FE_DENORM; - temp.__control_word |= _FPU_EXTENDED; - temp.__status_word &= ~FE_ALL_EXCEPT_X86; - } - else - { - temp.__control_word &= ~(FE_ALL_EXCEPT_X86 - | FE_TOWARDZERO - | _FPU_EXTENDED); - temp.__control_word |= (envp->__control_word - & (FE_ALL_EXCEPT_X86 - | FE_TOWARDZERO - | _FPU_EXTENDED)); - temp.__status_word &= ~FE_ALL_EXCEPT_X86; - temp.__status_word |= envp->__status_word & FE_ALL_EXCEPT_X86; - } - temp.__eip = 0; - temp.__cs_selector = 0; - temp.__opcode = 0; - temp.__data_offset = 0; - temp.__data_selector = 0; - - __asm__ ("fldenv %0" : : "m" (temp)); - - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int mxcsr; - __asm__ ("stmxcsr %0" : "=m" (mxcsr)); - - if (envp == FE_DFL_ENV) - { - /* Clear SSE exceptions. */ - mxcsr &= ~FE_ALL_EXCEPT_X86; - /* Set mask for SSE MXCSR. */ - mxcsr |= (FE_ALL_EXCEPT_X86 << 7); - /* Set rounding to FE_TONEAREST. */ - mxcsr &= ~0x6000; - mxcsr |= (FE_TONEAREST << 3); - /* Clear the FZ and DAZ bits. */ - mxcsr &= ~0x8040; - } - else if (envp == FE_NOMASK_ENV) - { - /* Clear SSE exceptions. */ - mxcsr &= ~FE_ALL_EXCEPT_X86; - /* Do not mask exceptions. */ - mxcsr &= ~(FE_ALL_EXCEPT << 7); - /* Keep the "denormal operand" exception masked. */ - mxcsr |= (__FE_DENORM << 7); - /* Set rounding to FE_TONEAREST. 
*/ - mxcsr &= ~0x6000; - mxcsr |= (FE_TONEAREST << 3); - /* Clear the FZ and DAZ bits. */ - mxcsr &= ~0x8040; - } - else - mxcsr = envp->__eip; - - __asm__ ("ldmxcsr %0" : : "m" (mxcsr)); - } - - /* Success. */ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__fesetenv, __old_fesetenv) -compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); -#endif - -libm_hidden_def (__fesetenv) -libm_hidden_ver (__fesetenv, fesetenv) -versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fesetexcept.c b/sysdeps/i386/fpu/fesetexcept.c deleted file mode 100644 index adfcf17ba6..0000000000 --- a/sysdeps/i386/fpu/fesetexcept.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Set given exception flags. i386 version. - Copyright (C) 2016-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <fenv.h> - -int -fesetexcept (int excepts) -{ - fenv_t temp; - - __asm__ ("fnstenv %0" : "=m" (*&temp)); - temp.__status_word |= excepts & FE_ALL_EXCEPT; - __asm__ ("fldenv %0" : : "m" (*&temp)); - - return 0; -} diff --git a/sysdeps/i386/fpu/fesetmode.c b/sysdeps/i386/fpu/fesetmode.c deleted file mode 100644 index bd9f74cd97..0000000000 --- a/sysdeps/i386/fpu/fesetmode.c +++ /dev/null @@ -1,54 +0,0 @@ -/* Install given floating-point control modes. i386 version. - Copyright (C) 2016-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <fpu_control.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -/* All exceptions, including the x86-specific "denormal operand" - exception. */ -#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM) - -int -fesetmode (const femode_t *modep) -{ - fpu_control_t cw; - if (modep == FE_DFL_MODE) - cw = _FPU_DEFAULT; - else - cw = modep->__control_word; - _FPU_SETCW (cw); - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int mxcsr; - __asm__ ("stmxcsr %0" : "=m" (mxcsr)); - /* Preserve SSE exception flags but restore other state in - MXCSR. */ - mxcsr &= FE_ALL_EXCEPT_X86; - if (modep == FE_DFL_MODE) - /* Default MXCSR state has all bits zero except for those - masking exceptions. 
*/ - mxcsr |= FE_ALL_EXCEPT_X86 << 7; - else - mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86; - __asm__ ("ldmxcsr %0" : : "m" (mxcsr)); - } - return 0; -} diff --git a/sysdeps/i386/fpu/fesetround.c b/sysdeps/i386/fpu/fesetround.c deleted file mode 100644 index a3fa6235c0..0000000000 --- a/sysdeps/i386/fpu/fesetround.c +++ /dev/null @@ -1,54 +0,0 @@ -/* Set current rounding direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -__fesetround (int round) -{ - unsigned short int cw; - - if ((round & ~0xc00) != 0) - /* ROUND is no valid rounding mode. */ - return 1; - - __asm__ ("fnstcw %0" : "=m" (*&cw)); - cw &= ~0xc00; - cw |= round; - __asm__ ("fldcw %0" : : "m" (*&cw)); - - /* If the CPU supports SSE we set the MXCSR as well. 
*/ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xcw; - - __asm__ ("stmxcsr %0" : "=m" (*&xcw)); - xcw &= ~0x6000; - xcw |= round << 3; - __asm__ ("ldmxcsr %0" : : "m" (*&xcw)); - } - - return 0; -} -libm_hidden_def (__fesetround) -weak_alias (__fesetround, fesetround) -libm_hidden_weak (fesetround) diff --git a/sysdeps/i386/fpu/feupdateenv.c b/sysdeps/i386/fpu/feupdateenv.c deleted file mode 100644 index b610289cd0..0000000000 --- a/sysdeps/i386/fpu/feupdateenv.c +++ /dev/null @@ -1,60 +0,0 @@ -/* Install given floating-point environment and raise exceptions. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <dl-procinfo.h> -#include <ldsodefs.h> - -int -__feupdateenv (const fenv_t *envp) -{ - fexcept_t temp; - unsigned int xtemp = 0; - - /* Save current exceptions. */ - __asm__ ("fnstsw %0" : "=m" (*&temp)); - - /* If the CPU supports SSE we test the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - __asm__ ("stmxcsr %0" : "=m" (*&xtemp)); - - temp = (temp | xtemp) & FE_ALL_EXCEPT; - - /* Install new environment. */ - __fesetenv (envp); - - /* Raise the saved exception. 
Incidently for us the implementation - defined format of the values in objects of type fexcept_t is the - same as the ones specified using the FE_* constants. */ - __feraiseexcept ((int) temp); - - /* Success. */ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__feupdateenv, __old_feupdateenv) -compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); -#endif - -libm_hidden_def (__feupdateenv) -libm_hidden_ver (__feupdateenv, feupdateenv) -versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fgetexcptflg.c b/sysdeps/i386/fpu/fgetexcptflg.c deleted file mode 100644 index 954e5f69d8..0000000000 --- a/sysdeps/i386/fpu/fgetexcptflg.c +++ /dev/null @@ -1,57 +0,0 @@ -/* Store current representation for exceptions. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - - -int -__fegetexceptflag (fexcept_t *flagp, int excepts) -{ - fexcept_t temp; - - /* Get the current exceptions. 
*/ - __asm__ ("fnstsw %0" : "=m" (*&temp)); - - *flagp = temp & excepts & FE_ALL_EXCEPT; - - /* If the CPU supports SSE, we clear the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int sse_exc; - - /* Get the current MXCSR. */ - __asm__ ("stmxcsr %0" : "=m" (*&sse_exc)); - - *flagp |= sse_exc & excepts & FE_ALL_EXCEPT; - } - - /* Success. */ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__fegetexceptflag, __old_fegetexceptflag) -compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); -#endif - -versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fraiseexcpt.c b/sysdeps/i386/fpu/fraiseexcpt.c deleted file mode 100644 index 913d7b912c..0000000000 --- a/sysdeps/i386/fpu/fraiseexcpt.c +++ /dev/null @@ -1,124 +0,0 @@ -/* Raise given exceptions. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <math.h> - -int -__feraiseexcept (int excepts) -{ - /* Raise exceptions represented by EXPECTS. But we must raise only - one signal at a time. 
It is important that if the overflow/underflow - exception and the inexact exception are given at the same time, - the overflow/underflow exception follows the inexact exception. */ - - /* First: invalid exception. */ - if ((FE_INVALID & excepts) != 0) - { - /* One example of an invalid operation is 0.0 / 0.0. */ - double d; - __asm__ __volatile__ ("fldz; fdiv %%st, %%st(0); fwait" : "=t" (d)); - (void) &d; - } - - /* Next: division by zero. */ - if ((FE_DIVBYZERO & excepts) != 0) - { - double d; - __asm__ __volatile__ ("fldz; fld1; fdivp %%st, %%st(1); fwait" - : "=t" (d)); - (void) &d; - } - - /* Next: overflow. */ - if ((FE_OVERFLOW & excepts) != 0) - { - /* There is no way to raise only the overflow flag. Do it the - hard way. */ - fenv_t temp; - - /* Bah, we have to clear selected exceptions. Since there is no - `fldsw' instruction we have to do it the hard way. */ - __asm__ __volatile__ ("fnstenv %0" : "=m" (*&temp)); - - /* Set the relevant bits. */ - temp.__status_word |= FE_OVERFLOW; - - /* Put the new data in effect. */ - __asm__ __volatile__ ("fldenv %0" : : "m" (*&temp)); - - /* And raise the exception. */ - __asm__ __volatile__ ("fwait"); - } - - /* Next: underflow. */ - if ((FE_UNDERFLOW & excepts) != 0) - { - /* There is no way to raise only the underflow flag. Do it the - hard way. */ - fenv_t temp; - - /* Bah, we have to clear selected exceptions. Since there is no - `fldsw' instruction we have to do it the hard way. */ - __asm__ __volatile__ ("fnstenv %0" : "=m" (*&temp)); - - /* Set the relevant bits. */ - temp.__status_word |= FE_UNDERFLOW; - - /* Put the new data in effect. */ - __asm__ __volatile__ ("fldenv %0" : : "m" (*&temp)); - - /* And raise the exception. */ - __asm__ __volatile__ ("fwait"); - } - - /* Last: inexact. */ - if ((FE_INEXACT & excepts) != 0) - { - /* There is no way to raise only the inexact flag. Do it the - hard way. */ - fenv_t temp; - - /* Bah, we have to clear selected exceptions. 
Since there is no - `fldsw' instruction we have to do it the hard way. */ - __asm__ __volatile__ ("fnstenv %0" : "=m" (*&temp)); - - /* Set the relevant bits. */ - temp.__status_word |= FE_INEXACT; - - /* Put the new data in effect. */ - __asm__ __volatile__ ("fldenv %0" : : "m" (*&temp)); - - /* And raise the exception. */ - __asm__ __volatile__ ("fwait"); - } - - /* Success. */ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__feraiseexcept, __old_feraiseexcept) -compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); -#endif - -libm_hidden_def (__feraiseexcept) -libm_hidden_ver (__feraiseexcept, feraiseexcept) -versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fsetexcptflg.c b/sysdeps/i386/fpu/fsetexcptflg.c deleted file mode 100644 index efa64aaefd..0000000000 --- a/sysdeps/i386/fpu/fsetexcptflg.c +++ /dev/null @@ -1,69 +0,0 @@ -/* Set floating-point environment exception handling. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <fenv.h> -#include <math.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -int -__fesetexceptflag (const fexcept_t *flagp, int excepts) -{ - fenv_t temp; - - /* Get the current environment. We have to do this since we cannot - separately set the status word. */ - __asm__ ("fnstenv %0" : "=m" (*&temp)); - - temp.__status_word &= ~(excepts & FE_ALL_EXCEPT); - temp.__status_word |= *flagp & excepts & FE_ALL_EXCEPT; - - /* Store the new status word (along with the rest of the environment. - Possibly new exceptions are set but they won't get executed unless - the next floating-point instruction. */ - __asm__ ("fldenv %0" : : "m" (*&temp)); - - /* If the CPU supports SSE, we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xnew_exc; - - /* Get the current MXCSR. */ - __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - - /* Set the relevant bits. */ - xnew_exc &= ~(excepts & FE_ALL_EXCEPT); - xnew_exc |= *flagp & excepts & FE_ALL_EXCEPT; - - /* Put the new data in effect. */ - __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); - } - - /* Success. */ - return 0; -} - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) -strong_alias (__fesetexceptflag, __old_fesetexceptflag) -compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); -#endif - -versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/ftestexcept.c b/sysdeps/i386/fpu/ftestexcept.c deleted file mode 100644 index f523f9e709..0000000000 --- a/sysdeps/i386/fpu/ftestexcept.c +++ /dev/null @@ -1,40 +0,0 @@ -/* Test exception in current environment. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <fenv.h> -#include <unistd.h> -#include <dl-procinfo.h> -#include <ldsodefs.h> - -int -fetestexcept (int excepts) -{ - short temp; - int xtemp = 0; - - /* Get current exceptions. */ - __asm__ ("fnstsw %0" : "=a" (temp)); - - /* If the CPU supports SSE we test the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - __asm__ ("stmxcsr %0" : "=m" (*&xtemp)); - - return (temp | xtemp) & excepts & FE_ALL_EXCEPT; -} -libm_hidden_def (fetestexcept) diff --git a/sysdeps/i386/fpu/halfulp.c b/sysdeps/i386/fpu/halfulp.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/halfulp.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/i386-math-asm.h b/sysdeps/i386/fpu/i386-math-asm.h deleted file mode 100644 index 6ffc8e6f64..0000000000 --- a/sysdeps/i386/fpu/i386-math-asm.h +++ /dev/null @@ -1,340 +0,0 @@ -/* Helper macros for x86 libm functions. - Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _I386_MATH_ASM_H -#define _I386_MATH_ASM_H 1 - -/* Remove excess range and precision by storing a value on the stack - and loading it back. */ -#define FLT_NARROW_EVAL \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fstps (%esp); \ - flds (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); -#define DBL_NARROW_EVAL \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fstpl (%esp); \ - fldl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); - -/* Define constants for the minimum value of a floating-point - type. */ -#define DEFINE_FLT_MIN \ - .section .rodata.cst4,"aM",@progbits,4; \ - .p2align 2; \ - .type flt_min,@object; \ -flt_min: \ - .byte 0, 0, 0x80, 0; \ - .size flt_min, .-flt_min; -#define DEFINE_DBL_MIN \ - .section .rodata.cst8,"aM",@progbits,8; \ - .p2align 3; \ - .type dbl_min,@object; \ -dbl_min: \ - .byte 0, 0, 0, 0, 0, 0, 0x10, 0; \ - .size dbl_min, .-dbl_min; -#define DEFINE_LDBL_MIN \ - .section .rodata.cst16,"aM",@progbits,16; \ - .p2align 4; \ - .type ldbl_min,@object; \ -ldbl_min: \ - .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x1, 0; \ - .byte 0, 0, 0, 0, 0, 0; \ - .size ldbl_min, .-ldbl_min; - -/* Remove excess range and precision by storing a value on the stack - and loading it back. The value is given to be nonnegative or NaN; - if it is subnormal, also force an underflow exception. The - relevant constant for the minimum of the type must have been - defined, the MO macro must have been defined for access to memory - operands, and, if PIC, the PIC register must have been loaded. 
*/ -#define FLT_NARROW_EVAL_UFLOW_NONNEG_NAN \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - flds MO(flt_min); \ - fld %st(1); \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ -6424: fstps (%esp); \ - flds (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); -#define DBL_NARROW_EVAL_UFLOW_NONNEG_NAN \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fldl MO(dbl_min); \ - fld %st(1); \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ -6453: fstpl (%esp); \ - fldl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); - -/* Likewise, but the argument is not a NaN (so fcom instructions, - which support memory operands, can be used). */ -#define FLT_NARROW_EVAL_UFLOW_NONNEG \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fcoms MO(flt_min); \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ -6424: fstps (%esp); \ - flds (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); -#define DBL_NARROW_EVAL_UFLOW_NONNEG \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fcoml MO(dbl_min); \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ -6453: fstpl (%esp); \ - fldl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); - -/* Likewise, but the non-NaN argument may be negative. 
*/ -#define FLT_NARROW_EVAL_UFLOW_NONNAN \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fld %st(0); \ - fabs; \ - fcomps MO(flt_min); \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ -6424: fstps (%esp); \ - flds (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); -#define DBL_NARROW_EVAL_UFLOW_NONNAN \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fld %st(0); \ - fabs; \ - fcompl MO(dbl_min); \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ -6453: fstpl (%esp); \ - fldl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); - -/* Force an underflow exception if the given value is subnormal. The - relevant constant for the minimum of the type must have been - defined, the MO macro must have been defined for access to memory - operands, and, if PIC, the PIC register must have been loaded. */ -#define FLT_CHECK_FORCE_UFLOW \ - flds MO(flt_min); \ - fld %st(1); \ - fabs; \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); \ -6424: -#define DBL_CHECK_FORCE_UFLOW \ - fldl MO(dbl_min); \ - fld %st(1); \ - fabs; \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); \ -6453: - -/* Likewise, but also remove excess range and precision if the value - is subnormal. 
*/ -#define FLT_CHECK_FORCE_UFLOW_NARROW \ - flds MO(flt_min); \ - fld %st(1); \ - fabs; \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ - fstps (%esp); \ - flds (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); \ -6424: -#define DBL_CHECK_FORCE_UFLOW_NARROW \ - fldl MO(dbl_min); \ - fld %st(1); \ - fabs; \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ - fstpl (%esp); \ - fldl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); \ -6453: - -/* Likewise, but the argument is nonnegative or NaN. */ -#define LDBL_CHECK_FORCE_UFLOW_NONNEG_NAN \ - fldt MO(ldbl_min); \ - fld %st(1); \ - fucompp; \ - fnstsw; \ - sahf; \ - jnc 6464f; \ - fld %st(0); \ - fmul %st(0); \ - fstp %st(0); \ -6464: - -/* Likewise, but the argument is not a NaN. */ -#define FLT_CHECK_FORCE_UFLOW_NONNAN \ - fld %st(0); \ - fabs; \ - fcomps MO(flt_min); \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); \ -6424: -#define DBL_CHECK_FORCE_UFLOW_NONNAN \ - fld %st(0); \ - fabs; \ - fcompl MO(dbl_min); \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); \ -6453: -#define LDBL_CHECK_FORCE_UFLOW_NONNAN \ - fldt MO(ldbl_min); \ - fld %st(1); \ - fabs; \ - fcompp; \ - fnstsw; \ - sahf; \ - jnc 6464f; \ - fld %st(0); \ - fmul %st(0); \ - fstp %st(0); \ -6464: - -/* Likewise, but the argument is nonnegative and not a NaN. 
*/ -#define FLT_CHECK_FORCE_UFLOW_NONNEG \ - fcoms MO(flt_min); \ - fnstsw; \ - sahf; \ - jnc 6424f; \ - subl $4, %esp; \ - cfi_adjust_cfa_offset (4); \ - fld %st(0); \ - fmul %st(0); \ - fstps (%esp); \ - addl $4, %esp; \ - cfi_adjust_cfa_offset (-4); \ -6424: -#define DBL_CHECK_FORCE_UFLOW_NONNEG \ - fcoml MO(dbl_min); \ - fnstsw; \ - sahf; \ - jnc 6453f; \ - subl $8, %esp; \ - cfi_adjust_cfa_offset (8); \ - fld %st(0); \ - fmul %st(0); \ - fstpl (%esp); \ - addl $8, %esp; \ - cfi_adjust_cfa_offset (-8); \ -6453: -#define LDBL_CHECK_FORCE_UFLOW_NONNEG \ - fldt MO(ldbl_min); \ - fld %st(1); \ - fcompp; \ - fnstsw; \ - sahf; \ - jnc 6464f; \ - fld %st(0); \ - fmul %st(0); \ - fstp %st(0); \ -6464: - -#endif /* i386-math-asm.h. */ diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps deleted file mode 100644 index 0fc50907ad..0000000000 --- a/sysdeps/i386/fpu/libm-test-ulps +++ /dev/null @@ -1,2202 +0,0 @@ -# Begin of automatic generation - -# Maximal error of functions: -Function: "acos": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "acos_downward": -ildouble: 2 -ldouble: 2 - -Function: "acos_towardzero": -ildouble: 2 -ldouble: 2 - -Function: "acos_upward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "acosh": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 2 - -Function: "acosh_downward": -double: 1 -idouble: 1 -ildouble: 6 -ldouble: 4 - -Function: "acosh_towardzero": -double: 1 -idouble: 1 -ildouble: 6 -ldouble: 4 - -Function: "acosh_upward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 3 - -Function: "asin": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "asin_downward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "asin_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "asin_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "asinh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "asinh_downward": 
-double: 1 -float: 1 -idouble: 1 -ildouble: 5 -ldouble: 5 - -Function: "asinh_towardzero": -double: 1 -float: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "asinh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "atan": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atanh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "atanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 4 - -Function: "atanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 3 - -Function: "atanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "cabs": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cacos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "cacos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cacos_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: 
Imaginary part of "cacos_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Real part of "cacos_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Real part of "cacos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Real part of "cacosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cacosh_downward": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cacosh_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cacosh_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "carg": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "casin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "casin": -double: 
1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "casin_downward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "casin_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Real part of "casin_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "casin_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Real part of "casin_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "casin_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Real part of "casinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "casinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "casinh_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Imaginary part of "casinh_downward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "casinh_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "casinh_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "casinh_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "casinh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "catan": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part 
of "catan_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "catan_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "catan_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "catanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catanh": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cbrt": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cbrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "cbrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cbrt_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ccos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ccos": -double: 1 -float: 1 
-idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ccos_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccos_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccos_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccos_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "ccos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "ccosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ccosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ccosh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccosh_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccosh_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccosh_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccosh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "ccosh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cexp": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "cexp": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cexp_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of 
"cexp_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cexp_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cexp_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cexp_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cexp_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "clog": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog10": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "clog10": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "clog10_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 8 -ldouble: 8 - -Function: Imaginary part of "clog10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog10_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 8 -ldouble: 8 - -Function: Imaginary part of "clog10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog10_upward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "clog10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "clog_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part 
of "clog_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "clog_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cos": -float: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cos_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cos_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "cos_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh": -double: 1 -float: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh_downward": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 3 - -Function: "cosh_towardzero": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh_upward": -double: 4 -float: 2 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 3 - -Function: Real part of "cpow": -double: 2 -float: 5 -idouble: 2 -ifloat: 5 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cpow": -float: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "cpow_downward": -double: 5 -float: 8 -idouble: 5 -ifloat: 8 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "cpow_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cpow_towardzero": -double: 5 -float: 8 -idouble: 5 -ifloat: 8 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "cpow_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cpow_upward": -double: 4 -float: 1 -idouble: 4 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cpow_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csin": -double: 1 -float: 1 -idouble: 1 
-ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "csin": -float: 1 -ifloat: 1 - -Function: Real part of "csin_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csin_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csin_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csin_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csin_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "csin_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csinh": -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "csinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "csinh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "csinh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csqrt": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "csqrt": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csqrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "csqrt_downward": 
-double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "csqrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "csqrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "csqrt_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "csqrt_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "ctan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ctan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "ctan_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan_towardzero": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "ctan_towardzero": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ctan_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ctanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ctanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "ctanh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "ctanh_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctanh_towardzero": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - 
-Function: Imaginary part of "ctanh_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ctanh_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ctanh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "erf": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erfc": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "erfc_downward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "erfc_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "erfc_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "exp": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp2_downward": -ildouble: 1 -ldouble: 1 - -Function: "exp2_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp2_upward": -ildouble: 1 -ldouble: 1 - -Function: "exp_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp_upward": 
-double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "expm1": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "expm1_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "expm1_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "expm1_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "gamma": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "gamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "gamma_towardzero": -double: 4 -float: 2 -idouble: 4 -ifloat: 2 -ildouble: 7 -ldouble: 7 - -Function: "gamma_upward": -double: 3 -float: 4 -idouble: 3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "hypot": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "j0": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "j0_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "j0_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "j0_upward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "j1": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "j1_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "j1_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "j1_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: "jn": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "jn_downward": -double: 
2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "jn_towardzero": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "jn_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "lgamma": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "lgamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "lgamma_towardzero": -double: 4 -float: 2 -idouble: 4 -ifloat: 2 -ildouble: 7 -ldouble: 7 - -Function: "lgamma_upward": -double: 3 -float: 4 -idouble: 3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "log": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "log10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "log10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log1p": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log1p_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "log1p_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "log1p_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "log2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log_downward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log_upward": -double: 1 
-idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow_downward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "pow_towardzero": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "pow_upward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "sin": -float: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "sin_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "sin_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "sin_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "sincos": -float: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "sincos_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "sincos_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "sincos_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "sinh": -double: 1 -ildouble: 2 -ldouble: 2 - -Function: "sinh_downward": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 5 - -Function: "sinh_towardzero": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 4 - -Function: "sinh_upward": -double: 4 -float: 2 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 5 - -Function: "tan": -float: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "tan_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "tan_towardzero": -double: 1 
-float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "tan_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "tanh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "tanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 7 -ldouble: 4 - -Function: "tanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "tanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 4 - -Function: "tgamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_upward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "y0": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "y0_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y0_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y0_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "y1": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "y1_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 7 -ldouble: 7 - -Function: "y1_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y1_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 7 -ldouble: 7 - -Function: "yn": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "yn_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "yn_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "yn_upward": -double: 3 
-float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -# end of automatic generation diff --git a/sysdeps/i386/fpu/libm-test-ulps-name b/sysdeps/i386/fpu/libm-test-ulps-name deleted file mode 100644 index 54ca0d8295..0000000000 --- a/sysdeps/i386/fpu/libm-test-ulps-name +++ /dev/null @@ -1 +0,0 @@ -ix86 diff --git a/sysdeps/i386/fpu/math-tests.h b/sysdeps/i386/fpu/math-tests.h deleted file mode 100644 index 26d0633dc0..0000000000 --- a/sysdeps/i386/fpu/math-tests.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Configuration for math tests. 32-bit x86 version. - Copyright (C) 2013-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* On 32-bit x86, versions of GCC up to at least 4.8 are happy to use FPU load - instructions for sNaN values, and loading a float or double sNaN value will - already raise an INVALID exception as well as turn the sNaN into a qNaN, - rendering certain tests infeasible in this scenario. - <http://gcc.gnu.org/PR56831>. 
*/ -#define SNAN_TESTS_float 0 -#define SNAN_TESTS_double 0 - -#include_next <math-tests.h> diff --git a/sysdeps/i386/fpu/math_private.h b/sysdeps/i386/fpu/math_private.h deleted file mode 100644 index 485214391f..0000000000 --- a/sysdeps/i386/fpu/math_private.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef I386_MATH_PRIVATE_H -#define I386_MATH_PRIVATE_H 1 - -#include "fenv_private.h" -#include_next <math_private.h> - -#endif diff --git a/sysdeps/i386/fpu/mpatan.c b/sysdeps/i386/fpu/mpatan.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/mpatan.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/mpatan2.c b/sysdeps/i386/fpu/mpatan2.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/mpatan2.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/mpexp.c b/sysdeps/i386/fpu/mpexp.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/mpexp.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/mplog.c b/sysdeps/i386/fpu/mplog.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/mplog.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/mpsqrt.c b/sysdeps/i386/fpu/mpsqrt.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/mpsqrt.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/s_asinh.S b/sysdeps/i386/fpu/s_asinh.S deleted file mode 100644 index 1a60f7de2c..0000000000 --- a/sysdeps/i386/fpu/s_asinh.S +++ /dev/null @@ -1,139 +0,0 @@ -/* ix87 specific implementation of arcsinh. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type huge,@object -huge: .double 1e+300 - ASM_SIZE_DIRECTIVE(huge) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__asinh) - movl 8(%esp), %ecx - movl $0x7fffffff, %eax - andl %ecx, %eax - andl $0x80000000, %ecx - movl %eax, %edx - orl $0x800fffff, %edx - incl %edx - jz 7f // x in ±Inf or NaN - xorl %ecx, 8(%esp) - fldl 4(%esp) // |x| - cmpl $0x3e300000, %eax - jb 2f // |x| < 2^-28 - fldln2 // log(2) : |x| - cmpl $0x41b00000, %eax - fxch // |x| : log(2) - ja 3f // |x| > 2^28 -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpl $0x40000000, %eax - ja 5f // |x| > 2 - - // 2^-28 <= |x| <= 2 => y = sign(x)*log1p(|x|+|x|^2/(1+sqrt(1+|x|^2))) - fld %st // |x| : |x| : log(2) - fmul %st(1) // |x|^2 : |x| : log(2) - fld %st // |x|^2 : |x|^2 : |x| : log(2) - faddl MO(one) // 1+|x|^2 : |x|^2 : |x| : log(2) - fsqrt // sqrt(1+|x|^2) : |x|^2 : |x| : log(2) - faddl MO(one) // 1+sqrt(1+|x|^2) : |x|^2 : |x| : log(2) - fdivrp // |x|^2/(1+sqrt(1+|x|^2)) : |x| : log(2) - faddp // |x|+|x|^2/(1+sqrt(1+|x|^2)) : log(2) - fcoml MO(limit) - fnstsw - sahf - ja 6f - fyl2xp1 - 
jecxz 4f - fchs -4: ret - -7: fldl 4(%esp) - ret - -6: faddl MO(one) - fyl2x - jecxz 4f - fchs -4: ret - - // |x| < 2^-28 => y = x (inexact iff |x| != 0.0) - .align ALIGNARG(4) -2: -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - jecxz 4f - fchs // x -4: fld %st // x : x - faddl MO(huge) // huge+x : x - fstp %st(0) // x - cmpl $0x00100000, %eax - jae 8f - subl $8, %esp - cfi_adjust_cfa_offset (8) - fld %st(0) - fmul %st(0) - fstpl (%esp) - addl $8, %esp - cfi_adjust_cfa_offset (-8) -8: ret - - // |x| > 2^28 => y = sign(x) * (log(|x|) + log(2)) - .align ALIGNARG(4) -3: fyl2x // log(|x|) - fldln2 // log(2) : log(|x|) - faddp // log(|x|)+log(2) - jecxz 4f - fchs -4: ret - - // |x| > 2 => y = sign(x) * log(2*|x| + 1/(|x|+sqrt(x*x+1))) - .align ALIGNARG(4) -5: fld %st // |x| : |x| : log(2) - fadd %st, %st(1) // |x| : 2*|x| : log(2) - fld %st // |x| : |x| : 2*|x| : log(2) - fmul %st(1) // |x|^2 : |x| : 2*|x| : log(2) - faddl MO(one) // 1+|x|^2 : |x| : 2*|x| : log(2) - fsqrt // sqrt(1+|x|^2) : |x| : 2*|x| : log(2) - faddp // |x|+sqrt(1+|x|^2) : 2*|x| : log(2) - fdivrl MO(one) // 1/(|x|+sqrt(1+|x|^2)) : 2*|x| : log(2) - faddp // 2*|x|+1/(|x|+sqrt(1+|x|^2)) : log(2) - fyl2x // log(2*|x|+1/(|x|+sqrt(1+|x|^2))) - jecxz 4f - fchs -4: ret -END(__asinh) -weak_alias (__asinh, asinh) diff --git a/sysdeps/i386/fpu/s_asinhf.S b/sysdeps/i386/fpu/s_asinhf.S deleted file mode 100644 index 12bcfef934..0000000000 --- a/sysdeps/i386/fpu/s_asinhf.S +++ /dev/null @@ -1,139 +0,0 @@ -/* ix87 specific implementation of arcsinh. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type huge,@object -huge: .double 1e+36 - ASM_SIZE_DIRECTIVE(huge) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__asinhf) - movl 4(%esp), %ecx - movl $0x7fffffff, %eax - andl %ecx, %eax - andl $0x80000000, %ecx - movl %eax, %edx - orl $0x807fffff, %edx - incl %edx - jz 7f // x in ±Inf or NaN - xorl %ecx, 4(%esp) - flds 4(%esp) // |x| - cmpl $0x38000000, %eax - jb 2f // |x| < 2^-14 - fldln2 // log(2) : |x| - cmpl $0x47000000, %eax - fxch // |x| : log(2) - ja 3f // |x| > 2^14 -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpl $0x40000000, %eax - ja 5f // |x| > 2 - - // 2^-14 <= |x| <= 2 => y = sign(x)*log1p(|x|+|x|^2/(1+sqrt(1+|x|^2))) - fld %st // |x| : |x| : log(2) - fmul %st(1) // |x|^2 : |x| : log(2) - fld %st // |x|^2 : |x|^2 : |x| : log(2) - faddl MO(one) // 1+|x|^2 : |x|^2 : |x| : log(2) - fsqrt // sqrt(1+|x|^2) : |x|^2 : |x| : log(2) - faddl MO(one) // 1+sqrt(1+|x|^2) : |x|^2 : |x| : log(2) - fdivrp // |x|^2/(1+sqrt(1+|x|^2)) : |x| : log(2) - faddp // |x|+|x|^2/(1+sqrt(1+|x|^2)) : log(2) - fcoml MO(limit) - fnstsw - sahf - ja 6f - fyl2xp1 - jecxz 4f - fchs -4: ret - -7: flds 4(%esp) - ret - -6: faddl MO(one) - fyl2x - jecxz 4f - fchs -4: ret - - // |x| < 2^-14 => y = x (inexact iff |x| != 0.0) - .align ALIGNARG(4) -2: -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - jecxz 4f - fchs // x -4: fld %st // x : x 
- faddl MO(huge) // huge+x : x - fstp %st(0) // x - cmpl $0x00800000, %eax - jae 8f - subl $4, %esp - cfi_adjust_cfa_offset (4) - fld %st(0) - fmul %st(0) - fstps (%esp) - addl $4, %esp - cfi_adjust_cfa_offset (-4) -8: ret - - // |x| > 2^14 => y = sign(x) * (log(|x|) + log(2)) - .align ALIGNARG(4) -3: fyl2x // log(|x|) - fldln2 // log(2) : log(|x|) - faddp // log(|x|)+log(2) - jecxz 4f - fchs -4: ret - - // |x| > 2 => y = sign(x) * log(2*|x| + 1/(|x|+sqrt(x*x+1))) - .align ALIGNARG(4) -5: fld %st // |x| : |x| : log(2) - fadd %st, %st(1) // |x| : 2*|x| : log(2) - fld %st // |x| : |x| : 2*|x| : log(2) - fmul %st(1) // |x|^2 : |x| : 2*|x| : log(2) - faddl MO(one) // 1+|x|^2 : |x| : 2*|x| : log(2) - fsqrt // sqrt(1+|x|^2) : |x| : 2*|x| : log(2) - faddp // |x|+sqrt(1+|x|^2) : 2*|x| : log(2) - fdivrl MO(one) // 1/(|x|+sqrt(1+|x|^2)) : 2*|x| : log(2) - faddp // 2*|x|+1/(|x|+sqrt(1+|x|^2)) : log(2) - fyl2x // log(2*|x|+1/(|x|+sqrt(1+|x|^2))) - jecxz 4f - fchs -4: ret -END(__asinhf) -weak_alias (__asinhf, asinhf) diff --git a/sysdeps/i386/fpu/s_asinhl.S b/sysdeps/i386/fpu/s_asinhl.S deleted file mode 100644 index f31a267e78..0000000000 --- a/sysdeps/i386/fpu/s_asinhl.S +++ /dev/null @@ -1,144 +0,0 @@ -/* ix87 specific implementation of arcsinh. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type huge,@object -huge: .tfloat 1e+4930 - ASM_SIZE_DIRECTIVE(huge) - .align ALIGNARG(4) - /* Please note that we use double value for 1.0. This number - has an exact representation and so we don't get accuracy - problems. The advantage is that the code is simpler. */ - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. */ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__asinhl) - movl 12(%esp), %ecx - movl $0x7fff, %eax - andl %ecx, %eax - andl $0x8000, %ecx - movl %eax, %edx - orl $0xffff8000, %edx - incl %edx - jz 7f // x in ±Inf or NaN - xorl %ecx, 12(%esp) - fldt 4(%esp) // |x| - cmpl $0x3fde, %eax - jb 2f // |x| < 2^-34 - fldln2 // log(2) : |x| - cmpl $0x4020, %eax - fxch // |x| : log(2) - ja 3f // |x| > 2^34 -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpl $0x4000, %eax - ja 5f // |x| > 2 - - // 2^-34 <= |x| <= 2 => y = sign(x)*log1p(|x|+|x|^2/(1+sqrt(1+|x|^2))) - fld %st // |x| : |x| : log(2) - fmul %st(1) // |x|^2 : |x| : log(2) - fld %st // |x|^2 : |x|^2 : |x| : log(2) - faddl MO(one) // 1+|x|^2 : |x|^2 : |x| : log(2) - fsqrt // sqrt(1+|x|^2) : |x|^2 : |x| : log(2) - faddl MO(one) // 1+sqrt(1+|x|^2) : |x|^2 : |x| : log(2) - fdivrp // |x|^2/(1+sqrt(1+|x|^2)) : |x| : log(2) - faddp // |x|+|x|^2/(1+sqrt(1+|x|^2)) : log(2) - fcoml MO(limit) - fnstsw - sahf - ja 6f - fyl2xp1 - jecxz 4f - fchs -4: ret - -7: fldt 4(%esp) - fadd %st - ret - -6: faddl MO(one) - fyl2x - jecxz 4f - fchs -4: ret - - // |x| < 2^-34 => y = x (inexact iff |x| != 0.0) - .align 
ALIGNARG(4) -2: -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - jecxz 4f - fchs // x -4: fld %st // x : x - fldt MO(huge) // huge : x : x - faddp // huge+x : x - fstp %st(0) // x - cmpl $0x0001, %eax - jae 8f - fld %st(0) - fmul %st(0) - fstp %st(0) -8: ret - - // |x| > 2^34 => y = sign(x) * (log(|x|) + log(2)) - .align ALIGNARG(4) -3: fyl2x // log(|x|) - fldln2 // log(2) : log(|x|) - faddp // log(|x|)+log(2) - jecxz 4f - fchs -4: ret - - // |x| > 2 => y = sign(x) * log(2*|x| + 1/(|x|+sqrt(x*x+1))) - .align ALIGNARG(4) -5: fld %st // |x| : |x| : log(2) - fadd %st, %st(1) // |x| : 2*|x| : log(2) - fld %st // |x| : |x| : 2*|x| : log(2) - fmul %st(1) // |x|^2 : |x| : 2*|x| : log(2) - faddl MO(one) // 1+|x|^2 : |x| : 2*|x| : log(2) - fsqrt // sqrt(1+|x|^2) : |x| : 2*|x| : log(2) - faddp // |x|+sqrt(1+|x|^2) : 2*|x| : log(2) - fdivrl MO(one) // 1/(|x|+sqrt(1+|x|^2)) : 2*|x| : log(2) - faddp // 2*|x|+1/(|x|+sqrt(1+|x|^2)) : log(2) - fyl2x // log(2*|x|+1/(|x|+sqrt(1+|x|^2))) - jecxz 4f - fchs -4: ret -END(__asinhl) -weak_alias (__asinhl, asinhl) diff --git a/sysdeps/i386/fpu/s_atan.S b/sysdeps/i386/fpu/s_atan.S deleted file mode 100644 index 644de78feb..0000000000 --- a/sysdeps/i386/fpu/s_atan.S +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_atan.S,v 1.4 1995/05/08 23:50:41 jtc Exp $") - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__atan) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - fldl 4(%esp) - fld1 - fpatan - DBL_CHECK_FORCE_UFLOW - ret -END (__atan) -weak_alias (__atan, atan) diff --git a/sysdeps/i386/fpu/s_atanf.S b/sysdeps/i386/fpu/s_atanf.S deleted file mode 100644 index 0589c1135e..0000000000 --- a/sysdeps/i386/fpu/s_atanf.S +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_atanf.S,v 1.3 1995/05/08 23:51:33 jtc Exp $") - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%ecx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__atanf) -#ifdef PIC - LOAD_PIC_REG (cx) -#endif - flds 4(%esp) - fld1 - fpatan - FLT_CHECK_FORCE_UFLOW - ret -END (__atanf) -weak_alias (__atanf, atanf) diff --git a/sysdeps/i386/fpu/s_atanl.c b/sysdeps/i386/fpu/s_atanl.c deleted file mode 100644 index b7dba88aad..0000000000 --- a/sysdeps/i386/fpu/s_atanl.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - */ - -#include <math_private.h> - -long double -__atanl (long double x) -{ - long double res; - - asm ("fld1\n" - "fpatan" - : "=t" (res) : "0" (x)); - - return res; -} - -weak_alias (__atanl, atanl) diff --git a/sysdeps/i386/fpu/s_cbrt.S b/sysdeps/i386/fpu/s_cbrt.S deleted file mode 100644 index 7f01659eae..0000000000 --- a/sysdeps/i386/fpu/s_cbrt.S +++ /dev/null @@ -1,200 +0,0 @@ -/* Compute cubic root of double value. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Dirk Alboth <dirka@uni-paderborn.de> and - Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type f7,@object -f7: .double -0.145263899385486377 - ASM_SIZE_DIRECTIVE(f7) - .type f6,@object -f6: .double 0.784932344976639262 - ASM_SIZE_DIRECTIVE(f6) - .type f5,@object -f5: .double -1.83469277483613086 - ASM_SIZE_DIRECTIVE(f5) - .type f4,@object -f4: .double 2.44693122563534430 - ASM_SIZE_DIRECTIVE(f4) - .type f3,@object -f3: .double -2.11499494167371287 - ASM_SIZE_DIRECTIVE(f3) - .type f2,@object -f2: .double 1.50819193781584896 - ASM_SIZE_DIRECTIVE(f2) - .type f1,@object -f1: .double 0.354895765043919860 - ASM_SIZE_DIRECTIVE(f1) - -#define CBRT2 1.2599210498948731648 -#define ONE_CBRT2 0.793700525984099737355196796584 -#define SQR_CBRT2 1.5874010519681994748 -#define ONE_SQR_CBRT2 0.629960524947436582364439673883 - - .type factor,@object -factor: .double ONE_SQR_CBRT2 - .double ONE_CBRT2 - .double 1.0 - .double CBRT2 - .double SQR_CBRT2 - ASM_SIZE_DIRECTIVE(factor) - - .type two54,@object -two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43 - ASM_SIZE_DIRECTIVE(two54) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%ebx) -#define MOX(op,x) op##@GOTOFF(%ebx,x,1) -#else -#define MO(op) op -#define MOX(op,x) op(x) -#endif - - .text -ENTRY(__cbrt) - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl %eax, %edx - andl $0x7fffffff, %eax - orl %eax, %ecx - jz 1f - xorl %ecx, %ecx - cmpl $0x7ff00000, %eax - jae 1f - -#ifdef PIC - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG (bx) -#endif - - cmpl $0x00100000, %eax - jae 2f - -#ifdef PIC - fldl 8(%esp) -#else - fldl 4(%esp) -#endif - fmull MO(two54) - movl $-54, %ecx -#ifdef PIC - fstpl 8(%esp) - movl 12(%esp), %eax -#else - fstpl 4(%esp) - movl 8(%esp), %eax -#endif - movl %eax, %edx - andl $0x7fffffff, %eax - -2: shrl $20, %eax - andl $0x800fffff, %edx - subl $1022, %eax - 
orl $0x3fe00000, %edx - addl %eax, %ecx -#ifdef PIC - movl %edx, 12(%esp) - - fldl 8(%esp) /* xm */ -#else - movl %edx, 8(%esp) - - fldl 4(%esp) /* xm */ -#endif - fabs - - /* The following code has two tracks: - a) compute the normalized cbrt value - b) compute xe/3 and xe%3 - The right track computes the value for b) and this is done - in an optimized way by avoiding division. - - But why two tracks at all? Very easy: efficiency. Some FP - instruction can overlap with a certain amount of integer (and - FP) instructions. So we get (except for the imull) all - instructions for free. */ - - fld %st(0) /* xm : xm */ - - fmull MO(f7) /* f7*xm : xm */ - movl $1431655766, %eax - faddl MO(f6) /* f6+f7*xm : xm */ - imull %ecx - fmul %st(1) /* (f6+f7*xm)*xm : xm */ - movl %ecx, %eax - faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */ - sarl $31, %eax - fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */ - subl %eax, %edx - faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */ - fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ - faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ - fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ - faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ - fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ - faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ - - fld %st /* u : u : xm */ - fmul %st(1) /* u*u : u : xm */ - fld %st(2) /* xm : u*u : u : xm */ - fadd %st /* 2*xm : u*u : u : xm */ - fxch %st(1) /* u*u : 2*xm : u : xm */ - fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ - movl %edx, %eax - fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ - leal (%edx,%edx,2),%edx - fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ - subl %edx, %ecx - faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ - shll $3, %ecx - fmulp /* u*(t2+2*xm) : 2*t2+xm */ - fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ - fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ - pushl %eax - cfi_adjust_cfa_offset (4) - fildl (%esp) /* 
xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ - fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ - fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ - popl %edx - cfi_adjust_cfa_offset (-4) -#ifdef PIC - movl 12(%esp), %eax - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) -#else - movl 8(%esp), %eax -#endif - testl %eax, %eax - fstp %st(1) - jns 4f - fchs -4: ret - - /* Return the argument. */ -1: fldl 4(%esp) - ret -END(__cbrt) -weak_alias (__cbrt, cbrt) diff --git a/sysdeps/i386/fpu/s_cbrtf.S b/sysdeps/i386/fpu/s_cbrtf.S deleted file mode 100644 index 645d24372d..0000000000 --- a/sysdeps/i386/fpu/s_cbrtf.S +++ /dev/null @@ -1,177 +0,0 @@ -/* Compute cubic root of float value. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Dirk Alboth <dirka@uni-paderborn.de> and - Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type f3,@object -f3: .double 0.191502161678719066 - ASM_SIZE_DIRECTIVE(f3) - .type f2,@object -f2: .double 0.697570460207922770 - ASM_SIZE_DIRECTIVE(f2) - .type f1,@object -f1: .double 0.492659620528969547 - ASM_SIZE_DIRECTIVE(f1) - -#define CBRT2 1.2599210498948731648 -#define ONE_CBRT2 0.793700525984099737355196796584 -#define SQR_CBRT2 1.5874010519681994748 -#define ONE_SQR_CBRT2 0.629960524947436582364439673883 - - .type factor,@object - .align ALIGNARG(4) -factor: .double ONE_SQR_CBRT2 - .double ONE_CBRT2 - .double 1.0 - .double CBRT2 - .double SQR_CBRT2 - ASM_SIZE_DIRECTIVE(factor) - - .type two25,@object -two25: .byte 0, 0, 0, 0x4c - ASM_SIZE_DIRECTIVE(two25) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%ebx) -#define MOX(op,x) op##@GOTOFF(%ebx,x,1) -#else -#define MO(op) op -#define MOX(op,x) op(x) -#endif - - .text -ENTRY(__cbrtf) - movl 4(%esp), %eax - xorl %ecx, %ecx - movl %eax, %edx - andl $0x7fffffff, %eax - jz 1f - cmpl $0x7f800000, %eax - jae 1f - -#ifdef PIC - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG (bx) -#endif - - cmpl $0x00800000, %eax - jae 2f - -#ifdef PIC - flds 8(%esp) -#else - flds 4(%esp) -#endif - fmuls MO(two25) - movl $-25, %ecx -#ifdef PIC - fstps 8(%esp) - movl 8(%esp), %eax -#else - fstps 4(%esp) - movl 4(%esp), %eax -#endif - movl %eax, %edx - andl $0x7fffffff, %eax - -2: shrl $23, %eax - andl $0x807fffff, %edx - subl $126, %eax - orl $0x3f000000, %edx - addl %eax, %ecx -#ifdef PIC - movl %edx, 8(%esp) - - flds 8(%esp) /* xm */ -#else - movl %edx, 4(%esp) - - flds 4(%esp) /* xm */ -#endif - fabs - - /* The following code has two tracks: - a) compute the normalized cbrt value - b) compute xe/3 and xe%3 - The right track computes the value for b) and this is done - in an optimized way by avoiding division. - - But why two tracks at all? Very easy: efficiency. 
Some FP - instruction can overlap with a certain amount of integer (and - FP) instructions. So we get (except for the imull) all - instructions for free. */ - - fld %st(0) /* xm : xm */ - fmull MO(f3) /* f3*xm : xm */ - movl $1431655766, %eax - fsubrl MO(f2) /* f2-f3*xm : xm */ - imull %ecx - fmul %st(1) /* (f2-f3*xm)*xm : xm */ - movl %ecx, %eax - faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */ - sarl $31, %eax - fld %st /* u : u : xm */ - subl %eax, %edx - fmul %st(1) /* u*u : u : xm */ - fld %st(2) /* xm : u*u : u : xm */ - fadd %st /* 2*xm : u*u : u : xm */ - fxch %st(1) /* u*u : 2*xm : u : xm */ - fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ - movl %edx, %eax - fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ - leal (%edx,%edx,2),%edx - fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ - subl %edx, %ecx - faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ - shll $3, %ecx - fmulp /* u*(t2+2*xm) : 2*t2+xm */ - fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ - fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ - pushl %eax - cfi_adjust_cfa_offset (4) - fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ - fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ - fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ - popl %edx - cfi_adjust_cfa_offset (-4) -#ifdef PIC - movl 8(%esp), %eax - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) -#else - movl 4(%esp), %eax -#endif - testl %eax, %eax - fstp %st(1) - jns 4f - fchs -4: ret - - /* Return the argument. */ -1: flds 4(%esp) - ret -END(__cbrtf) -weak_alias (__cbrtf, cbrtf) diff --git a/sysdeps/i386/fpu/s_cbrtl.S b/sysdeps/i386/fpu/s_cbrtl.S deleted file mode 100644 index e4a72d29c6..0000000000 --- a/sysdeps/i386/fpu/s_cbrtl.S +++ /dev/null @@ -1,229 +0,0 @@ -/* Compute cubic root of long double value. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Dirk Alboth <dirka@uni-paderborn.de> and - Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type f8,@object -f8: .tfloat 0.161617097923756032 - ASM_SIZE_DIRECTIVE(f8) - .align ALIGNARG(4) - .type f7,@object -f7: .tfloat -0.988553671195413709 - ASM_SIZE_DIRECTIVE(f7) - .align ALIGNARG(4) - .type f6,@object -f6: .tfloat 2.65298938441952296 - ASM_SIZE_DIRECTIVE(f6) - .align ALIGNARG(4) - .type f5,@object -f5: .tfloat -4.11151425200350531 - ASM_SIZE_DIRECTIVE(f5) - .align ALIGNARG(4) - .type f4,@object -f4: .tfloat 4.09559907378707839 - ASM_SIZE_DIRECTIVE(f4) - .align ALIGNARG(4) - .type f3,@object -f3: .tfloat -2.82414939754975962 - ASM_SIZE_DIRECTIVE(f3) - .align ALIGNARG(4) - .type f2,@object -f2: .tfloat 1.67595307700780102 - ASM_SIZE_DIRECTIVE(f2) - .align ALIGNARG(4) - .type f1,@object -f1: .tfloat 0.338058687610520237 - ASM_SIZE_DIRECTIVE(f1) - -#define CBRT2 1.2599210498948731648 -#define ONE_CBRT2 0.793700525984099737355196796584 -#define SQR_CBRT2 1.5874010519681994748 -#define ONE_SQR_CBRT2 0.629960524947436582364439673883 - - /* We make the entries in the following table all 16 bytes - wide to avoid having to implement a multiplication by 10. 
*/ - .type factor,@object - .align ALIGNARG(4) -factor: .tfloat ONE_SQR_CBRT2 - .byte 0, 0, 0, 0, 0, 0 - .tfloat ONE_CBRT2 - .byte 0, 0, 0, 0, 0, 0 - .tfloat 1.0 - .byte 0, 0, 0, 0, 0, 0 - .tfloat CBRT2 - .byte 0, 0, 0, 0, 0, 0 - .tfloat SQR_CBRT2 - ASM_SIZE_DIRECTIVE(factor) - - .type two64,@object - .align ALIGNARG(4) -two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 - ASM_SIZE_DIRECTIVE(two64) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%ebx) -#define MOX(op,x) op##@GOTOFF(%ebx,x,1) -#else -#define MO(op) op -#define MOX(op,x) op(x) -#endif - - .text -ENTRY(__cbrtl) - movl 4(%esp), %ecx - movl 12(%esp), %eax - orl 8(%esp), %ecx - movl %eax, %edx - andl $0x7fff, %eax - orl %eax, %ecx - jz 1f - xorl %ecx, %ecx - cmpl $0x7fff, %eax - je 1f - -#ifdef PIC - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG (bx) -#endif - - cmpl $0, %eax - jne 2f - -#ifdef PIC - fldt 8(%esp) -#else - fldt 4(%esp) -#endif - fmull MO(two64) - movl $-64, %ecx -#ifdef PIC - fstpt 8(%esp) - movl 16(%esp), %eax -#else - fstpt 4(%esp) - movl 12(%esp), %eax -#endif - movl %eax, %edx - andl $0x7fff, %eax - -2: andl $0x8000, %edx - subl $16382, %eax - orl $0x3ffe, %edx - addl %eax, %ecx -#ifdef PIC - movl %edx, 16(%esp) - - fldt 8(%esp) /* xm */ -#else - movl %edx, 12(%esp) - - fldt 4(%esp) /* xm */ -#endif - fabs - - /* The following code has two tracks: - a) compute the normalized cbrt value - b) compute xe/3 and xe%3 - The right track computes the value for b) and this is done - in an optimized way by avoiding division. - - But why two tracks at all? Very easy: efficiency. Some FP - instruction can overlap with a certain amount of integer (and - FP) instructions. So we get (except for the imull) all - instructions for free. 
*/ - - fldt MO(f8) /* f8 : xm */ - fmul %st(1) /* f8*xm : xm */ - - fldt MO(f7) - faddp /* f7+f8*xm : xm */ - fmul %st(1) /* (f7+f8*xm)*xm : xm */ - movl $1431655766, %eax - fldt MO(f6) - faddp /* f6+(f7+f8*xm)*xm : xm */ - imull %ecx - fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */ - movl %ecx, %eax - fldt MO(f5) - faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */ - sarl $31, %eax - fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */ - subl %eax, %edx - fldt MO(f4) - faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */ - fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */ - fldt MO(f3) - faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */ - fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */ - fldt MO(f2) - faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */ - fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */ - fldt MO(f1) - faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */ - - fld %st /* u : u : xm */ - fmul %st(1) /* u*u : u : xm */ - fld %st(2) /* xm : u*u : u : xm */ - fadd %st /* 2*xm : u*u : u : xm */ - fxch %st(1) /* u*u : 2*xm : u : xm */ - fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ - movl %edx, %eax - fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ - leal (%edx,%edx,2),%edx - fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ - subl %edx, %ecx - faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ - shll $4, %ecx - fmulp /* u*(t2+2*xm) : 2*t2+xm */ - fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ - fldt MOX(32+factor,%ecx) - fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */ - pushl %eax - cfi_adjust_cfa_offset (4) - fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ - fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ - fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ - popl %edx - cfi_adjust_cfa_offset (-4) -#ifdef PIC - movl 16(%esp), %eax - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) -#else - movl 12(%esp), %eax -#endif - testl $0x8000, %eax - fstp %st(1) - jz 4f - fchs -4: ret - - /* 
Return the argument. */ -1: fldt 4(%esp) - fadd %st - ret -END(__cbrtl) -weak_alias (__cbrtl, cbrtl) diff --git a/sysdeps/i386/fpu/s_ceil.S b/sysdeps/i386/fpu/s_ceil.S deleted file mode 100644 index 1226bb2f87..0000000000 --- a/sysdeps/i386/fpu/s_ceil.S +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ceil.S,v 1.4 1995/05/08 23:52:13 jtc Exp $") - -ENTRY(__ceil) - fldl 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x0800,%edx /* round towards +oo */ - orl 4(%esp),%edx - andl $0xfbff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__ceil) -weak_alias (__ceil, ceil) diff --git a/sysdeps/i386/fpu/s_ceilf.S b/sysdeps/i386/fpu/s_ceilf.S deleted file mode 100644 index d345c0973b..0000000000 --- a/sysdeps/i386/fpu/s_ceilf.S +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ceilf.S,v 1.3 1995/05/08 23:52:44 jtc Exp $") - -ENTRY(__ceilf) - flds 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. 
*/ - movl $0x0800,%edx /* round towards +oo */ - orl 4(%esp),%edx - andl $0xfbff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__ceilf) -weak_alias (__ceilf, ceilf) diff --git a/sysdeps/i386/fpu/s_ceill.S b/sysdeps/i386/fpu/s_ceill.S deleted file mode 100644 index 7c08f43b24..0000000000 --- a/sysdeps/i386/fpu/s_ceill.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -ENTRY(__ceill) - fldt 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x0800,%edx /* round towards +oo */ - orl 4(%esp),%edx - andl $0xfbff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - /* Preserve "invalid" exceptions from sNaN input. */ - fnstsw - andl $0x1, %eax - orl %eax, 8(%esp) - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__ceill) -weak_alias (__ceill, ceill) diff --git a/sysdeps/i386/fpu/s_copysign.S b/sysdeps/i386/fpu/s_copysign.S deleted file mode 100644 index 2520a94427..0000000000 --- a/sysdeps/i386/fpu/s_copysign.S +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_copysign.S,v 1.4 1995/05/08 23:53:02 jtc Exp $") - -ENTRY(__copysign) - movl 16(%esp),%edx - movl 8(%esp),%eax - andl $0x80000000,%edx - andl $0x7fffffff,%eax - orl %edx,%eax - movl %eax,8(%esp) - fldl 4(%esp) - ret -END (__copysign) -weak_alias (__copysign, copysign) diff --git a/sysdeps/i386/fpu/s_copysignf.S b/sysdeps/i386/fpu/s_copysignf.S deleted file mode 100644 index 57b1a6f119..0000000000 --- a/sysdeps/i386/fpu/s_copysignf.S +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_copysignf.S,v 1.3 1995/05/08 23:53:25 jtc Exp $") - -ENTRY(__copysignf) - movl 8(%esp),%edx - movl 4(%esp),%eax - andl $0x80000000,%edx - andl $0x7fffffff,%eax - orl %edx,%eax - movl %eax,4(%esp) - flds 4(%esp) - ret -END (__copysignf) -weak_alias (__copysignf, copysignf) diff --git a/sysdeps/i386/fpu/s_copysignl.S b/sysdeps/i386/fpu/s_copysignl.S deleted file mode 100644 index 2163e7b014..0000000000 --- a/sysdeps/i386/fpu/s_copysignl.S +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -ENTRY(__copysignl) - movl 24(%esp),%edx - movl 12(%esp),%eax - andl $0x8000,%edx - andl $0x7fff,%eax - orl %edx,%eax - movl %eax,12(%esp) - fldt 4(%esp) - ret -END (__copysignl) -weak_alias (__copysignl, copysignl) diff --git a/sysdeps/i386/fpu/s_expm1.S b/sysdeps/i386/fpu/s_expm1.S deleted file mode 100644 index 59fded2d5a..0000000000 --- a/sysdeps/i386/fpu/s_expm1.S +++ /dev/null @@ -1,113 +0,0 @@ -/* ix87 specific implementation of exp(x)-1. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - Based on code by John C. Bowman <bowman@ipp-garching.mpg.de>. 
- Corrections by H.J. Lu (hjl@gnu.ai.mit.edu), 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - /* Using: e^x - 1 = 2^(x * log2(e)) - 1 */ - -#include <sysdep.h> -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type minus1,@object -minus1: .double -1.0 - ASM_SIZE_DIRECTIVE(minus1) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type l2e,@object -l2e: .tfloat 1.442695040888963407359924681002 - ASM_SIZE_DIRECTIVE(l2e) - -DEFINE_DBL_MIN - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__expm1) - movzwl 4+6(%esp), %eax - xorb $0x80, %ah // invert sign bit (now 1 is "positive") - cmpl $0xc086, %eax // is num >= 704? - jae HIDDEN_JUMPTARGET (__exp) - - fldl 4(%esp) // x - fxam // Is NaN, +-Inf or +-0? - xorb $0x80, %ah - cmpl $0xc043, %eax // is num <= -38.0? - fstsw %ax - movb $0x45, %ch - jb 4f - - // Below -38.0 (may be -NaN or -Inf). - andb %ah, %ch -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpb $0x01, %ch - je 5f // If -NaN, jump. - jmp 2f // -large, possibly -Inf. - -4: // In range -38.0 to 704.0 (may be +-0 but not NaN or +-Inf). - andb %ah, %ch - cmpb $0x40, %ch - je 3f // If +-0, jump. 
-#ifdef PIC - LOAD_PIC_REG (dx) -#endif - -5: fldt MO(l2e) // log2(e) : x - fmulp // log2(e)*x - fld %st // log2(e)*x : log2(e)*x - // Set round-to-nearest temporarily. - subl $8, %esp - cfi_adjust_cfa_offset (8) - fstcw 4(%esp) - movl $0xf3ff, %ecx - andl 4(%esp), %ecx - movl %ecx, (%esp) - fldcw (%esp) - frndint // int(log2(e)*x) : log2(e)*x - fldcw 4(%esp) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - fsubr %st, %st(1) // int(log2(e)*x) : fract(log2(e)*x) - fxch // fract(log2(e)*x) : int(log2(e)*x) - f2xm1 // 2^fract(log2(e)*x)-1 : int(log2(e)*x) - fscale // 2^(log2(e)*x)-2^int(log2(e)*x) : int(log2(e)*x) - fxch // int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fldl MO(one) // 1 : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fscale // 2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fsubrl MO(one) // 1-2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fstp %st(1) // 1-2^int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fsubrp %st, %st(1) // 2^(log2(e)*x) - DBL_CHECK_FORCE_UFLOW - ret - -2: fstp %st - fldl MO(minus1) // Set result to -1.0. -3: ret -END(__expm1) -weak_alias (__expm1, expm1) diff --git a/sysdeps/i386/fpu/s_expm1f.S b/sysdeps/i386/fpu/s_expm1f.S deleted file mode 100644 index 4f0b2e7832..0000000000 --- a/sysdeps/i386/fpu/s_expm1f.S +++ /dev/null @@ -1,113 +0,0 @@ -/* ix87 specific implementation of exp(x)-1. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. - Based on code by John C. Bowman <bowman@ipp-garching.mpg.de>. - Corrections by H.J. Lu (hjl@gnu.ai.mit.edu), 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - /* Using: e^x - 1 = 2^(x * log2(e)) - 1 */ - -#include <sysdep.h> -#include <machine/asm.h> -#include <i386-math-asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type minus1,@object -minus1: .double -1.0 - ASM_SIZE_DIRECTIVE(minus1) - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - .type l2e,@object -l2e: .tfloat 1.442695040888963407359924681002 - ASM_SIZE_DIRECTIVE(l2e) - -DEFINE_FLT_MIN - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - - .text -ENTRY(__expm1f) - movzwl 4+2(%esp), %eax - xorb $0x80, %ah // invert sign bit (now 1 is "positive") - cmpl $0xc2b1, %eax // is num >= 88.5? - jae HIDDEN_JUMPTARGET (__expf) - - flds 4(%esp) // x - fxam // Is NaN, +-Inf or +-0? - xorb $0x80, %ah - cmpl $0xc190, %eax // is num <= -18.0? - fstsw %ax - movb $0x45, %ch - jb 4f - - // Below -18.0 (may be -NaN or -Inf). - andb %ah, %ch -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - cmpb $0x01, %ch - je 5f // If -NaN, jump. - jmp 2f // -large, possibly -Inf. - -4: // In range -18.0 to 88.5 (may be +-0 but not NaN or +-Inf). - andb %ah, %ch - cmpb $0x40, %ch - je 3f // If +-0, jump. -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - -5: fldt MO(l2e) // log2(e) : x - fmulp // log2(e)*x - fld %st // log2(e)*x : log2(e)*x - // Set round-to-nearest temporarily. 
- subl $8, %esp - cfi_adjust_cfa_offset (8) - fstcw 4(%esp) - movl $0xf3ff, %ecx - andl 4(%esp), %ecx - movl %ecx, (%esp) - fldcw (%esp) - frndint // int(log2(e)*x) : log2(e)*x - fldcw 4(%esp) - addl $8, %esp - cfi_adjust_cfa_offset (-8) - fsubr %st, %st(1) // int(log2(e)*x) : fract(log2(e)*x) - fxch // fract(log2(e)*x) : int(log2(e)*x) - f2xm1 // 2^fract(log2(e)*x)-1 : int(log2(e)*x) - fscale // 2^(log2(e)*x)-2^int(log2(e)*x) : int(log2(e)*x) - fxch // int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fldl MO(one) // 1 : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fscale // 2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fsubrl MO(one) // 1-2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fstp %st(1) // 1-2^int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x) - fsubrp %st, %st(1) // 2^(log2(e)*x) - FLT_CHECK_FORCE_UFLOW - ret - -2: fstp %st - fldl MO(minus1) // Set result to -1.0. -3: ret -END(__expm1f) -weak_alias (__expm1f, expm1f) diff --git a/sysdeps/i386/fpu/s_expm1l.S b/sysdeps/i386/fpu/s_expm1l.S deleted file mode 100644 index 7fbd99b0db..0000000000 --- a/sysdeps/i386/fpu/s_expm1l.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_EXPM1L -#include <e_expl.S> diff --git a/sysdeps/i386/fpu/s_fabs.S b/sysdeps/i386/fpu/s_fabs.S deleted file mode 100644 index 23ae9dccb9..0000000000 --- a/sysdeps/i386/fpu/s_fabs.S +++ /dev/null @@ -1,9 +0,0 @@ -#include <sysdep.h> - - .text -ENTRY(__fabs) - fldl 4(%esp) - fabs - ret -END(__fabs) -weak_alias (__fabs, fabs) diff --git a/sysdeps/i386/fpu/s_fabsf.S b/sysdeps/i386/fpu/s_fabsf.S deleted file mode 100644 index c0407a8839..0000000000 --- a/sysdeps/i386/fpu/s_fabsf.S +++ /dev/null @@ -1,9 +0,0 @@ -#include <sysdep.h> - - .text -ENTRY(__fabsf) - flds 4(%esp) - fabs - ret -END(__fabsf) -weak_alias (__fabsf, fabsf) diff --git a/sysdeps/i386/fpu/s_fabsl.S b/sysdeps/i386/fpu/s_fabsl.S deleted file mode 100644 index a12a3e050b..0000000000 --- a/sysdeps/i386/fpu/s_fabsl.S +++ /dev/null @@ 
-1,9 +0,0 @@ -#include <sysdep.h> - - .text -ENTRY(__fabsl) - fldt 4(%esp) - fabs - ret -END(__fabsl) -weak_alias (__fabsl, fabsl) diff --git a/sysdeps/i386/fpu/s_fdim.c b/sysdeps/i386/fpu/s_fdim.c deleted file mode 100644 index 6243c62998..0000000000 --- a/sysdeps/i386/fpu/s_fdim.c +++ /dev/null @@ -1,50 +0,0 @@ -/* Return positive difference between arguments. i386 version. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <errno.h> -#include <fpu_control.h> -#include <math.h> -#include <math_private.h> - -double -__fdim (double x, double y) -{ - if (islessequal (x, y)) - return 0.0; - - /* To avoid double rounding, set double precision for the - subtraction. math_narrow_eval is still needed to eliminate - excess range in the case of overflow. If the result of the - subtraction is in the subnormal range for double, it is exact, so - no issues of double rounding for subnormals arise. 
*/ - fpu_control_t cw, cw_double; - _FPU_GETCW (cw); - cw_double = (cw & ~_FPU_EXTENDED) | _FPU_DOUBLE; - _FPU_SETCW (cw_double); - double r = math_narrow_eval (x - y); - _FPU_SETCW (cw); - if (isinf (r) && !isinf (x) && !isinf (y)) - __set_errno (ERANGE); - - return r; -} -weak_alias (__fdim, fdim) -#ifdef NO_LONG_DOUBLE -strong_alias (__fdim, __fdiml) -weak_alias (__fdim, fdiml) -#endif diff --git a/sysdeps/i386/fpu/s_finite.S b/sysdeps/i386/fpu/s_finite.S deleted file mode 100644 index 1ae4aed451..0000000000 --- a/sysdeps/i386/fpu/s_finite.S +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Written by Joe Keane <jgk@jgk.org>. - */ - -#include <machine/asm.h> - -ENTRY(__finite) - movl 8(%esp),%eax - movl $0xFFEFFFFF,%ecx - subl %eax,%ecx - xorl %ecx,%eax - shrl $31, %eax - ret -END (__finite) -weak_alias (__finite, finite) -hidden_def (__finite) - diff --git a/sysdeps/i386/fpu/s_finitef.S b/sysdeps/i386/fpu/s_finitef.S deleted file mode 100644 index 69e72facff..0000000000 --- a/sysdeps/i386/fpu/s_finitef.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Written by Joe Keane <jgk@jgk.org>. - */ - -#include <machine/asm.h> - -ENTRY(__finitef) - movl 4(%esp),%eax - movl $0xFF7FFFFF,%ecx - subl %eax,%ecx - xorl %ecx,%eax - shrl $31,%eax - ret -END (__finitef) -weak_alias (__finitef, finitef) -hidden_def (__finitef) diff --git a/sysdeps/i386/fpu/s_finitel.S b/sysdeps/i386/fpu/s_finitel.S deleted file mode 100644 index cce90e18fc..0000000000 --- a/sysdeps/i386/fpu/s_finitel.S +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Written by Joe Keane <jgk@jgk.org>. - */ - -#include <machine/asm.h> - -ENTRY(__finitel) - movl 12(%esp),%eax - orl $0xffff8000, %eax - incl %eax - shrl $31, %eax - ret -END (__finitel) -weak_alias (__finitel, finitel) -hidden_def (__finitel) diff --git a/sysdeps/i386/fpu/s_floor.S b/sysdeps/i386/fpu/s_floor.S deleted file mode 100644 index ed837dae40..0000000000 --- a/sysdeps/i386/fpu/s_floor.S +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. 
- * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_floor.S,v 1.4 1995/05/09 00:01:59 jtc Exp $") - -ENTRY(__floor) - fldl 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x400,%edx /* round towards -oo */ - orl 4(%esp),%edx - andl $0xf7ff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__floor) -weak_alias (__floor, floor) diff --git a/sysdeps/i386/fpu/s_floorf.S b/sysdeps/i386/fpu/s_floorf.S deleted file mode 100644 index 84b6f7ed99..0000000000 --- a/sysdeps/i386/fpu/s_floorf.S +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_floorf.S,v 1.3 1995/05/09 00:04:32 jtc Exp $") - -ENTRY(__floorf) - flds 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x400,%edx /* round towards -oo */ - orl 4(%esp),%edx - andl $0xf7ff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__floorf) -weak_alias (__floorf, floorf) diff --git a/sysdeps/i386/fpu/s_floorl.S b/sysdeps/i386/fpu/s_floorl.S deleted file mode 100644 index dc74a0c446..0000000000 --- a/sysdeps/i386/fpu/s_floorl.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. 
- * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -ENTRY(__floorl) - fldt 4(%esp) - subl $32,%esp - cfi_adjust_cfa_offset (32) - - fnstenv 4(%esp) /* store fpu environment */ - - /* We use here %edx although only the low 1 bits are defined. - But none of the operations should care and they are faster - than the 16 bit operations. */ - movl $0x400,%edx /* round towards -oo */ - orl 4(%esp),%edx - andl $0xf7ff,%edx - movl %edx,(%esp) - fldcw (%esp) /* load modified control word */ - - frndint /* round */ - - /* Preserve "invalid" exceptions from sNaN input. */ - fnstsw - andl $0x1, %eax - orl %eax, 8(%esp) - - fldenv 4(%esp) /* restore original environment */ - - addl $32,%esp - cfi_adjust_cfa_offset (-32) - ret -END (__floorl) -weak_alias (__floorl, floorl) diff --git a/sysdeps/i386/fpu/s_fmax.S b/sysdeps/i386/fpu/s_fmax.S deleted file mode 100644 index 218dcef421..0000000000 --- a/sysdeps/i386/fpu/s_fmax.S +++ /dev/null @@ -1,43 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - - .text -ENTRY(__fmax) - fldl 12(%esp) // y - fxam - fnstsw - fldl 4(%esp) // y : x - - andb $0x45, %ah - cmpb $0x01, %ah - je 1f // y == NaN - - fucom %st(1) - fnstsw - sahf - jnc 1f - - fxch %st(1) -1: fstp %st(1) - - ret -END(__fmax) -weak_alias (__fmax, fmax) diff --git a/sysdeps/i386/fpu/s_fmaxf.S b/sysdeps/i386/fpu/s_fmaxf.S deleted file mode 100644 index b7a00cefeb..0000000000 --- a/sysdeps/i386/fpu/s_fmaxf.S +++ /dev/null @@ -1,43 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmaxf) - flds 8(%esp) // y - fxam - fnstsw - flds 4(%esp) // y : x - - andb $0x45, %ah - cmpb $0x01, %ah - je 1f // y == NaN - - fucom %st(1) - fnstsw - sahf - jnc 1f - - fxch %st(1) -1: fstp %st(1) - - ret -END(__fmaxf) -weak_alias (__fmaxf, fmaxf) diff --git a/sysdeps/i386/fpu/s_fmaxl.S b/sysdeps/i386/fpu/s_fmaxl.S deleted file mode 100644 index 68162921db..0000000000 --- a/sysdeps/i386/fpu/s_fmaxl.S +++ /dev/null @@ -1,71 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmaxl) - fldt 16(%esp) // y - fxam - fnstsw - fldt 4(%esp) // y : x - - andb $0x45, %ah - cmpb $0x01, %ah - je 2f // y == NaN - - fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 3f // x == NaN - - fucom %st(1) - fnstsw - sahf - jnc 1f - - fxch %st(1) -1: fstp %st(1) - - ret - -2: // st(1) is a NaN; st(0) may or may not be. - fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 4f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 23(%esp) - jz 4f - fstp %st(1) - ret - -3: // st(0) is a NaN; st(1) is not. Test if st(0) is signaling. - testb $0x40, 11(%esp) - jz 4f - fstp %st(0) - ret - -4: // Both arguments are NaNs, or one is a signaling NaN. - faddp - ret -END(__fmaxl) -weak_alias (__fmaxl, fmaxl) diff --git a/sysdeps/i386/fpu/s_fmin.S b/sysdeps/i386/fpu/s_fmin.S deleted file mode 100644 index a5bb0e06dd..0000000000 --- a/sysdeps/i386/fpu/s_fmin.S +++ /dev/null @@ -1,43 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmin) - fldl 4(%esp) // x - fldl 12(%esp) // x : y - - fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 1f // y == NaN - - fucom %st(1) - fnstsw - sahf - jc 2f - -1: fxch %st(1) -2: fstp %st(1) - - ret -END(__fmin) -weak_alias (__fmin, fmin) diff --git a/sysdeps/i386/fpu/s_fminf.S b/sysdeps/i386/fpu/s_fminf.S deleted file mode 100644 index fba4a41120..0000000000 --- a/sysdeps/i386/fpu/s_fminf.S +++ /dev/null @@ -1,43 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fminf) - flds 4(%esp) // x - flds 8(%esp) // x : y - - fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 1f // y == NaN - - fucom %st(1) - fnstsw - sahf - jc 2f - -1: fxch %st(1) -2: fstp %st(1) - - ret -END(__fminf) -weak_alias (__fminf, fminf) diff --git a/sysdeps/i386/fpu/s_fminl.S b/sysdeps/i386/fpu/s_fminl.S deleted file mode 100644 index 12ef21fda9..0000000000 --- a/sysdeps/i386/fpu/s_fminl.S +++ /dev/null @@ -1,71 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fminl) - fldt 16(%esp) // y - fxam - fnstsw - fldt 4(%esp) // y : x - - andb $0x45, %ah - cmpb $0x01, %ah - je 2f // y == NaN - - fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 3f // x == NaN - - fucom %st(1) - fnstsw - sahf - jc 1f - - fxch %st(1) -1: fstp %st(1) - - ret - -2: // st(1) is a NaN; st(0) may or may not be. 
- fxam - fnstsw - andb $0x45, %ah - cmpb $0x01, %ah - je 4f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 23(%esp) - jz 4f - fstp %st(1) - ret - -3: // st(0) is a NaN; st(1) is not. Test if st(0) is signaling. - testb $0x40, 11(%esp) - jz 4f - fstp %st(0) - ret - -4: // Both arguments are NaNs, or one is a signaling NaN. - faddp - ret -END(__fminl) -weak_alias (__fminl, fminl) diff --git a/sysdeps/i386/fpu/s_fpclassifyl.c b/sysdeps/i386/fpu/s_fpclassifyl.c deleted file mode 100644 index ce19fd0035..0000000000 --- a/sysdeps/i386/fpu/s_fpclassifyl.c +++ /dev/null @@ -1,42 +0,0 @@ -/* Return classification value corresponding to argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <math.h> - -#include <math_private.h> - - -int -__fpclassifyl (long double x) -{ - u_int32_t ex, hx, lx; - int retval = FP_NORMAL; - - GET_LDOUBLE_WORDS (ex, hx, lx, x); - ex &= 0x7fff; - if ((ex | lx | hx) == 0) - retval = FP_ZERO; - else if (ex == 0 && (hx & 0x80000000) == 0) - retval = FP_SUBNORMAL; - else if (ex == 0x7fff) - retval = ((hx & 0x7fffffff) | lx) != 0 ? 
FP_NAN : FP_INFINITE; - - return retval; -} -libm_hidden_def (__fpclassifyl) diff --git a/sysdeps/i386/fpu/s_frexp.S b/sysdeps/i386/fpu/s_frexp.S deleted file mode 100644 index 104f733bf6..0000000000 --- a/sysdeps/i386/fpu/s_frexp.S +++ /dev/null @@ -1,83 +0,0 @@ -/* ix87 specific frexp implementation for double. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type two54,@object -two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43 - ASM_SIZE_DIRECTIVE(two54) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - -#define PARMS 4 /* no space for saved regs */ -#define VAL0 PARMS -#define VAL1 VAL0+4 -#define EXPP VAL1+4 - - .text -ENTRY (__frexp) - - movl VAL0(%esp), %ecx - movl VAL1(%esp), %eax - movl %eax, %edx - andl $0x7fffffff, %eax - orl %eax, %ecx - jz 1f - xorl %ecx, %ecx - cmpl $0x7ff00000, %eax - jae 1f - - cmpl $0x00100000, %eax - jae 2f - - fldl VAL0(%esp) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fmull MO(two54) - movl $-54, %ecx - fstpl VAL0(%esp) - fwait - movl VAL1(%esp), %eax - movl %eax, %edx - andl $0x7fffffff, %eax - -2: shrl $20, %eax - andl $0x800fffff, %edx - subl $1022, %eax - orl $0x3fe00000, %edx - addl %eax, %ecx - movl %edx, VAL1(%esp) - - /* Store %ecx in the variable pointed to by the second argument, - get the factor from the stack and return. */ -1: movl EXPP(%esp), %eax - fldl VAL0(%esp) - movl %ecx, (%eax) - - ret -END (__frexp) -weak_alias (__frexp, frexp) diff --git a/sysdeps/i386/fpu/s_frexpf.S b/sysdeps/i386/fpu/s_frexpf.S deleted file mode 100644 index f21c39ec4b..0000000000 --- a/sysdeps/i386/fpu/s_frexpf.S +++ /dev/null @@ -1,80 +0,0 @@ -/* ix87 specific frexp implementation for float. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type two25,@object -two25: .byte 0, 0, 0, 0x4c - ASM_SIZE_DIRECTIVE(two25) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - -#define PARMS 4 /* no space for saved regs */ -#define VAL PARMS -#define EXPP VAL+4 - - .text -ENTRY (__frexpf) - - movl VAL(%esp), %eax - xorl %ecx, %ecx - movl %eax, %edx - andl $0x7fffffff, %eax - jz 1f - cmpl $0x7f800000, %eax - jae 1f - - cmpl $0x00800000, %eax - jae 2f - - flds VAL(%esp) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fmuls MO(two25) - movl $-25, %ecx - fstps VAL(%esp) - fwait - movl VAL(%esp), %eax - movl %eax, %edx - andl $0x7fffffff, %eax - -2: shrl $23, %eax - andl $0x807fffff, %edx - subl $126, %eax - orl $0x3f000000, %edx - addl %eax, %ecx - movl %edx, VAL(%esp) - - /* Store %ecx in the variable pointed to by the second argument, - get the factor from the stack and return. */ -1: movl EXPP(%esp), %eax - flds VAL(%esp) - movl %ecx, (%eax) - - ret -END (__frexpf) -weak_alias (__frexpf, frexpf) diff --git a/sysdeps/i386/fpu/s_frexpl.S b/sysdeps/i386/fpu/s_frexpl.S deleted file mode 100644 index 04f28888d2..0000000000 --- a/sysdeps/i386/fpu/s_frexpl.S +++ /dev/null @@ -1,92 +0,0 @@ -/* ix87 specific frexp implementation for long double. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - - .section .rodata - - .align ALIGNARG(4) - .type two64,@object -two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 - ASM_SIZE_DIRECTIVE(two64) - -#ifdef PIC -#define MO(op) op##@GOTOFF(%edx) -#else -#define MO(op) op -#endif - -#define PARMS 4 /* no space for saved regs */ -#define VAL0 PARMS -#define VAL1 VAL0+4 -#define VAL2 VAL1+4 -#define EXPP VAL2+4 - - .text -ENTRY (__frexpl) - - movl VAL0(%esp), %ecx - movl VAL2(%esp), %eax - orl VAL1(%esp), %ecx - movl %eax, %edx - andl $0x7fff, %eax - orl %eax, %ecx - jz 1f - xorl %ecx, %ecx - cmpl $0x7fff, %eax - je 3f - - cmpl $0, %eax - jne 2f - - fldt VAL0(%esp) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - fmull MO(two64) /* It's not necessary to use a 80bit factor */ - movl $-64, %ecx - fstpt VAL0(%esp) - fwait - movl VAL2(%esp), %eax - movl %eax, %edx - andl $0x7fff, %eax - -2: andl $0x8000, %edx - subl $16382, %eax - orl $0x3ffe, %edx - addl %eax, %ecx - movl %edx, VAL2(%esp) - - /* Store %ecx in the variable pointed to by the second argument, - get the factor from the stack and return. */ -1: movl EXPP(%esp), %eax - fldt VAL0(%esp) - movl %ecx, (%eax) - - ret - - /* Infinity or NaN; ensure signaling NaNs are quieted. 
*/ -3: movl EXPP(%esp), %eax - fldt VAL0(%esp) - fadd %st - movl %ecx, (%eax) - ret -END (__frexpl) -weak_alias (__frexpl, frexpl) diff --git a/sysdeps/i386/fpu/s_isinfl.c b/sysdeps/i386/fpu/s_isinfl.c deleted file mode 100644 index cdd77183fa..0000000000 --- a/sysdeps/i386/fpu/s_isinfl.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Change for long double by Ulrich Drepper <drepper@cygnus.com>. - * Intel i387 specific version. - * Public domain. - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: $"; -#endif - -/* - * isinfl(x) returns 1 if x is inf, -1 if x is -inf, else 0; - * no branching! - */ - -#include <math.h> -#include <math_private.h> - -int __isinfl(long double x) -{ - int32_t se,hx,lx; - GET_LDOUBLE_WORDS(se,hx,lx,x); - /* This additional ^ 0x80000000 is necessary because in Intel's - internal representation of the implicit one is explicit. */ - lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); - lx |= -lx; - se &= 0x8000; - return ~(lx >> 31) & (1 - (se >> 14)); -} -hidden_def (__isinfl) -weak_alias (__isinfl, isinfl) diff --git a/sysdeps/i386/fpu/s_isnanl.c b/sysdeps/i386/fpu/s_isnanl.c deleted file mode 100644 index 816396d8fb..0000000000 --- a/sysdeps/i386/fpu/s_isnanl.c +++ /dev/null @@ -1,43 +0,0 @@ -/* s_isnanl.c -- long double version for i387 of s_isnan.c. - * Conversion to long double by Ulrich Drepper, - * Cygnus Support, drepper@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: $"; -#endif - -/* - * isnanl(x) returns 1 is x is nan, else 0; - * no branching! - */ - -#include <math.h> -#include <math_private.h> - -int __isnanl(long double x) -{ - int32_t se,hx,lx; - GET_LDOUBLE_WORDS(se,hx,lx,x); - se = (se & 0x7fff) << 1; - /* The additional & 0x7fffffff is required because Intel's - extended format has the normally implicit 1 explicit - present. Sigh! */ - lx |= hx & 0x7fffffff; - se |= (u_int32_t)(lx|(-lx))>>31; - se = 0xfffe - se; - return (int)((u_int32_t)(se))>>16; -} -hidden_def (__isnanl) -weak_alias (__isnanl, isnanl) diff --git a/sysdeps/i386/fpu/s_llrint.S b/sysdeps/i386/fpu/s_llrint.S deleted file mode 100644 index a597183aab..0000000000 --- a/sysdeps/i386/fpu/s_llrint.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Round argument to nearest integral value according to current rounding - direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - - .text -ENTRY(__llrint) - fldl 4(%esp) - subl $8, %esp - cfi_adjust_cfa_offset (8) - fistpll (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - ret -END(__llrint) -weak_alias (__llrint, llrint) diff --git a/sysdeps/i386/fpu/s_llrintf.S b/sysdeps/i386/fpu/s_llrintf.S deleted file mode 100644 index a4b574eccb..0000000000 --- a/sysdeps/i386/fpu/s_llrintf.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Round argument to nearest integral value according to current rounding - direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__llrintf) - flds 4(%esp) - subl $8, %esp - cfi_adjust_cfa_offset (8) - fistpll (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - ret -END(__llrintf) -weak_alias (__llrintf, llrintf) diff --git a/sysdeps/i386/fpu/s_llrintl.S b/sysdeps/i386/fpu/s_llrintl.S deleted file mode 100644 index 7b48c02ef4..0000000000 --- a/sysdeps/i386/fpu/s_llrintl.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Round argument to nearest integral value according to current rounding - direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__llrintl) - fldt 4(%esp) - subl $8, %esp - cfi_adjust_cfa_offset (8) - fistpll (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - popl %edx - cfi_adjust_cfa_offset (-4) - ret -END(__llrintl) -weak_alias (__llrintl, llrintl) diff --git a/sysdeps/i386/fpu/s_log1p.S b/sysdeps/i386/fpu/s_log1p.S deleted file mode 100644 index 7978e76095..0000000000 --- a/sysdeps/i386/fpu/s_log1p.S +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $") - - .section .rodata - - .align ALIGNARG(4) - /* The fyl2xp1 can only be used for values in - -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2 - 0.29 is a safe value. - */ -limit: .double 0.29 -one: .double 1.0 - -DEFINE_DBL_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - -/* - * Use the fyl2xp1 function when the argument is in the range -0.29 to 0.29, - * otherwise fyl2x with the needed extra computation. 
- */ - .text -ENTRY(__log1p) - fldln2 - - fldl 4(%esp) - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - fxam - fnstsw - fld %st - sahf - jc 3f // in case x is NaN or ±Inf -4: fabs - fcompl MO(limit) - fnstsw - sahf - jc 2f - - faddl MO(one) - fyl2x - ret - -2: fyl2xp1 - DBL_CHECK_FORCE_UFLOW_NONNAN - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret - -END (__log1p) diff --git a/sysdeps/i386/fpu/s_log1pf.S b/sysdeps/i386/fpu/s_log1pf.S deleted file mode 100644 index acaa299d94..0000000000 --- a/sysdeps/i386/fpu/s_log1pf.S +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_log1pf.S,v 1.4 1995/05/09 00:13:05 jtc Exp $") - - .section .rodata - - .align ALIGNARG(4) - /* The fyl2xp1 can only be used for values in - -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2 - 0.29 is a safe value. - */ -limit: .float 0.29 -one: .float 1.0 - -DEFINE_FLT_MIN - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - -/* - * Use the fyl2xp1 function when the argument is in the range -0.29 to 0.29, - * otherwise fyl2x with the needed extra computation. - */ - .text -ENTRY(__log1pf) - fldln2 - - flds 4(%esp) - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - fxam - fnstsw - fld %st - sahf - jc 3f // in case x is NaN or ±Inf -4: fabs - fcomps MO(limit) - fnstsw - sahf - jc 2f - - fadds MO(one) - fyl2x - ret - -2: fyl2xp1 - FLT_CHECK_FORCE_UFLOW_NONNAN - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - ret - -END (__log1pf) diff --git a/sysdeps/i386/fpu/s_log1pl.S b/sysdeps/i386/fpu/s_log1pl.S deleted file mode 100644 index 0fd05cbdb3..0000000000 --- a/sysdeps/i386/fpu/s_log1pl.S +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. 
- */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $") - - .section .rodata - - .align ALIGNARG(4) - /* The fyl2xp1 can only be used for values in - -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2 - 0.29 is a safe value. - */ -limit: .tfloat 0.29 - /* Please note: we use a double value here. Since 1.0 has - an exact representation this does not effect the accuracy - but it helps to optimize the code. */ -one: .double 1.0 - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - -/* - * Use the fyl2xp1 function when the argument is in the range -0.29 to 0.29, - * otherwise fyl2x with the needed extra computation. - */ - .text -ENTRY(__log1pl) - fldln2 - - fldt 4(%esp) - -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - - fxam - fnstsw - fld %st - sahf - jc 3f // in case x is NaN or ±Inf -4: - fabs - fldt MO(limit) - fcompp - fnstsw - sahf - jnc 2f - - movzwl 4+8(%esp), %eax - xorb $0x80, %ah - cmpl $0xc040, %eax - jae 5f - - faddl MO(one) -5: fyl2x - ret - -2: fyl2xp1 - ret - -3: jp 4b // in case x is ±Inf - fstp %st(1) - fstp %st(1) - fadd %st(0) - ret - -END (__log1pl) diff --git a/sysdeps/i386/fpu/s_logb.S b/sysdeps/i386/fpu/s_logb.S deleted file mode 100644 index f78c091c8a..0000000000 --- a/sysdeps/i386/fpu/s_logb.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_logb.S,v 1.4 1995/05/09 00:14:30 jtc Exp $") - -ENTRY(__logb) - fldl 4(%esp) - fxtract - fstp %st - ret -END (__logb) -weak_alias (__logb, logb) diff --git a/sysdeps/i386/fpu/s_logbf.S b/sysdeps/i386/fpu/s_logbf.S deleted file mode 100644 index 91eb3d2925..0000000000 --- a/sysdeps/i386/fpu/s_logbf.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_logbf.S,v 1.3 1995/05/09 00:15:12 jtc Exp $") - -ENTRY(__logbf) - flds 4(%esp) - fxtract - fstp %st - ret -END (__logbf) -weak_alias (__logbf, logbf) diff --git a/sysdeps/i386/fpu/s_logbl.c b/sysdeps/i386/fpu/s_logbl.c deleted file mode 100644 index 391e2db489..0000000000 --- a/sysdeps/i386/fpu/s_logbl.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <math_private.h> - -long double -__logbl (long double x) -{ - long double res; - - asm ("fxtract\n" - "fstp %%st" : "=t" (res) : "0" (x)); - return res; -} - -weak_alias (__logbl, logbl) diff --git a/sysdeps/i386/fpu/s_lrint.S b/sysdeps/i386/fpu/s_lrint.S deleted file mode 100644 index 79a374b399..0000000000 --- a/sysdeps/i386/fpu/s_lrint.S +++ /dev/null @@ -1,34 +0,0 @@ -/* Round argument to nearest integral value according to current rounding - direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - - .text -ENTRY(__lrint) - fldl 4(%esp) - subl $4, %esp - cfi_adjust_cfa_offset (4) - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - ret -END(__lrint) -weak_alias (__lrint, lrint) diff --git a/sysdeps/i386/fpu/s_lrintf.S b/sysdeps/i386/fpu/s_lrintf.S deleted file mode 100644 index fc6e68e073..0000000000 --- a/sysdeps/i386/fpu/s_lrintf.S +++ /dev/null @@ -1,34 +0,0 @@ -/* Round argument to nearest integral value according to current rounding - direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__lrintf) - flds 4(%esp) - subl $4, %esp - cfi_adjust_cfa_offset (4) - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - ret -END(__lrintf) -weak_alias (__lrintf, lrintf) diff --git a/sysdeps/i386/fpu/s_lrintl.S b/sysdeps/i386/fpu/s_lrintl.S deleted file mode 100644 index ba6dbdf44c..0000000000 --- a/sysdeps/i386/fpu/s_lrintl.S +++ /dev/null @@ -1,34 +0,0 @@ -/* Round argument to nearest integral value according to current rounding - direction. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__lrintl) - fldt 4(%esp) - subl $4, %esp - cfi_adjust_cfa_offset (4) - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - ret -END(__lrintl) -weak_alias (__lrintl, lrintl) diff --git a/sysdeps/i386/fpu/s_nearbyint.S b/sysdeps/i386/fpu/s_nearbyint.S deleted file mode 100644 index f7b79b6ff2..0000000000 --- a/sysdeps/i386/fpu/s_nearbyint.S +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ -/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */ - -#include <machine/asm.h> - -ENTRY(__nearbyint) - fldl 4(%esp) - subl $32, %esp - cfi_adjust_cfa_offset (32) - fnstenv 4(%esp) - frndint - fldenv 4(%esp) - addl $32, %esp - cfi_adjust_cfa_offset (-32) - ret -END (__nearbyint) -weak_alias (__nearbyint, nearbyint) diff --git a/sysdeps/i386/fpu/s_nearbyintf.S b/sysdeps/i386/fpu/s_nearbyintf.S deleted file mode 100644 index 92df2f87b3..0000000000 --- a/sysdeps/i386/fpu/s_nearbyintf.S +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ -/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. 
*/ - -#include <machine/asm.h> - -ENTRY(__nearbyintf) - flds 4(%esp) - subl $32, %esp - cfi_adjust_cfa_offset (32) - fnstenv 4(%esp) - frndint - fldenv 4(%esp) - addl $32, %esp - cfi_adjust_cfa_offset (-32) - ret -END (__nearbyintf) -weak_alias (__nearbyintf, nearbyintf) diff --git a/sysdeps/i386/fpu/s_nearbyintl.S b/sysdeps/i386/fpu/s_nearbyintl.S deleted file mode 100644 index 3b7d1e2436..0000000000 --- a/sysdeps/i386/fpu/s_nearbyintl.S +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ -/* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */ - -#include <machine/asm.h> - -ENTRY(__nearbyintl) - fldt 4(%esp) - subl $32, %esp - cfi_adjust_cfa_offset (32) - fnstenv 4(%esp) - frndint - fnstsw - andl $0x1, %eax - orl %eax, 8(%esp) - fldenv 4(%esp) - addl $32, %esp - cfi_adjust_cfa_offset (-32) - ret -END (__nearbyintl) -weak_alias (__nearbyintl, nearbyintl) diff --git a/sysdeps/i386/fpu/s_nextafterl.c b/sysdeps/i386/fpu/s_nextafterl.c deleted file mode 100644 index 600ad7a8d3..0000000000 --- a/sysdeps/i386/fpu/s_nextafterl.c +++ /dev/null @@ -1,125 +0,0 @@ -/* s_nextafterl.c -- long double version of s_nextafter.c. - * Special version for i387. - * Conversion to long double by Ulrich Drepper, - * Cygnus Support, drepper@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: $"; -#endif - -/* IEEE functions - * nextafterl(x,y) - * return the next machine floating-point number of x in the - * direction toward y. 
- * Special cases: - */ - -#include <errno.h> -#include <math.h> -#include <math_private.h> - -long double __nextafterl(long double x, long double y) -{ - u_int32_t hx,hy,ix,iy; - u_int32_t lx,ly; - int32_t esx,esy; - - GET_LDOUBLE_WORDS(esx,hx,lx,x); - GET_LDOUBLE_WORDS(esy,hy,ly,y); - ix = esx&0x7fff; /* |x| */ - iy = esy&0x7fff; /* |y| */ - - /* Intel's extended format has the normally implicit 1 explicit - present. Sigh! */ - if(((ix==0x7fff)&&(((hx&0x7fffffff)|lx)!=0)) || /* x is nan */ - ((iy==0x7fff)&&(((hy&0x7fffffff)|ly)!=0))) /* y is nan */ - return x+y; - if(x==y) return y; /* x=y, return y */ - if((ix|hx|lx)==0) { /* x == 0 */ - long double u; - SET_LDOUBLE_WORDS(x,esy&0x8000,0,1);/* return +-minsubnormal */ - u = math_opt_barrier (x); - u = u * u; - math_force_eval (u); /* raise underflow flag */ - return x; - } - if(esx>=0) { /* x > 0 */ - if(esx>esy||((esx==esy) && (hx>hy||((hx==hy)&&(lx>ly))))) { - /* x > y, x -= ulp */ - if(lx==0) { - if (hx <= 0x80000000) { - if (esx == 0) { - --hx; - } else { - esx -= 1; - hx = hx - 1; - if (esx > 0) - hx |= 0x80000000; - } - } else - hx -= 1; - } - lx -= 1; - } else { /* x < y, x += ulp */ - lx += 1; - if(lx==0) { - hx += 1; - if (hx==0 || (esx == 0 && hx == 0x80000000)) { - esx += 1; - hx |= 0x80000000; - } - } - } - } else { /* x < 0 */ - if(esy>=0||(esx>esy||((esx==esy)&&(hx>hy||((hx==hy)&&(lx>ly)))))){ - /* x < y, x -= ulp */ - if(lx==0) { - if (hx <= 0x80000000 && esx != 0xffff8000) { - esx -= 1; - hx = hx - 1; - if ((esx&0x7fff) > 0) - hx |= 0x80000000; - } else - hx -= 1; - } - lx -= 1; - } else { /* x > y, x += ulp */ - lx += 1; - if(lx==0) { - hx += 1; - if (hx==0 || (esx == 0xffff8000 && hx == 0x80000000)) { - esx += 1; - hx |= 0x80000000; - } - } - } - } - esy = esx&0x7fff; - if(esy==0x7fff) { - long double u = x + x; /* overflow */ - math_force_eval (u); - __set_errno (ERANGE); - } - if(esy==0) { - long double u = x*x; /* underflow */ - math_force_eval (u); /* raise underflow flag */ - __set_errno 
(ERANGE); - } - SET_LDOUBLE_WORDS(x,esx,hx,lx); - return x; -} -weak_alias (__nextafterl, nextafterl) -strong_alias (__nextafterl, __nexttowardl) -weak_alias (__nextafterl, nexttowardl) diff --git a/sysdeps/i386/fpu/s_nexttoward.c b/sysdeps/i386/fpu/s_nexttoward.c deleted file mode 100644 index 0b47044760..0000000000 --- a/sysdeps/i386/fpu/s_nexttoward.c +++ /dev/null @@ -1,93 +0,0 @@ -/* s_nexttoward.c - * Special i387 version - * Conversion from s_nextafter.c by Ulrich Drepper, Cygnus Support, - * drepper@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: $"; -#endif - -/* IEEE functions - * nexttoward(x,y) - * return the next machine floating-point number of x in the - * direction toward y. - * Special cases: - */ - -#include <errno.h> -#include <math.h> -#include <math_private.h> -#include <float.h> - -double __nexttoward(double x, long double y) -{ - int32_t hx,ix,iy; - u_int32_t lx,hy,ly,esy; - - EXTRACT_WORDS(hx,lx,x); - GET_LDOUBLE_WORDS(esy,hy,ly,y); - ix = hx&0x7fffffff; /* |x| */ - iy = esy&0x7fff; /* |y| */ - - /* Intel's extended format has the normally implicit 1 explicit - present. Sigh! 
*/ - if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || /* x is nan */ - ((iy>=0x7fff)&&((hy&0x7fffffff)|ly)!=0)) /* y is nan */ - return x+y; - if((long double) x==y) return y; /* x=y, return y */ - if((ix|lx)==0) { /* x == 0 */ - double u; - INSERT_WORDS(x,(esy&0x8000)<<16,1); /* return +-minsub */ - u = math_opt_barrier (x); - u = u * u; - math_force_eval (u); /* raise underflow flag */ - return x; - } - if(hx>=0) { /* x > 0 */ - if (x > y) { /* x -= ulp */ - if(lx==0) hx -= 1; - lx -= 1; - } else { /* x < y, x += ulp */ - lx += 1; - if(lx==0) hx += 1; - } - } else { /* x < 0 */ - if (x < y) { /* x -= ulp */ - if(lx==0) hx -= 1; - lx -= 1; - } else { /* x > y, x += ulp */ - lx += 1; - if(lx==0) hx += 1; - } - } - hy = hx&0x7ff00000; - if(hy>=0x7ff00000) { - double u = x+x; /* overflow */ - math_force_eval (u); - __set_errno (ERANGE); - } - if(hy<0x00100000) { - double u = x*x; /* underflow */ - math_force_eval (u); /* raise underflow flag */ - __set_errno (ERANGE); - } - INSERT_WORDS(x,hx,lx); - return x; -} -weak_alias (__nexttoward, nexttoward) -#ifdef NO_LONG_DOUBLE -strong_alias (__nexttoward, __nexttowardl) -weak_alias (__nexttoward, nexttowardl) -#endif diff --git a/sysdeps/i386/fpu/s_nexttowardf.c b/sysdeps/i386/fpu/s_nexttowardf.c deleted file mode 100644 index e1156d1e4f..0000000000 --- a/sysdeps/i386/fpu/s_nexttowardf.c +++ /dev/null @@ -1,77 +0,0 @@ -/* s_nexttowardf.c -- float version of s_nextafter.c. - * Special i387 version. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: $"; -#endif - -#include <errno.h> -#include <math.h> -#include <math_private.h> -#include <float.h> - -float __nexttowardf(float x, long double y) -{ - int32_t hx,ix,iy; - u_int32_t hy,ly,esy; - - GET_FLOAT_WORD(hx,x); - GET_LDOUBLE_WORDS(esy,hy,ly,y); - ix = hx&0x7fffffff; /* |x| */ - iy = esy&0x7fff; /* |y| */ - - /* Intel's extended format has the normally implicit 1 explicit - present. Sigh! */ - if((ix>0x7f800000) || /* x is nan */ - (iy>=0x7fff&&(((hy&0x7fffffff)|ly)!=0))) /* y is nan */ - return x+y; - if((long double) x==y) return y; /* x=y, return y */ - if(ix==0) { /* x == 0 */ - float u; - SET_FLOAT_WORD(x,((esy&0x8000)<<16)|1);/* return +-minsub*/ - u = math_opt_barrier (x); - u = u * u; - math_force_eval (u); /* raise underflow flag */ - return x; - } - if(hx>=0) { /* x > 0 */ - if(x > y) { /* x -= ulp */ - hx -= 1; - } else { /* x < y, x += ulp */ - hx += 1; - } - } else { /* x < 0 */ - if(x < y) { /* x -= ulp */ - hx -= 1; - } else { /* x > y, x += ulp */ - hx += 1; - } - } - hy = hx&0x7f800000; - if(hy>=0x7f800000) { - float u = x+x; /* overflow */ - math_force_eval (u); - __set_errno (ERANGE); - } - if(hy<0x00800000) { - float u = x*x; /* underflow */ - math_force_eval (u); /* raise underflow flag */ - __set_errno (ERANGE); - } - SET_FLOAT_WORD(x,hx); - return x; -} -weak_alias (__nexttowardf, nexttowardf) diff --git a/sysdeps/i386/fpu/s_remquo.S b/sysdeps/i386/fpu/s_remquo.S deleted file mode 100644 index 341285db30..0000000000 --- a/sysdeps/i386/fpu/s_remquo.S +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Written by Ulrich Drepper <drepper@cygnus.com>. - * Based on e_remainder by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -#define PARMS 4 /* no space for saved regs */ -#define DVDND PARMS -#define DVSOR DVDND+8 -#define QUOP DVSOR+8 - - .text -ENTRY (__remquo) - - fldl DVSOR(%esp) - fldl DVDND(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - /* Compute the congruent of the quotient. */ - movl %eax, %ecx - shrl $8, %eax - shrl $12, %ecx - andl $4, %ecx - andl $3, %eax - orl %eax, %ecx - leal (%ecx,%ecx,2),%ecx - movl $0xef2a60, %eax - shrl %cl, %eax - andl $7, %eax - movl QUOP(%esp), %ecx - movl DVDND+4(%esp), %edx - xorl DVSOR+4(%esp), %edx - testl $0x80000000, %edx - jz 1f - negl %eax -1: movl %eax, (%ecx) - - ret -END (__remquo) -weak_alias (__remquo, remquo) diff --git a/sysdeps/i386/fpu/s_remquof.S b/sysdeps/i386/fpu/s_remquof.S deleted file mode 100644 index 62063f068f..0000000000 --- a/sysdeps/i386/fpu/s_remquof.S +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Written by Ulrich Drepper <drepper@cygnus.com>. - * Based on e_remainder by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -#define PARMS 4 /* no space for saved regs */ -#define DVDND PARMS -#define DVSOR DVDND+4 -#define QUOP DVSOR+4 - - .text -ENTRY (__remquof) - - flds DVSOR(%esp) - flds DVDND(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - /* Compute the congruent of the quotient. */ - movl %eax, %ecx - shrl $8, %eax - shrl $12, %ecx - andl $4, %ecx - andl $3, %eax - orl %eax, %ecx - leal (%ecx,%ecx,2),%ecx - movl $0xef2a60, %eax - shrl %cl, %eax - andl $7, %eax - movl QUOP(%esp), %ecx - movl DVDND(%esp), %edx - xorl DVSOR(%esp), %edx - testl $0x80000000, %edx - jz 1f - negl %eax -1: movl %eax, (%ecx) - - ret -END (__remquof) -weak_alias (__remquof, remquof) diff --git a/sysdeps/i386/fpu/s_remquol.S b/sysdeps/i386/fpu/s_remquol.S deleted file mode 100644 index f3d84fc7c2..0000000000 --- a/sysdeps/i386/fpu/s_remquol.S +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Written by Ulrich Drepper <drepper@cygnus.com>. 
- * Based on e_remainder by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -#define PARMS 4 /* no space for saved regs */ -#define DVDND PARMS -#define DVSOR DVDND+12 -#define QUOP DVSOR+12 - - .text -ENTRY (__remquol) - - fldt DVSOR(%esp) - fldt DVDND(%esp) -1: fprem1 - fstsw %ax - sahf - jp 1b - fstp %st(1) - /* Compute the congruent of the quotient. */ - movl %eax, %ecx - shrl $8, %eax - shrl $12, %ecx - andl $4, %ecx - andl $3, %eax - orl %eax, %ecx - leal (%ecx,%ecx,2),%ecx - movl $0xef2a60, %eax - shrl %cl, %eax - andl $7, %eax - movl QUOP(%esp), %ecx - movl DVDND+8(%esp), %edx - xorl DVSOR+8(%esp), %edx - testl $0x8000, %edx - jz 1f - negl %eax -1: movl %eax, (%ecx) - - ret -END (__remquol) -weak_alias (__remquol, remquol) diff --git a/sysdeps/i386/fpu/s_rint.S b/sysdeps/i386/fpu/s_rint.S deleted file mode 100644 index be36c5f0ca..0000000000 --- a/sysdeps/i386/fpu/s_rint.S +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_rint.S,v 1.4 1995/05/09 00:16:08 jtc Exp $") - -ENTRY(__rint) - fldl 4(%esp) - frndint - ret -END (__rint) -weak_alias (__rint, rint) diff --git a/sysdeps/i386/fpu/s_rintf.S b/sysdeps/i386/fpu/s_rintf.S deleted file mode 100644 index 2b358c1cf1..0000000000 --- a/sysdeps/i386/fpu/s_rintf.S +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_rintf.S,v 1.3 1995/05/09 00:17:22 jtc Exp $") - -ENTRY(__rintf) - flds 4(%esp) - frndint - ret -END (__rintf) -weak_alias (__rintf, rintf) diff --git a/sysdeps/i386/fpu/s_rintl.c b/sysdeps/i386/fpu/s_rintl.c deleted file mode 100644 index 66af9cb675..0000000000 --- a/sysdeps/i386/fpu/s_rintl.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. 
- */ - -#include <math_private.h> - -long double -__rintl (long double x) -{ - long double res; - - asm ("frndint" : "=t" (res) : "0" (x)); - return res; -} - -weak_alias (__rintl, rintl) diff --git a/sysdeps/i386/fpu/s_scalbln.c b/sysdeps/i386/fpu/s_scalbln.c deleted file mode 100644 index 1009713fbc..0000000000 --- a/sysdeps/i386/fpu/s_scalbln.c +++ /dev/null @@ -1,2 +0,0 @@ -/* Nothing to do. This function is the same as scalbn. So we define an - alias. */ diff --git a/sysdeps/i386/fpu/s_scalblnf.c b/sysdeps/i386/fpu/s_scalblnf.c deleted file mode 100644 index 5e558c3540..0000000000 --- a/sysdeps/i386/fpu/s_scalblnf.c +++ /dev/null @@ -1,2 +0,0 @@ -/* Nothing to do. This function is the same as scalbnf. So we define an - alias. */ diff --git a/sysdeps/i386/fpu/s_scalblnl.c b/sysdeps/i386/fpu/s_scalblnl.c deleted file mode 100644 index cda2ec11c8..0000000000 --- a/sysdeps/i386/fpu/s_scalblnl.c +++ /dev/null @@ -1,2 +0,0 @@ -/* Nothing to do. This function is the same as scalbnl. So we define an - alias. */ diff --git a/sysdeps/i386/fpu/s_scalbn.S b/sysdeps/i386/fpu/s_scalbn.S deleted file mode 100644 index 4e90903115..0000000000 --- a/sysdeps/i386/fpu/s_scalbn.S +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_scalbn.S,v 1.4 1995/05/09 00:19:06 jtc Exp $") - -ENTRY(__scalbn) - fildl 12(%esp) - fldl 4(%esp) - fscale - fstp %st(1) - DBL_NARROW_EVAL - ret -END (__scalbn) -strong_alias (__scalbn, __scalbln) - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_20) -compat_symbol (libc, __scalbn, scalbln, GLIBC_2_1); -#endif diff --git a/sysdeps/i386/fpu/s_scalbnf.S b/sysdeps/i386/fpu/s_scalbnf.S deleted file mode 100644 index f8353c4c75..0000000000 --- a/sysdeps/i386/fpu/s_scalbnf.S +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> -#include <i386-math-asm.h> - -RCSID("$NetBSD: s_scalbnf.S,v 1.3 1995/05/09 00:19:59 jtc Exp $") - -ENTRY(__scalbnf) - fildl 8(%esp) - flds 4(%esp) - fscale - fstp %st(1) - FLT_NARROW_EVAL - ret -END (__scalbnf) -strong_alias (__scalbnf, __scalblnf) - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_20) -compat_symbol (libc, __scalbnf, scalblnf, GLIBC_2_1); -#endif diff --git a/sysdeps/i386/fpu/s_scalbnl.S b/sysdeps/i386/fpu/s_scalbnl.S deleted file mode 100644 index 839b5ff353..0000000000 --- a/sysdeps/i386/fpu/s_scalbnl.S +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: $") - -ENTRY(__scalbnl) - fildl 16(%esp) - fldt 4(%esp) - fscale - fstp %st(1) - ret -END (__scalbnl) -strong_alias (__scalbnl, __scalblnl) - -#include <shlib-compat.h> -#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_20) -compat_symbol (libc, __scalbnl, scalblnl, GLIBC_2_1); -#endif diff --git a/sysdeps/i386/fpu/s_significand.S b/sysdeps/i386/fpu/s_significand.S deleted file mode 100644 index 4859b7ed71..0000000000 --- a/sysdeps/i386/fpu/s_significand.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_significand.S,v 1.4 1995/05/09 00:21:47 jtc Exp $") - -ENTRY(__significand) - fldl 4(%esp) - fxtract - fstp %st(1) - ret -END (__significand) -weak_alias (__significand, significand) diff --git a/sysdeps/i386/fpu/s_significandf.S b/sysdeps/i386/fpu/s_significandf.S deleted file mode 100644 index 3a2de97759..0000000000 --- a/sysdeps/i386/fpu/s_significandf.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. 
- */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_significandf.S,v 1.3 1995/05/09 00:24:07 jtc Exp $") - -ENTRY(__significandf) - flds 4(%esp) - fxtract - fstp %st(1) - ret -END (__significandf) -weak_alias (__significandf, significandf) diff --git a/sysdeps/i386/fpu/s_significandl.c b/sysdeps/i386/fpu/s_significandl.c deleted file mode 100644 index b8cb093502..0000000000 --- a/sysdeps/i386/fpu/s_significandl.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Changes for long double by Ulrich Drepper <drepper@cygnus.com> - * Public domain. - */ - -#include <math_private.h> - -long double -__significandl (long double x) -{ - long double res; - - asm ("fxtract\n" - "fstp %%st(1)" : "=t" (res) : "0" (x)); - return res; -} - -weak_alias (__significandl, significandl) diff --git a/sysdeps/i386/fpu/s_trunc.S b/sysdeps/i386/fpu/s_trunc.S deleted file mode 100644 index e9a850b877..0000000000 --- a/sysdeps/i386/fpu/s_trunc.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Truncate double value. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <machine/asm.h> - -ENTRY(__trunc) - fldl 4(%esp) - subl $32, %esp - cfi_adjust_cfa_offset (32) - fnstenv 4(%esp) - movl $0xc00, %edx - orl 4(%esp), %edx - movl %edx, (%esp) - fldcw (%esp) - frndint - fldenv 4(%esp) - addl $32, %esp - cfi_adjust_cfa_offset (-32) - ret -END(__trunc) -weak_alias (__trunc, trunc) diff --git a/sysdeps/i386/fpu/s_truncf.S b/sysdeps/i386/fpu/s_truncf.S deleted file mode 100644 index a93f5b9a2e..0000000000 --- a/sysdeps/i386/fpu/s_truncf.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Truncate float value. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - -ENTRY(__truncf) - flds 4(%esp) - subl $32, %esp - cfi_adjust_cfa_offset (32) - fnstenv 4(%esp) - movl $0xc00, %edx - orl 4(%esp), %edx - movl %edx, (%esp) - fldcw (%esp) - frndint - fldenv 4(%esp) - addl $32, %esp - cfi_adjust_cfa_offset (-32) - ret -END(__truncf) -weak_alias (__truncf, truncf) diff --git a/sysdeps/i386/fpu/s_truncl.S b/sysdeps/i386/fpu/s_truncl.S deleted file mode 100644 index a884123612..0000000000 --- a/sysdeps/i386/fpu/s_truncl.S +++ /dev/null @@ -1,40 +0,0 @@ -/* Truncate long double value. - Copyright (C) 1997-2017 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <machine/asm.h> - -ENTRY(__truncl) - fldt 4(%esp) - subl $32, %esp - cfi_adjust_cfa_offset (32) - fnstenv 4(%esp) - movl $0xc00, %edx - orl 4(%esp), %edx - movl %edx, (%esp) - fldcw (%esp) - frndint - fnstsw - andl $0x1, %eax - orl %eax, 8(%esp) - fldenv 4(%esp) - addl $32, %esp - cfi_adjust_cfa_offset (-32) - ret -END(__truncl) -weak_alias (__truncl, truncl) diff --git a/sysdeps/i386/fpu/slowexp.c b/sysdeps/i386/fpu/slowexp.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/slowexp.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/slowpow.c b/sysdeps/i386/fpu/slowpow.c deleted file mode 100644 index 1cc8931700..0000000000 --- a/sysdeps/i386/fpu/slowpow.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/t_exp.c b/sysdeps/i386/fpu/t_exp.c deleted file mode 100644 index fd37963b05..0000000000 --- a/sysdeps/i386/fpu/t_exp.c +++ /dev/null @@ -1 +0,0 @@ -/* Empty. Not needed. 
*/ diff --git a/sysdeps/i386/fpu/w_sqrt_compat.c b/sysdeps/i386/fpu/w_sqrt_compat.c deleted file mode 100644 index ddd36d0964..0000000000 --- a/sysdeps/i386/fpu/w_sqrt_compat.c +++ /dev/null @@ -1,8 +0,0 @@ -/* The inline __ieee754_sqrt is not correctly rounding; it's OK for - most internal uses in glibc, but not for sqrt itself. */ -#define __ieee754_sqrt __avoid_ieee754_sqrt -#include <math.h> -#include <math_private.h> -#undef __ieee754_sqrt -extern double __ieee754_sqrt (double); -#include <math/w_sqrt_compat.c> diff --git a/sysdeps/i386/gccframe.h b/sysdeps/i386/gccframe.h deleted file mode 100644 index 579da40ae9..0000000000 --- a/sysdeps/i386/gccframe.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Definition of object in frame unwind info. i386 version. - Copyright (C) 2001-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define DWARF_FRAME_REGISTERS 17 - -#define CRT_GET_RFIB_DATA(BASE) \ - { \ - register void *__ebx __asm__("ebx");\ - BASE = __ebx; \ - } - -#include <sysdeps/generic/gccframe.h> diff --git a/sysdeps/i386/gmp-mparam.h b/sysdeps/i386/gmp-mparam.h deleted file mode 100644 index 7ea503a403..0000000000 --- a/sysdeps/i386/gmp-mparam.h +++ /dev/null @@ -1,28 +0,0 @@ -/* gmp-mparam.h -- Compiler/machine parameter header file. 
- -Copyright (C) 1991-2017 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library; see the file COPYING.LIB. If not, see -<http://www.gnu.org/licenses/>. */ - -#define BITS_PER_MP_LIMB 32 -#define BYTES_PER_MP_LIMB 4 -#define BITS_PER_LONGINT 32 -#define BITS_PER_INT 32 -#define BITS_PER_SHORTINT 16 -#define BITS_PER_CHAR 8 - -#define IEEE_DOUBLE_BIG_ENDIAN 0 diff --git a/sysdeps/i386/htonl.S b/sysdeps/i386/htonl.S deleted file mode 100644 index 63279bb6e1..0000000000 --- a/sysdeps/i386/htonl.S +++ /dev/null @@ -1,34 +0,0 @@ -/* Change byte order in word. For Intel 80x86, x >= 4. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - -/* - INPUT PARAMETERS: - word (sp + 4) -*/ - - .text -ENTRY (htonl) - movl 4(%esp), %eax - bswap %eax - ret -END (htonl) - -weak_alias (htonl, ntohl) diff --git a/sysdeps/i386/htons.S b/sysdeps/i386/htons.S deleted file mode 100644 index a3c53a9944..0000000000 --- a/sysdeps/i386/htons.S +++ /dev/null @@ -1,35 +0,0 @@ -/* Change byte order in word. For Intel 80x86, x >= 3. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -/* - INPUT PARAMETERS: - word (sp + 4) -*/ - - .text -ENTRY (htons) - movl 4(%esp), %eax - andl $0xffff, %eax - rorw $8, %ax - ret -END (htons) - -weak_alias (htons, ntohs) diff --git a/sysdeps/i386/i386-mcount.S b/sysdeps/i386/i386-mcount.S deleted file mode 100644 index 733b8c78e7..0000000000 --- a/sysdeps/i386/i386-mcount.S +++ /dev/null @@ -1,79 +0,0 @@ -/* i386-specific implementation of profiling support. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -/* We need a special version of the `mcount' function since for ix86 it - must not clobber any register. This has several reasons: - - there is a bug in gcc as of version 2.7.2.2 which prohibits the - use of profiling together with nested functions - - the ELF `fixup' function uses GCC's regparm feature - - some (future) systems might want to pass parameters in registers. */ - - .globl C_SYMBOL_NAME(_mcount) - .type C_SYMBOL_NAME(_mcount), @function - .align ALIGNARG(4) -C_LABEL(_mcount) - /* Save the caller-clobbered registers. */ - pushl %eax - pushl %ecx - pushl %edx - - movl 12(%esp), %edx - movl 4(%ebp), %eax - - /* No need to access the PLT or GOT, __mcount_internal is an - internal function and we can make a relative call. */ - call C_SYMBOL_NAME(__mcount_internal) - - /* Pop the saved registers. Please note that `mcount' has no - return value. */ - popl %edx - popl %ecx - popl %eax - ret - ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(_mcount)) - -#undef mcount -weak_alias (_mcount, mcount) - - /* Same as above, but doesn't require a frame pointer */ - .globl C_SYMBOL_NAME(__fentry__) - .type C_SYMBOL_NAME(__fentry__), @function - .align ALIGNARG(4) -C_LABEL(__fentry__) - /* Save the caller-clobbered registers. 
*/ - pushl %eax - pushl %ecx - pushl %edx - - movl 12(%esp), %edx - movl 16(%esp), %eax - - /* No need to access the PLT or GOT, __mcount_internal is an - internal function and we can make a relative call. */ - call C_SYMBOL_NAME(__mcount_internal) - - /* Pop the saved registers. Please note that `__fentry__' has no - return value. */ - popl %edx - popl %ecx - popl %eax - ret - ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(__fentry__)) diff --git a/sysdeps/i386/i586/add_n.S b/sysdeps/i386/i586/add_n.S deleted file mode 100644 index f73df092f0..0000000000 --- a/sysdeps/i386/i586/add_n.S +++ /dev/null @@ -1,143 +0,0 @@ -/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store - sum in a third limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. 
*/ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define S2 S1+4 -#define SIZE S2+4 - - .text -ENTRY (__mpn_add_n) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %ebx - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 12) - movl S1(%esp),%esi - cfi_rel_offset (esi, 8) - movl S2(%esp),%ebx - cfi_rel_offset (ebx, 0) - movl SIZE(%esp),%ecx - movl (%ebx),%ebp - cfi_rel_offset (ebp, 4) - - decl %ecx - movl %ecx,%edx - shrl $3,%ecx - andl $7,%edx - testl %ecx,%ecx /* zero carry flag */ - jz L(end) - pushl %edx - cfi_adjust_cfa_offset (4) - - ALIGN (3) -L(oop): movl 28(%edi),%eax /* fetch destination cache line */ - leal 32(%edi),%edi - -L(1): movl (%esi),%eax - movl 4(%esi),%edx - adcl %ebp,%eax - movl 4(%ebx),%ebp - adcl %ebp,%edx - movl 8(%ebx),%ebp - movl %eax,-32(%edi) - movl %edx,-28(%edi) - -L(2): movl 8(%esi),%eax - movl 12(%esi),%edx - adcl %ebp,%eax - movl 12(%ebx),%ebp - adcl %ebp,%edx - movl 16(%ebx),%ebp - movl %eax,-24(%edi) - movl %edx,-20(%edi) - -L(3): movl 16(%esi),%eax - movl 20(%esi),%edx - adcl %ebp,%eax - movl 20(%ebx),%ebp - adcl %ebp,%edx - movl 24(%ebx),%ebp - movl %eax,-16(%edi) - movl %edx,-12(%edi) - -L(4): movl 24(%esi),%eax - movl 28(%esi),%edx - adcl %ebp,%eax - movl 28(%ebx),%ebp - adcl %ebp,%edx - movl 32(%ebx),%ebp - movl %eax,-8(%edi) - movl %edx,-4(%edi) - - leal 32(%esi),%esi - leal 32(%ebx),%ebx - decl %ecx - jnz L(oop) - - popl %edx - cfi_adjust_cfa_offset (-4) -L(end): - decl %edx /* test %edx w/o clobbering carry */ - js L(end2) - incl %edx -L(oop2): - leal 4(%edi),%edi - movl (%esi),%eax - adcl %ebp,%eax - movl 4(%ebx),%ebp - movl %eax,-4(%edi) - leal 4(%esi),%esi - leal 4(%ebx),%ebx - decl %edx - jnz L(oop2) -L(end2): - movl (%esi),%eax - adcl %ebp,%eax - movl %eax,(%edi) - - sbbl %eax,%eax - negl %eax - - popl %ebx - 
cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_add_n) diff --git a/sysdeps/i386/i586/addmul_1.S b/sysdeps/i386/i586/addmul_1.S deleted file mode 100644 index a713192982..0000000000 --- a/sysdeps/i386/i586/addmul_1.S +++ /dev/null @@ -1,94 +0,0 @@ -/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add - the result to a second limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. 
*/ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define SIZE S1+4 -#define S2LIMB SIZE+4 - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebx - - .text -ENTRY (__mpn_addmul_1) - - pushl %res_ptr - cfi_adjust_cfa_offset (4) - pushl %s1_ptr - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %s2_limb - cfi_adjust_cfa_offset (4) - - movl RES(%esp), %res_ptr - cfi_rel_offset (res_ptr, 12) - movl S1(%esp), %s1_ptr - cfi_rel_offset (s1_ptr, 8) - movl SIZE(%esp), %size - movl S2LIMB(%esp), %s2_limb - cfi_rel_offset (s2_limb, 0) - leal (%res_ptr,%size,4), %res_ptr - leal (%s1_ptr,%size,4), %s1_ptr - negl %size - xorl %ebp, %ebp - cfi_rel_offset (ebp, 4) - ALIGN (3) - -L(oop): adcl $0, %ebp - movl (%s1_ptr,%size,4), %eax - - mull %s2_limb - - addl %ebp, %eax - movl (%res_ptr,%size,4), %ebp - - adcl $0, %edx - addl %eax, %ebp - - movl %ebp, (%res_ptr,%size,4) - incl %size - - movl %edx, %ebp - jnz L(oop) - - adcl $0, %ebp - movl %ebp, %eax - popl %s2_limb - cfi_adjust_cfa_offset (-4) - cfi_restore (s2_limb) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %s1_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (s1_ptr) - popl %res_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (res_ptr) - - ret -#undef size -END (__mpn_addmul_1) diff --git a/sysdeps/i386/i586/bzero.S b/sysdeps/i386/i586/bzero.S deleted file mode 100644 index 2a106719a4..0000000000 --- a/sysdeps/i386/i586/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include <sysdeps/i386/i586/memset.S> -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i586/init-arch.h b/sysdeps/i386/i586/init-arch.h deleted file mode 100644 index 4711212e6f..0000000000 --- a/sysdeps/i386/i586/init-arch.h +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define MINIMUM_ISA 586 -#include <sysdeps/x86/init-arch.h> diff --git a/sysdeps/i386/i586/lshift.S b/sysdeps/i386/i586/lshift.S deleted file mode 100644 index 7941c28d9d..0000000000 --- a/sysdeps/i386/i586/lshift.S +++ /dev/null @@ -1,255 +0,0 @@ -/* Pentium optimized __mpn_lshift -- - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S RES+4 -#define SIZE S+4 -#define CNT SIZE+4 - - .text -ENTRY (__mpn_lshift) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebp, 0) - pushl %ebx - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 12) - movl S(%esp),%esi - cfi_rel_offset (esi, 8) - movl SIZE(%esp),%ebx - cfi_rel_offset (ebx, 0) - movl CNT(%esp),%ecx - -/* We can use faster code for shift-by-1 under certain conditions. */ - cmp $1,%ecx - jne L(normal) - leal 4(%esi),%eax - cmpl %edi,%eax - jnc L(special) /* jump if s_ptr + 1 >= res_ptr */ - leal (%esi,%ebx,4),%eax - cmpl %eax,%edi - jnc L(special) /* jump if res_ptr >= s_ptr + size */ - -L(normal): - leal -4(%edi,%ebx,4),%edi - leal -4(%esi,%ebx,4),%esi - - movl (%esi),%edx - subl $4,%esi - xorl %eax,%eax - shldl %cl,%edx,%eax /* compute carry limb */ - pushl %eax /* push carry limb onto stack */ - cfi_adjust_cfa_offset (4) - - decl %ebx - pushl %ebx - cfi_adjust_cfa_offset (4) - shrl $3,%ebx - jz L(end) - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -L(oop): movl -28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebp - - movl (%esi),%eax - movl -4(%esi),%edx - shldl %cl,%eax,%ebp - shldl %cl,%edx,%eax - movl %ebp,(%edi) - movl %eax,-4(%edi) - - movl -8(%esi),%ebp - movl -12(%esi),%eax - shldl %cl,%ebp,%edx - shldl %cl,%eax,%ebp - movl %edx,-8(%edi) - movl %ebp,-12(%edi) - - movl -16(%esi),%edx - movl -20(%esi),%ebp - shldl %cl,%edx,%eax - shldl %cl,%ebp,%edx - movl %eax,-16(%edi) - movl %edx,-20(%edi) - - movl -24(%esi),%eax - movl -28(%esi),%edx - shldl %cl,%eax,%ebp - shldl %cl,%edx,%eax - movl %ebp,-24(%edi) - movl %eax,-28(%edi) - - subl $32,%esi - subl $32,%edi - decl %ebx - jnz L(oop) - -L(end): popl %ebx - cfi_adjust_cfa_offset (-4) - andl $7,%ebx - jz L(end2) 
-L(oop2): - movl (%esi),%eax - shldl %cl,%eax,%edx - movl %edx,(%edi) - movl %eax,%edx - subl $4,%esi - subl $4,%edi - decl %ebx - jnz L(oop2) - -L(end2): - shll %cl,%edx /* compute least significant limb */ - movl %edx,(%edi) /* store it */ - - popl %eax /* pop carry limb */ - cfi_adjust_cfa_offset (-4) - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - -/* We loop from least significant end of the arrays, which is only - permissible if the source and destination don't overlap, since the - function is documented to work for overlapping source and destination. -*/ - - cfi_adjust_cfa_offset (16) - cfi_rel_offset (edi, 12) - cfi_rel_offset (esi, 8) - cfi_rel_offset (ebp, 4) - cfi_rel_offset (ebx, 0) -L(special): - movl (%esi),%edx - addl $4,%esi - - decl %ebx - pushl %ebx - cfi_adjust_cfa_offset (4) - shrl $3,%ebx - - addl %edx,%edx - incl %ebx - decl %ebx - jz L(Lend) - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -L(Loop): - movl 28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebp - - movl (%esi),%eax - movl 4(%esi),%edx - adcl %eax,%eax - movl %ebp,(%edi) - adcl %edx,%edx - movl %eax,4(%edi) - - movl 8(%esi),%ebp - movl 12(%esi),%eax - adcl %ebp,%ebp - movl %edx,8(%edi) - adcl %eax,%eax - movl %ebp,12(%edi) - - movl 16(%esi),%edx - movl 20(%esi),%ebp - adcl %edx,%edx - movl %eax,16(%edi) - adcl %ebp,%ebp - movl %edx,20(%edi) - - movl 24(%esi),%eax - movl 28(%esi),%edx - adcl %eax,%eax - movl %ebp,24(%edi) - adcl %edx,%edx - movl %eax,28(%edi) - - leal 32(%esi),%esi /* use leal not to clobber carry */ - leal 32(%edi),%edi - decl %ebx - jnz L(Loop) - -L(Lend): - popl %ebx - cfi_adjust_cfa_offset (-4) - sbbl %eax,%eax /* save carry in %eax */ - andl $7,%ebx - jz L(Lend2) - addl %eax,%eax /* restore carry from eax */ -L(Loop2): - 
movl %edx,%ebp - movl (%esi),%edx - adcl %edx,%edx - movl %ebp,(%edi) - - leal 4(%esi),%esi /* use leal not to clobber carry */ - leal 4(%edi),%edi - decl %ebx - jnz L(Loop2) - - jmp L(L1) -L(Lend2): - addl %eax,%eax /* restore carry from eax */ -L(L1): movl %edx,(%edi) /* store last limb */ - - sbbl %eax,%eax - negl %eax - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_lshift) diff --git a/sysdeps/i386/i586/memcopy.h b/sysdeps/i386/i586/memcopy.h deleted file mode 100644 index 39f020a746..0000000000 --- a/sysdeps/i386/i586/memcopy.h +++ /dev/null @@ -1,95 +0,0 @@ -/* memcopy.h -- definitions for memory copy functions. Pentium version. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - Contributed by Torbjorn Granlund (tege@sics.se). - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* Get the i386 definitions. We will override some of them below. */ -#include <sysdeps/i386/memcopy.h> - -/* Written like this, the Pentium pipeline can execute the loop at a - sustained rate of 2 instructions/clock, or asymptotically 480 - Mbytes/second at 60Mhz. 
*/ - -#undef WORD_COPY_FWD -#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ - do \ - { \ - asm volatile ("subl $32,%2\n" \ - "js 2f\n" \ - "movl 0(%0),%%edx\n" /* alloc dest line */ \ - "1:\n" \ - "movl 28(%0),%%eax\n" /* alloc dest line */ \ - "subl $32,%2\n" /* decr loop count */ \ - "movl 0(%1),%%eax\n" /* U pipe */ \ - "movl 4(%1),%%edx\n" /* V pipe */ \ - "movl %%eax,0(%0)\n" /* U pipe */ \ - "movl %%edx,4(%0)\n" /* V pipe */ \ - "movl 8(%1),%%eax\n" \ - "movl 12(%1),%%edx\n" \ - "movl %%eax,8(%0)\n" \ - "movl %%edx,12(%0)\n" \ - "movl 16(%1),%%eax\n" \ - "movl 20(%1),%%edx\n" \ - "movl %%eax,16(%0)\n" \ - "movl %%edx,20(%0)\n" \ - "movl 24(%1),%%eax\n" \ - "movl 28(%1),%%edx\n" \ - "movl %%eax,24(%0)\n" \ - "movl %%edx,28(%0)\n" \ - "leal 32(%1),%1\n" /* update src ptr */ \ - "leal 32(%0),%0\n" /* update dst ptr */ \ - "jns 1b\n" \ - "2: addl $32,%2" : \ - "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) : \ - "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \ - "ax", "dx"); \ - } while (0) - -#undef WORD_COPY_BWD -#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ - do \ - { \ - asm volatile ("subl $32,%2\n" \ - "js 2f\n" \ - "movl -4(%0),%%edx\n" \ - "1:\n" \ - "movl -32(%0),%%eax\n" \ - "subl $32,%2\n" \ - "movl -4(%1),%%eax\n" \ - "movl -8(%1),%%edx\n" \ - "movl %%eax,-4(%0)\n" \ - "movl %%edx,-8(%0)\n" \ - "movl -12(%1),%%eax\n" \ - "movl -16(%1),%%edx\n" \ - "movl %%eax,-12(%0)\n" \ - "movl %%edx,-16(%0)\n" \ - "movl -20(%1),%%eax\n" \ - "movl -24(%1),%%edx\n" \ - "movl %%eax,-20(%0)\n" \ - "movl %%edx,-24(%0)\n" \ - "movl -28(%1),%%eax\n" \ - "movl -32(%1),%%edx\n" \ - "movl %%eax,-28(%0)\n" \ - "movl %%edx,-32(%0)\n" \ - "leal -32(%1),%1\n" \ - "leal -32(%0),%0\n" \ - "jns 1b\n" \ - "2: addl $32,%2" : \ - "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) : \ - "0" (dst_ep), "1" (src_ep), "2" (nbytes) : \ - "ax", "dx"); \ - } while (0) diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S deleted file mode 100644 index 
6474a3f653..0000000000 --- a/sysdeps/i386/i586/memcpy.S +++ /dev/null @@ -1,124 +0,0 @@ -/* Highly optimized version for i586. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+8 /* space for 2 saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 -#define LEN SRC+4 - - .text -#if defined PIC && IS_IN (libc) -ENTRY (__memcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memcpy_chk) -#endif -ENTRY (memcpy) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - - movl DEST(%esp), %edi - cfi_rel_offset (edi, 4) - movl SRC(%esp), %esi - cfi_rel_offset (esi, 0) - movl LEN(%esp), %ecx - movl %edi, %eax - - /* We need this in any case. */ - cld - - /* Cutoff for the big loop is a size of 32 bytes since otherwise - the loop will never be entered. */ - cmpl $32, %ecx - jbe L(1) - - negl %eax - andl $3, %eax - subl %eax, %ecx - xchgl %eax, %ecx - - rep; movsb - - movl %eax, %ecx - subl $32, %ecx - js L(2) - - /* Read ahead to make sure we write in the cache since the stupid - i586 designers haven't implemented read-on-write-miss. 
*/ - movl (%edi), %eax -L(3): movl 28(%edi), %edx - - /* Now correct the loop counter. Please note that in the following - code the flags are not changed anymore. */ - subl $32, %ecx - - movl (%esi), %eax - movl 4(%esi), %edx - movl %eax, (%edi) - movl %edx, 4(%edi) - movl 8(%esi), %eax - movl 12(%esi), %edx - movl %eax, 8(%edi) - movl %edx, 12(%edi) - movl 16(%esi), %eax - movl 20(%esi), %edx - movl %eax, 16(%edi) - movl %edx, 20(%edi) - movl 24(%esi), %eax - movl 28(%esi), %edx - movl %eax, 24(%edi) - movl %edx, 28(%edi) - - leal 32(%esi), %esi - leal 32(%edi), %edi - - jns L(3) - - /* Correct extra loop counter modification. */ -L(2): addl $32, %ecx -#ifndef USE_AS_MEMPCPY - movl DEST(%esp), %eax -#endif - -L(1): rep; movsb - -#ifdef USE_AS_MEMPCPY - movl %edi, %eax -#endif - - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (memcpy) -#ifndef USE_AS_MEMPCPY -libc_hidden_builtin_def (memcpy) -#endif diff --git a/sysdeps/i386/i586/mempcpy.S b/sysdeps/i386/i586/mempcpy.S deleted file mode 100644 index 720a4c0923..0000000000 --- a/sysdeps/i386/i586/mempcpy.S +++ /dev/null @@ -1,8 +0,0 @@ -#define USE_AS_MEMPCPY -#define memcpy __mempcpy -#define __memcpy_chk __mempcpy_chk -#include <sysdeps/i386/i586/memcpy.S> - -libc_hidden_def (__mempcpy) -weak_alias (__mempcpy, mempcpy) -libc_hidden_builtin_def (mempcpy) diff --git a/sysdeps/i386/i586/memset.S b/sysdeps/i386/i586/memset.S deleted file mode 100644 index 4f8f1bcf94..0000000000 --- a/sysdeps/i386/i586/memset.S +++ /dev/null @@ -1,121 +0,0 @@ -/* memset/bzero -- set memory area to CH/0 - Highly optimized version for ix86, x>=5. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- Contributed by Torbjorn Granlund, <tege@matematik.su.se> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#define RTN PARMS -#define DEST RTN -#ifdef USE_AS_BZERO -# define LEN DEST+4 -#else -# define CHR DEST+4 -# define LEN CHR+4 -#endif - - .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY (__memset_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk) -#endif -ENTRY (memset) - - pushl %edi - cfi_adjust_cfa_offset (4) - - movl DEST(%esp), %edi - cfi_rel_offset (edi, 0) - movl LEN(%esp), %edx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* we fill with 0 */ -#else - movb CHR(%esp), %al - movb %al, %ah - movl %eax, %ecx - shll $16, %eax - movw %cx, %ax -#endif - cld - -/* If less than 36 bytes to write, skip tricky code (it wouldn't work). */ - cmpl $36, %edx - movl %edx, %ecx /* needed when branch is taken! */ - jl L(2) - -/* First write 0-3 bytes to make the pointer 32-bit aligned. */ - movl %edi, %ecx /* Copy ptr to ecx... */ - negl %ecx /* ...and negate that and... */ - andl $3, %ecx /* ...mask to get byte count. 
*/ - subl %ecx, %edx /* adjust global byte count */ - rep - stosb - - subl $32, %edx /* offset count for unrolled loop */ - movl (%edi), %ecx /* Fetch destination cache line */ - - .align 2, 0x90 /* supply 0x90 for broken assemblers */ -L(1): movl 28(%edi), %ecx /* allocate cache line for destination */ - subl $32, %edx /* decr loop count */ - movl %eax, 0(%edi) /* store words pairwise */ - movl %eax, 4(%edi) - movl %eax, 8(%edi) - movl %eax, 12(%edi) - movl %eax, 16(%edi) - movl %eax, 20(%edi) - movl %eax, 24(%edi) - movl %eax, 28(%edi) - leal 32(%edi), %edi /* update destination pointer */ - jge L(1) - - leal 32(%edx), %ecx /* reset offset count */ - -/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped). */ -L(2): shrl $2, %ecx /* convert byte count to longword count */ - rep - stosl - -/* Finally write the last 0-3 bytes. */ - movl %edx, %ecx - andl $3, %ecx - rep - stosb - -#ifndef USE_AS_BZERO - /* Load result (only if used as memset). */ - movl DEST(%esp), %eax /* start address of destination is result */ -#endif - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (memset) -libc_hidden_builtin_def (memset) - -#if defined SHARED && IS_IN (libc) && !defined __memset_chk \ - && !defined USE_AS_BZERO -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -#endif diff --git a/sysdeps/i386/i586/memusage.h b/sysdeps/i386/i586/memusage.h deleted file mode 100644 index c8170874d0..0000000000 --- a/sysdeps/i386/i586/memusage.h +++ /dev/null @@ -1 +0,0 @@ -#include "../i686/memusage.h" diff --git a/sysdeps/i386/i586/mul_1.S b/sysdeps/i386/i586/mul_1.S deleted file mode 100644 index bd3a07de90..0000000000 --- a/sysdeps/i386/i586/mul_1.S +++ /dev/null @@ -1,90 +0,0 @@ -/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store - the result in a 
second limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define SIZE S1+4 -#define S2LIMB SIZE+4 - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebx - - .text -ENTRY (__mpn_mul_1) - - pushl %res_ptr - cfi_adjust_cfa_offset (4) - pushl %s1_ptr - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %s2_limb - cfi_adjust_cfa_offset (4) - - movl RES(%esp), %res_ptr - cfi_rel_offset (res_ptr, 12) - movl S1(%esp), %s1_ptr - cfi_rel_offset (s1_ptr, 8) - movl SIZE(%esp), %size - movl S2LIMB(%esp), %s2_limb - cfi_rel_offset (s2_limb, 0) - leal (%res_ptr,%size,4), %res_ptr - leal (%s1_ptr,%size,4), %s1_ptr - negl %size - xorl %ebp, %ebp - cfi_rel_offset (ebp, 4) - ALIGN (3) - -L(oop): adcl $0, %ebp - movl (%s1_ptr,%size,4), %eax - - mull %s2_limb - - addl %eax, %ebp - - movl %ebp, (%res_ptr,%size,4) - incl %size - - movl %edx, %ebp - jnz L(oop) - - adcl $0, %ebp - movl %ebp, %eax - popl %s2_limb - cfi_adjust_cfa_offset (-4) - cfi_restore (s2_limb) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %s1_ptr - 
cfi_adjust_cfa_offset (-4) - cfi_restore (s1_ptr) - popl %res_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (res_ptr) - - ret -#undef size -END (__mpn_mul_1) diff --git a/sysdeps/i386/i586/rshift.S b/sysdeps/i386/i586/rshift.S deleted file mode 100644 index 24c76ee0bb..0000000000 --- a/sysdeps/i386/i586/rshift.S +++ /dev/null @@ -1,255 +0,0 @@ -/* Pentium optimized __mpn_rshift -- - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S RES+4 -#define SIZE S+4 -#define CNT SIZE+4 - - .text -ENTRY (__mpn_rshift) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebp, 0) - pushl %ebx - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 12) - movl S(%esp),%esi - cfi_rel_offset (esi, 8) - movl SIZE(%esp),%ebx - cfi_rel_offset (ebx, 0) - movl CNT(%esp),%ecx - -/* We can use faster code for shift-by-1 under certain conditions. 
*/ - cmp $1,%ecx - jne L(normal) - leal 4(%edi),%eax - cmpl %esi,%eax - jnc L(special) /* jump if res_ptr + 1 >= s_ptr */ - leal (%edi,%ebx,4),%eax - cmpl %eax,%esi - jnc L(special) /* jump if s_ptr >= res_ptr + size */ - -L(normal): - movl (%esi),%edx - addl $4,%esi - xorl %eax,%eax - shrdl %cl,%edx,%eax /* compute carry limb */ - pushl %eax /* push carry limb onto stack */ - cfi_adjust_cfa_offset (4) - - decl %ebx - pushl %ebx - cfi_adjust_cfa_offset (4) - shrl $3,%ebx - jz L(end) - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -L(oop): movl 28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebp - - movl (%esi),%eax - movl 4(%esi),%edx - shrdl %cl,%eax,%ebp - shrdl %cl,%edx,%eax - movl %ebp,(%edi) - movl %eax,4(%edi) - - movl 8(%esi),%ebp - movl 12(%esi),%eax - shrdl %cl,%ebp,%edx - shrdl %cl,%eax,%ebp - movl %edx,8(%edi) - movl %ebp,12(%edi) - - movl 16(%esi),%edx - movl 20(%esi),%ebp - shrdl %cl,%edx,%eax - shrdl %cl,%ebp,%edx - movl %eax,16(%edi) - movl %edx,20(%edi) - - movl 24(%esi),%eax - movl 28(%esi),%edx - shrdl %cl,%eax,%ebp - shrdl %cl,%edx,%eax - movl %ebp,24(%edi) - movl %eax,28(%edi) - - addl $32,%esi - addl $32,%edi - decl %ebx - jnz L(oop) - -L(end): popl %ebx - cfi_adjust_cfa_offset (-4) - andl $7,%ebx - jz L(end2) -L(oop2): - movl (%esi),%eax - shrdl %cl,%eax,%edx /* compute result limb */ - movl %edx,(%edi) - movl %eax,%edx - addl $4,%esi - addl $4,%edi - decl %ebx - jnz L(oop2) - -L(end2): - shrl %cl,%edx /* compute most significant limb */ - movl %edx,(%edi) /* store it */ - - popl %eax /* pop carry limb */ - cfi_adjust_cfa_offset (-4) - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - -/* We loop from least significant end of the arrays, which is only - permissible if the source and destination don't 
overlap, since the - function is documented to work for overlapping source and destination. -*/ - - cfi_adjust_cfa_offset (16) - cfi_rel_offset (edi, 12) - cfi_rel_offset (esi, 8) - cfi_rel_offset (ebp, 4) - cfi_rel_offset (ebx, 0) -L(special): - leal -4(%edi,%ebx,4),%edi - leal -4(%esi,%ebx,4),%esi - - movl (%esi),%edx - subl $4,%esi - - decl %ebx - pushl %ebx - cfi_adjust_cfa_offset (4) - shrl $3,%ebx - - shrl $1,%edx - incl %ebx - decl %ebx - jz L(Lend) - - movl (%edi),%eax /* fetch destination cache line */ - - ALIGN (2) -L(Loop): - movl -28(%edi),%eax /* fetch destination cache line */ - movl %edx,%ebp - - movl (%esi),%eax - movl -4(%esi),%edx - rcrl $1,%eax - movl %ebp,(%edi) - rcrl $1,%edx - movl %eax,-4(%edi) - - movl -8(%esi),%ebp - movl -12(%esi),%eax - rcrl $1,%ebp - movl %edx,-8(%edi) - rcrl $1,%eax - movl %ebp,-12(%edi) - - movl -16(%esi),%edx - movl -20(%esi),%ebp - rcrl $1,%edx - movl %eax,-16(%edi) - rcrl $1,%ebp - movl %edx,-20(%edi) - - movl -24(%esi),%eax - movl -28(%esi),%edx - rcrl $1,%eax - movl %ebp,-24(%edi) - rcrl $1,%edx - movl %eax,-28(%edi) - - leal -32(%esi),%esi /* use leal not to clobber carry */ - leal -32(%edi),%edi - decl %ebx - jnz L(Loop) - -L(Lend): - popl %ebx - cfi_adjust_cfa_offset (-4) - sbbl %eax,%eax /* save carry in %eax */ - andl $7,%ebx - jz L(Lend2) - addl %eax,%eax /* restore carry from eax */ -L(Loop2): - movl %edx,%ebp - movl (%esi),%edx - rcrl $1,%edx - movl %ebp,(%edi) - - leal -4(%esi),%esi /* use leal not to clobber carry */ - leal -4(%edi),%edi - decl %ebx - jnz L(Loop2) - - jmp L(L1) -L(Lend2): - addl %eax,%eax /* restore carry from eax */ -L(L1): movl %edx,(%edi) /* store last limb */ - - movl $0,%eax - rcrl $1,%eax - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_rshift) diff --git 
a/sysdeps/i386/i586/stpcpy.S b/sysdeps/i386/i586/stpcpy.S deleted file mode 100644 index 8691efd01c..0000000000 --- a/sysdeps/i386/i586/stpcpy.S +++ /dev/null @@ -1,8 +0,0 @@ -#define USE_AS_STPCPY -#define STRCPY __stpcpy - -#include <sysdeps/i386/i586/strcpy.S> - -weak_alias (__stpcpy, stpcpy) -libc_hidden_def (__stpcpy) -libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/i386/i586/strchr.S b/sysdeps/i386/i586/strchr.S deleted file mode 100644 index 02f66b8f72..0000000000 --- a/sysdeps/i386/i586/strchr.S +++ /dev/null @@ -1,348 +0,0 @@ -/* Find character CH in a NUL terminated string. - Highly optimized version for ix85, x>=5. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -/* This version is especially optimized for the i586 (and following?) - processors. This is mainly done by using the two pipelines. The - version optimized for i486 is weak in this aspect because to get - as much parallelism we have to execute some *more* instructions. - - The code below is structured to reflect the pairing of the instructions - as *I think* it is. I have no processor data book to verify this. - If you find something you think is incorrect let me know. 
*/ - - -/* The magic value which is used throughout in the whole code. */ -#define magic 0xfefefeff - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RTN PARMS -#define STR RTN -#define CHR STR+4 - - .text -ENTRY (strchr) - - pushl %edi /* Save callee-safe registers. */ - cfi_adjust_cfa_offset (-4) - pushl %esi - cfi_adjust_cfa_offset (-4) - - pushl %ebx - cfi_adjust_cfa_offset (-4) - pushl %ebp - cfi_adjust_cfa_offset (-4) - - movl STR(%esp), %eax - movl CHR(%esp), %edx - - movl %eax, %edi /* duplicate string pointer for later */ - cfi_rel_offset (edi, 12) - xorl %ecx, %ecx /* clear %ecx */ - - /* At the moment %edx contains C. What we need for the - algorithm is C in all bytes of the dword. Avoid - operations on 16 bit words because these require an - prefix byte (and one more cycle). */ - movb %dl, %dh /* now it is 0|0|c|c */ - movb %dl, %cl /* we construct the lower half in %ecx */ - - shll $16, %edx /* now %edx is c|c|0|0 */ - movb %cl, %ch /* now %ecx is 0|0|c|c */ - - orl %ecx, %edx /* and finally c|c|c|c */ - andl $3, %edi /* mask alignment bits */ - - jz L(11) /* alignment is 0 => start loop */ - - movb %dl, %cl /* 0 is needed below */ - jp L(0) /* exactly two bits set */ - - xorb (%eax), %cl /* is byte the one we are looking for? */ - jz L(out) /* yes => return pointer */ - - xorb %dl, %cl /* load single byte and test for NUL */ - je L(3) /* yes => return NULL */ - - movb 1(%eax), %cl /* load single byte */ - incl %eax - - cmpb %cl, %dl /* is byte == C? */ - je L(out) /* aligned => return pointer */ - - cmpb $0, %cl /* is byte NUL? */ - je L(3) /* yes => return NULL */ - - incl %eax - decl %edi - - jne L(11) - -L(0): movb (%eax), %cl /* load single byte */ - - cmpb %cl, %dl /* is byte == C? */ - je L(out) /* aligned => return pointer */ - - cmpb $0, %cl /* is byte NUL? 
*/ - je L(3) /* yes => return NULL */ - - incl %eax /* increment pointer */ - - cfi_rel_offset (esi, 8) - cfi_rel_offset (ebx, 4) - cfi_rel_offset (ebp, 0) - - /* The following code is the preparation for the loop. The - four instruction up to `L1' will not be executed in the loop - because the same code is found at the end of the loop, but - there it is executed in parallel with other instructions. */ -L(11): movl (%eax), %ecx - movl $magic, %ebp - - movl $magic, %edi - addl %ecx, %ebp - - /* The main loop: it looks complex and indeed it is. I would - love to say `it was hard to write, so it should he hard to - read' but I will give some more hints. To fully understand - this code you should first take a look at the i486 version. - The basic algorithm is the same, but here the code organized - in a way which permits to use both pipelines all the time. - - I tried to make it a bit more understandable by indenting - the code according to stage in the algorithm. It goes as - follows: - check for 0 in 1st word - check for C in 1st word - check for 0 in 2nd word - check for C in 2nd word - check for 0 in 3rd word - check for C in 3rd word - check for 0 in 4th word - check for C in 4th word - - Please note that doing the test for NUL before the test for - C allows us to overlap the test for 0 in the next word with - the test for C. */ - -L(1): xorl %ecx, %ebp /* (word^magic) */ - addl %ecx, %edi /* add magic word */ - - leal 4(%eax), %eax /* increment pointer */ - jnc L(4) /* previous addl caused overflow? */ - - movl %ecx, %ebx /* duplicate original word */ - orl $magic, %ebp /* (word^magic)|magic */ - - addl $1, %ebp /* (word^magic)|magic == 0xffffffff? */ - jne L(4) /* yes => we found word with NUL */ - - movl $magic, %esi /* load magic value */ - xorl %edx, %ebx /* clear words which are C */ - - movl (%eax), %ecx - addl %ebx, %esi /* (word+magic) */ - - movl $magic, %edi - jnc L(5) /* previous addl caused overflow? 
*/ - - movl %edi, %ebp - xorl %ebx, %esi /* (word+magic)^word */ - - addl %ecx, %ebp - orl $magic, %esi /* ((word+magic)^word)|magic */ - - addl $1, %esi /* ((word+magic)^word)|magic==0xf..f?*/ - jne L(5) /* yes => we found word with C */ - - xorl %ecx, %ebp - addl %ecx, %edi - - leal 4(%eax), %eax - jnc L(4) - - movl %ecx, %ebx - orl $magic, %ebp - - addl $1, %ebp - jne L(4) - - movl $magic, %esi - xorl %edx, %ebx - - movl (%eax), %ecx - addl %ebx, %esi - - movl $magic, %edi - jnc L(5) - - movl %edi, %ebp - xorl %ebx, %esi - - addl %ecx, %ebp - orl $magic, %esi - - addl $1, %esi - jne L(5) - - xorl %ecx, %ebp - addl %ecx, %edi - - leal 4(%eax), %eax - jnc L(4) - - movl %ecx, %ebx - orl $magic, %ebp - - addl $1, %ebp - jne L(4) - - movl $magic, %esi - xorl %edx, %ebx - - movl (%eax), %ecx - addl %ebx, %esi - - movl $magic, %edi - jnc L(5) - - movl %edi, %ebp - xorl %ebx, %esi - - addl %ecx, %ebp - orl $magic, %esi - - addl $1, %esi - jne L(5) - - xorl %ecx, %ebp - addl %ecx, %edi - - leal 4(%eax), %eax - jnc L(4) - - movl %ecx, %ebx - orl $magic, %ebp - - addl $1, %ebp - jne L(4) - - movl $magic, %esi - xorl %edx, %ebx - - movl (%eax), %ecx - addl %ebx, %esi - - movl $magic, %edi - jnc L(5) - - movl %edi, %ebp - xorl %ebx, %esi - - addl %ecx, %ebp - orl $magic, %esi - - addl $1, %esi - - je L(1) - - /* We know there is no NUL byte but a C byte in the word. - %ebx contains NUL in this particular byte. */ -L(5): subl $4, %eax /* adjust pointer */ - testb %bl, %bl /* first byte == C? */ - - jz L(out) /* yes => return pointer */ - - incl %eax /* increment pointer */ - testb %bh, %bh /* second byte == C? 
*/ - - jz L(out) /* yes => return pointer */ - - shrl $16, %ebx /* make upper bytes accessible */ - incl %eax /* increment pointer */ - - cmp $0, %bl /* third byte == C */ - je L(out) /* yes => return pointer */ - - incl %eax /* increment pointer */ - -L(out): popl %ebp /* restore saved registers */ - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - - cfi_adjust_cfa_offset (16) - cfi_rel_offset (edi, 12) - cfi_rel_offset (esi, 8) - cfi_rel_offset (ebx, 4) - cfi_rel_offset (ebp, 0) - /* We know there is a NUL byte in the word. But we have to test - whether there is an C byte before it in the word. */ -L(4): subl $4, %eax /* adjust pointer */ - cmpb %dl, %cl /* first byte == C? */ - - je L(out) /* yes => return pointer */ - - cmpb $0, %cl /* first byte == NUL? */ - je L(3) /* yes => return NULL */ - - incl %eax /* increment pointer */ - - cmpb %dl, %ch /* second byte == C? */ - je L(out) /* yes => return pointer */ - - cmpb $0, %ch /* second byte == NUL? */ - je L(3) /* yes => return NULL */ - - shrl $16, %ecx /* make upper bytes accessible */ - incl %eax /* increment pointer */ - - cmpb %dl, %cl /* third byte == C? */ - je L(out) /* yes => return pointer */ - - cmpb $0, %cl /* third byte == NUL? */ - je L(3) /* yes => return NULL */ - - incl %eax /* increment pointer */ - - /* The test four the fourth byte is necessary! */ - cmpb %dl, %ch /* fourth byte == C? */ - je L(out) /* yes => return pointer */ - -L(3): xorl %eax, %eax - jmp L(out) -END (strchr) - -#undef index -weak_alias (strchr, index) -libc_hidden_builtin_def (strchr) diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S deleted file mode 100644 index a444604f4f..0000000000 --- a/sysdeps/i386/i586/strcpy.S +++ /dev/null @@ -1,169 +0,0 @@ -/* strcpy/stpcpy implementation for i586. 
- Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+12 /* space for 3 saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 - -#ifndef USE_AS_STPCPY -# define STRCPY strcpy -#endif - -#define magic 0xfefefeff - - .text -ENTRY (STRCPY) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebx - cfi_adjust_cfa_offset (4) - - movl DEST(%esp), %edi - cfi_rel_offset (edi, 8) - movl SRC(%esp), %esi - cfi_rel_offset (esi, 4) - - xorl %eax, %eax - leal -1(%esi), %ecx - - movl $magic, %ebx - cfi_rel_offset (ebx, 0) - andl $3, %ecx - -#ifdef PIC - call 2f - cfi_adjust_cfa_offset (4) -2: popl %edx - cfi_adjust_cfa_offset (-4) - /* 0xb is the distance between 2: and 1: but we avoid writing - 1f-2b because the assembler generates worse code. 
*/ - leal 0xb(%edx,%ecx,8), %ecx -#else - leal 1f(,%ecx,8), %ecx -#endif - - jmp *%ecx - - .align 8 -1: - orb (%esi), %al - jz L(end) - stosb - xorl %eax, %eax - incl %esi - - orb (%esi), %al - jz L(end) - stosb - xorl %eax, %eax - incl %esi - - orb (%esi), %al - jz L(end) - stosb - xorl %eax, %eax - incl %esi - -L(1): movl (%esi), %ecx - leal 4(%esi),%esi - - subl %ecx, %eax - addl %ebx, %ecx - - decl %eax - jnc L(3) - - movl %ecx, %edx - xorl %ecx, %eax - - subl %ebx, %edx - andl $~magic, %eax - - jne L(4) - - movl %edx, (%edi) - leal 4(%edi),%edi - - jmp L(1) - -L(3): movl %ecx, %edx - - subl %ebx, %edx - -L(4): movb %dl, (%edi) - testb %dl, %dl - - movl %edx, %eax - jz L(end2) - - shrl $16, %eax - movb %dh, 1(%edi) -#ifdef USE_AS_STPCPY - addl $1, %edi -#endif - - cmpb $0, %dh - jz L(end2) - -#ifdef USE_AS_STPCPY - movb %al, 1(%edi) - addl $1, %edi - - cmpb $0, %al - jz L(end2) - - addl $1, %edi -#else - movb %al, 2(%edi) - testb %al, %al - - leal 3(%edi), %edi - jz L(end2) -#endif - -L(end): movb %ah, (%edi) - -L(end2): -#ifdef USE_AS_STPCPY - movl %edi, %eax -#else - movl DEST(%esp), %eax -#endif - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (STRCPY) -#ifndef USE_AS_STPCPY -libc_hidden_builtin_def (strcpy) -#endif diff --git a/sysdeps/i386/i586/strlen.S b/sysdeps/i386/i586/strlen.S deleted file mode 100644 index cfea2e020f..0000000000 --- a/sysdeps/i386/i586/strlen.S +++ /dev/null @@ -1,182 +0,0 @@ -/* strlen -- Compute length of NUL terminated string. - Highly optimized version for ix86, x>=5. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -/* This version is especially optimized for the i586 (and following?) - processors. This is mainly done by using the two pipelines. The - version optimized for i486 is weak in this aspect because to get - as much parallelism we have to execute some *more* instructions. - - The code below is structured to reflect the pairing of the instructions - as *I think* it is. I have no processor data book to verify this. - If you find something you think is incorrect let me know. */ - - -/* The magic value which is used throughout in the whole code. */ -#define magic 0xfefefeff - -#define PARMS 4 /* no space for saved regs */ -#define STR PARMS - - .text -ENTRY (strlen) - - movl STR(%esp), %eax - movl $3, %edx /* load mask (= 3) */ - - andl %eax, %edx /* separate last two bits of address */ - - jz L(1) /* aligned => start loop */ - jp L(0) /* exactly two bits set */ - - cmpb %dh, (%eax) /* is byte NUL? */ - je L(2) /* yes => return */ - - incl %eax /* increment pointer */ - cmpb %dh, (%eax) /* is byte NUL? */ - - je L(2) /* yes => return */ - - incl %eax /* increment pointer */ - xorl $2, %edx - - jz L(1) - -L(0): cmpb %dh, (%eax) /* is byte NUL? 
*/ - je L(2) /* yes => return */ - - incl %eax /* increment pointer */ - xorl %edx, %edx /* We need %edx == 0 for later */ - - /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to - change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-31 is set, there will be a carry - into bit 32 (=carry flag), so all of the hole bits will - be changed. - - Note: %edx == 0 in any case here. */ - -L(1): - movl (%eax), %ecx /* get word (= 4 bytes) in question */ - addl $4, %eax /* adjust pointer for *next* word */ - - subl %ecx, %edx /* first step to negate word */ - addl $magic, %ecx /* add magic word */ - - decl %edx /* complete negation of word */ - jnc L(3) /* previous addl caused overflow? */ - - xorl %ecx, %edx /* (word+magic)^word */ - - andl $~magic, %edx /* any of the carry flags set? */ - - jne L(3) /* yes => determine byte */ - - - movl (%eax), %ecx /* get word (= 4 bytes) in question */ - addl $4, %eax /* adjust pointer for *next* word */ - - subl %ecx, %edx /* first step to negate word */ - addl $magic, %ecx /* add magic word */ - - decl %edx /* complete negation of word */ - jnc L(3) /* previous addl caused overflow? */ - - xorl %ecx, %edx /* (word+magic)^word */ - - andl $~magic, %edx /* any of the carry flags set? 
*/ - - jne L(3) /* yes => determine byte */ - - - movl (%eax), %ecx /* get word (= 4 bytes) in question */ - addl $4, %eax /* adjust pointer for *next* word */ - - subl %ecx, %edx /* first step to negate word */ - addl $magic, %ecx /* add magic word */ - - decl %edx /* complete negation of word */ - jnc L(3) /* previous addl caused overflow? */ - - xorl %ecx, %edx /* (word+magic)^word */ - - andl $~magic, %edx /* any of the carry flags set? */ - - jne L(3) /* yes => determine byte */ - - - movl (%eax), %ecx /* get word (= 4 bytes) in question */ - addl $4, %eax /* adjust pointer for *next* word */ - - subl %ecx, %edx /* first step to negate word */ - addl $magic, %ecx /* add magic word */ - - decl %edx /* complete negation of word */ - jnc L(3) /* previous addl caused overflow? */ - - xorl %ecx, %edx /* (word+magic)^word */ - - andl $~magic, %edx /* any of the carry flags set? */ - - je L(1) /* no => start loop again */ - - -L(3): subl $4, %eax /* correct too early pointer increment */ - subl $magic, %ecx - - cmpb $0, %cl /* lowest byte NUL? */ - jz L(2) /* yes => return */ - - inc %eax /* increment pointer */ - testb %ch, %ch /* second byte NUL? */ - - jz L(2) /* yes => return */ - - shrl $16, %ecx /* make upper bytes accessible */ - incl %eax /* increment pointer */ - - cmpb $0, %cl /* is third byte NUL? */ - jz L(2) /* yes => return */ - - incl %eax /* increment pointer */ - -L(2): subl STR(%esp), %eax /* now compute the length as difference - between start and terminating NUL - character */ - ret -END (strlen) -libc_hidden_builtin_def (strlen) diff --git a/sysdeps/i386/i586/sub_n.S b/sysdeps/i386/i586/sub_n.S deleted file mode 100644 index 21b5a2742c..0000000000 --- a/sysdeps/i386/i586/sub_n.S +++ /dev/null @@ -1,143 +0,0 @@ -/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0 - and store difference in a third limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. 
- - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define S2 S1+4 -#define SIZE S2+4 - - .text -ENTRY (__mpn_sub_n) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %ebx - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 12) - movl S1(%esp),%esi - cfi_rel_offset (esi, 8) - movl S2(%esp),%ebx - cfi_rel_offset (ebx, 0) - movl SIZE(%esp),%ecx - movl (%ebx),%ebp - cfi_rel_offset (ebp, 4) - - decl %ecx - movl %ecx,%edx - shrl $3,%ecx - andl $7,%edx - testl %ecx,%ecx /* zero carry flag */ - jz L(end) - pushl %edx - cfi_adjust_cfa_offset (4) - - ALIGN (3) -L(oop): movl 28(%edi),%eax /* fetch destination cache line */ - leal 32(%edi),%edi - -L(1): movl (%esi),%eax - movl 4(%esi),%edx - sbbl %ebp,%eax - movl 4(%ebx),%ebp - sbbl %ebp,%edx - movl 8(%ebx),%ebp - movl %eax,-32(%edi) - movl %edx,-28(%edi) - -L(2): movl 8(%esi),%eax - movl 12(%esi),%edx - sbbl %ebp,%eax - movl 12(%ebx),%ebp - sbbl %ebp,%edx - movl 16(%ebx),%ebp - movl %eax,-24(%edi) - movl %edx,-20(%edi) - -L(3): movl 16(%esi),%eax - movl 20(%esi),%edx - sbbl %ebp,%eax - movl 20(%ebx),%ebp - sbbl %ebp,%edx - movl 24(%ebx),%ebp - 
movl %eax,-16(%edi) - movl %edx,-12(%edi) - -L(4): movl 24(%esi),%eax - movl 28(%esi),%edx - sbbl %ebp,%eax - movl 28(%ebx),%ebp - sbbl %ebp,%edx - movl 32(%ebx),%ebp - movl %eax,-8(%edi) - movl %edx,-4(%edi) - - leal 32(%esi),%esi - leal 32(%ebx),%ebx - decl %ecx - jnz L(oop) - - popl %edx - cfi_adjust_cfa_offset (-4) -L(end): - decl %edx /* test %edx w/o clobbering carry */ - js L(end2) - incl %edx -L(oop2): - leal 4(%edi),%edi - movl (%esi),%eax - sbbl %ebp,%eax - movl 4(%ebx),%ebp - movl %eax,-4(%edi) - leal 4(%esi),%esi - leal 4(%ebx),%ebx - decl %edx - jnz L(oop2) -L(end2): - movl (%esi),%eax - sbbl %ebp,%eax - movl %eax,(%edi) - - sbbl %eax,%eax - negl %eax - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_sub_n) diff --git a/sysdeps/i386/i586/submul_1.S b/sysdeps/i386/i586/submul_1.S deleted file mode 100644 index 5e5e121ca2..0000000000 --- a/sysdeps/i386/i586/submul_1.S +++ /dev/null @@ -1,94 +0,0 @@ -/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract - the result from a second limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. 
- - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define SIZE S1+4 -#define S2LIMB SIZE+4 - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebx - - .text -ENTRY (__mpn_submul_1) - - pushl %res_ptr - cfi_adjust_cfa_offset (4) - pushl %s1_ptr - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %s2_limb - cfi_adjust_cfa_offset (4) - - movl RES(%esp), %res_ptr - cfi_rel_offset (res_ptr, 12) - movl S1(%esp), %s1_ptr - cfi_rel_offset (s1_ptr, 8) - movl SIZE(%esp), %size - movl S2LIMB(%esp), %s2_limb - cfi_rel_offset (s2_limb, 0) - leal (%res_ptr,%size,4), %res_ptr - leal (%s1_ptr,%size,4), %s1_ptr - negl %size - xorl %ebp, %ebp - cfi_rel_offset (ebp, 4) - ALIGN (3) - -L(oop): adcl $0, %ebp - movl (%s1_ptr,%size,4), %eax - - mull %s2_limb - - addl %ebp, %eax - movl (%res_ptr,%size,4), %ebp - - adcl $0, %edx - subl %eax, %ebp - - movl %ebp, (%res_ptr,%size,4) - incl %size - - movl %edx, %ebp - jnz L(oop) - - adcl $0, %ebp - movl %ebp, %eax - popl %s2_limb - cfi_adjust_cfa_offset (-4) - cfi_restore (s2_limb) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %s1_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (s1_ptr) - popl %res_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (res_ptr) - - ret -#undef size -END (__mpn_submul_1) diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile deleted file mode 100644 index 311042787b..0000000000 --- a/sysdeps/i386/i686/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# So that we can test __m128's alignment -stack-align-test-flags += -msse - -CFLAGS-.o += -Wa,-mtune=i686 -CFLAGS-.os += -Wa,-mtune=i686 -CFLAGS-.op += -Wa,-mtune=i686 -CFLAGS-.oS += -Wa,-mtune=i686 - -ASFLAGS-.o += 
-Wa,-mtune=i686 -ASFLAGS-.os += -Wa,-mtune=i686 -ASFLAGS-.op += -Wa,-mtune=i686 -ASFLAGS-.oS += -Wa,-mtune=i686 diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S deleted file mode 100644 index 4afa648ceb..0000000000 --- a/sysdeps/i386/i686/add_n.S +++ /dev/null @@ -1,110 +0,0 @@ -/* Add two limb vectors of the same length > 0 and store sum in a third - limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+8 /* space for 2 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define S2 S1+4 -#define SIZE S2+4 - - .text -#ifdef PIC -L(1): addl (%esp), %eax - ret -#endif -ENTRY (__mpn_add_n) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 4) - movl S1(%esp),%esi - cfi_rel_offset (esi, 0) - movl S2(%esp),%edx - movl SIZE(%esp),%ecx - movl %ecx,%eax - shrl $3,%ecx /* compute count for unrolled loop */ - negl %eax - andl $7,%eax /* get index where to start loop */ - jz L(oop) /* necessary special case for 0 */ - incl %ecx /* adjust loop count */ - shll $2,%eax /* adjustment for pointers... */ - subl %eax,%edi /* ... since they are offset ... 
*/ - subl %eax,%esi /* ... by a constant when we ... */ - subl %eax,%edx /* ... enter the loop */ - shrl $2,%eax /* restore previous value */ -#ifdef PIC -/* Calculate start address in loop for PIC. */ - leal (L(oop)-L(0)-3)(%eax,%eax,8),%eax - call L(1) -L(0): -#else -/* Calculate start address in loop for non-PIC. */ - leal (L(oop) - 3)(%eax,%eax,8),%eax -#endif - jmp *%eax /* jump into loop */ - ALIGN (3) -L(oop): movl (%esi),%eax - adcl (%edx),%eax - movl %eax,(%edi) - movl 4(%esi),%eax - adcl 4(%edx),%eax - movl %eax,4(%edi) - movl 8(%esi),%eax - adcl 8(%edx),%eax - movl %eax,8(%edi) - movl 12(%esi),%eax - adcl 12(%edx),%eax - movl %eax,12(%edi) - movl 16(%esi),%eax - adcl 16(%edx),%eax - movl %eax,16(%edi) - movl 20(%esi),%eax - adcl 20(%edx),%eax - movl %eax,20(%edi) - movl 24(%esi),%eax - adcl 24(%edx),%eax - movl %eax,24(%edi) - movl 28(%esi),%eax - adcl 28(%edx),%eax - movl %eax,28(%edi) - leal 32(%edi),%edi - leal 32(%esi),%esi - leal 32(%edx),%edx - decl %ecx - jnz L(oop) - - sbbl %eax,%eax - negl %eax - - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_add_n) diff --git a/sysdeps/i386/i686/bcopy.S b/sysdeps/i386/i686/bcopy.S deleted file mode 100644 index 15ef9419a4..0000000000 --- a/sysdeps/i386/i686/bcopy.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BCOPY -#define memmove bcopy -#include <sysdeps/i386/i686/memmove.S> diff --git a/sysdeps/i386/i686/bzero.S b/sysdeps/i386/i686/bzero.S deleted file mode 100644 index c7898f18e0..0000000000 --- a/sysdeps/i386/i686/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include <sysdeps/i386/i686/memset.S> -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i686/dl-hash.h b/sysdeps/i386/i686/dl-hash.h deleted file mode 100644 index ceda785b32..0000000000 --- a/sysdeps/i386/i686/dl-hash.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Compute hash alue for given string according to ELF 
standard. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _DL_HASH_H -#define _DL_HASH_H 1 - - -/* This is the hashing function specified by the ELF ABI. It is highly - optimized for the PII processors. Though it will run on i586 it - would be much slower than the generic C implementation. So don't - use it. 
*/ -static unsigned int -__attribute__ ((unused)) -_dl_elf_hash (const char *name) -{ - unsigned int result; - unsigned int temp0; - unsigned int temp1; - - __asm__ __volatile__ - ("movzbl (%1),%2\n\t" - "testl %2, %2\n\t" - "jz 1f\n\t" - "movl %2, %0\n\t" - "movzbl 1(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl %2, %0\n\t" - "movzbl 2(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl %2, %0\n\t" - "movzbl 3(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl %2, %0\n\t" - "movzbl 4(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl $5, %1\n\t" - "addl %2, %0\n\t" - "movzbl (%1), %2\n\t" - "jecxz 1f\n" - "2:\t" - "shll $4, %0\n\t" - "movl $0xf0000000, %3\n\t" - "incl %1\n\t" - "addl %2, %0\n\t" - "andl %0, %3\n\t" - "andl $0x0fffffff, %0\n\t" - "shrl $24, %3\n\t" - "movzbl (%1), %2\n\t" - "xorl %3, %0\n\t" - "testl %2, %2\n\t" - "jnz 2b\n" - "1:\t" - : "=&r" (result), "=r" (name), "=&c" (temp0), "=&r" (temp1) - : "0" (0), "1" ((const unsigned char *) name)); - - return result; -} - -#endif /* dl-hash.h */ diff --git a/sysdeps/i386/i686/ffs.c b/sysdeps/i386/i686/ffs.c deleted file mode 100644 index cbe36ff873..0000000000 --- a/sysdeps/i386/i686/ffs.c +++ /dev/null @@ -1,48 +0,0 @@ -/* ffs -- find first set bit in a word, counted from least significant end. - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 1991-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define ffsl __something_else -#include <string.h> - -#undef ffs - -#ifdef __GNUC__ - -int -__ffs (int x) -{ - int cnt; - int tmp; - - asm ("bsfl %2,%0\n" /* Count low bits in X and store in %1. */ - "cmovel %1,%0\n" /* If number was zero, use -1 as result. */ - : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1)); - - return cnt + 1; -} -weak_alias (__ffs, ffs) -libc_hidden_def (__ffs) -libc_hidden_builtin_def (ffs) -#undef ffsl -weak_alias (__ffs, ffsl) - -#else -#include <string/ffs.c> -#endif diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S deleted file mode 100644 index 73060b088c..0000000000 --- a/sysdeps/i386/i686/fpu/e_log.S +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for i686 instructions. - */ - -#include <machine/asm.h> - - - .text -ENTRY(__ieee754_log) - fldln2 // log(2) - fldl 4(%esp) // x : log(2) - fucomi %st - jp 3f - fyl2x // log(x) - ret - -3: fstp %st(1) - ret -END (__ieee754_log) - -ENTRY(__log_finite) - fldln2 // log(2) - fldl 4(%esp) // x : log(2) - fyl2x // log(x) - ret -END(__log_finite) diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S deleted file mode 100644 index 6fd39d50d3..0000000000 --- a/sysdeps/i386/i686/fpu/e_logf.S +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float by Ulrich Drepper <drepper@cygnus.com>. - * - * Adapted for i686 instructions. 
- */ - -#include <machine/asm.h> - - - .text -ENTRY(__ieee754_logf) - fldln2 // log(2) - flds 4(%esp) // x : log(2) - fucomi %st - jp 3f - fyl2x // log(x) - ret - -3: fstp %st(1) - ret -END (__ieee754_logf) - -ENTRY(__logf_finite) - fldln2 // log(2) - flds 4(%esp) // x : log(2) - fyl2x // log(x) - ret -END(__logf_finite) diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S deleted file mode 100644 index 7e3bc8d817..0000000000 --- a/sysdeps/i386/i686/fpu/e_logl.S +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - * Adapted for i686 instructions. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. */ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_logl) - fldln2 // log(2) - fldt 4(%esp) // x : log(2) - fucomi %st - jp 3f -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - movzwl 4+8(%esp), %eax - cmpl $0xc000, %eax - jae 5f // x <= -2, avoid overflow from -LDBL_MAX - 1. - fsubl MO(one) // x-1 : x : log(2) -5: fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2) - fcomip %st(1) // |x-1| : x-1 : x : log(2) - fstp %st(0) // x-1 : x : log(2) - jc 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 4f - fabs // log(1) is +0 in all rounding modes. 
-4: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : log(2) - fyl2x // log(x) - ret - -3: fstp %st(1) - fadd %st(0) - ret -END (__ieee754_logl) - -ENTRY(__logl_finite) - fldln2 // log(2) - fldt 4(%esp) // x : log(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2) - fcomip %st(1) // |x-1| : x-1 : x : log(2) - fstp %st(0) // x-1 : x : log(2) - jc 2b - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 6f - fabs // log(1) is +0 in all rounding modes. -6: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret -END(__logl_finite) diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile deleted file mode 100644 index 7d9089232f..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -ifeq ($(subdir),math) -libm-sysdep_routines += e_expf-sse2 e_expf-ia32 s_sinf-sse2 s_cosf-sse2 \ - s_sincosf-sse2 -endif diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S deleted file mode 100644 index b486b4d1ca..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S +++ /dev/null @@ -1,22 +0,0 @@ -/* - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define __ieee754_expf __ieee754_expf_ia32 -#define __expf_finite __expf_finite_ia32 - -#include <sysdeps/i386/fpu/e_expf.S> diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S deleted file mode 100644 index e6bb6fa289..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S +++ /dev/null @@ -1,325 +0,0 @@ -/* SSE2 version of __ieee754_expf and __expf_finite - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#include <sysdep.h> - -/* Short algorithm description: - * - * Let K = 64 (table size). - * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) - * where - * x = m*log(2)/K + y, y in [0.0..log(2)/K] - * m = n*K + j, m,n,j - signed integer, j in [0..K-1] - * values of 2^(j/K) are tabulated as T[j]. - * - * P(y) is a minimax polynomial approximation of expf(x)-1 - * on small interval [0.0..log(2)/K]. 
- * - * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as - * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y - * - * Special cases: - * __ieee754_expf_sse2(NaN) = NaN - * __ieee754_expf_sse2(+INF) = +INF - * __ieee754_expf_sse2(-INF) = 0 - * __ieee754_expf_sse2(x) = 1 for subnormals - * for finite argument, only __ieee754_expf_sse2(0)=1 is exact - * __ieee754_expf_sse2(x) overflows if x>700 - * __ieee754_expf_sse2(x) underflows if x<-700 - * - * Note: - * For |x|<700, __ieee754_expf_sse2 computes result in double precision, - * with accuracy a bit more than needed for expf, and does not round it - * to single precision. - */ - - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%edx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%edx,reg2,_scale) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -#endif - - .text -ENTRY(__ieee754_expf_sse2) - /* Input: single precision x on stack at address 4(%esp) */ - -#ifdef PIC - LOAD_PIC_REG(dx) -#endif - - cvtss2sd 4(%esp), %xmm1 /* Convert x to double precision */ - mov 4(%esp), %ecx /* Copy x */ - movsd MO1(DP_KLN2), %xmm2 /* DP K/log(2) */ - movsd MO1(DP_P2), %xmm3 /* DP P2 */ - movl %ecx, %eax /* x */ - mulsd %xmm1, %xmm2 /* DP x*K/log(2) */ - andl $0x7fffffff, %ecx /* |x| */ - cmpl $0x442f0000, %ecx /* |x|<700 ? */ - movsd MO1(DP_P3), %xmm4 /* DP P3 */ - addsd MO1(DP_RS), %xmm2 /* DP x*K/log(2)+RS */ - jae L(special_paths) - - /* Here if |x|<700 */ - cmpl $0x31800000, %ecx /* |x|<2^(-28) ? 
*/ - jb L(small_arg) - - /* Main path: here if 2^(-28)<=|x|<700 */ - cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */ - movd %xmm2, %eax /* bits of n*K+j with trash */ - subss MO1(SP_RS), %xmm2 /* SP t=round(x*K/log(2)) */ - movl %eax, %ecx /* n*K+j with trash */ - cvtss2sd %xmm2, %xmm2 /* DP t */ - andl $0x3f, %eax /* bits of j */ - mulsd MO1(DP_NLN2K), %xmm2 /* DP -t*log(2)/K */ - andl $0xffffffc0, %ecx /* bits of n */ -#ifdef __AVX__ - vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */ - vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */ -#else - addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */ - movaps %xmm2, %xmm0 /* DP y */ - mulsd %xmm2, %xmm2 /* DP z=y*y */ -#endif - mulsd %xmm2, %xmm4 /* DP P3*z */ - addl $0xffc0, %ecx /* bits of n + DP exponent bias */ - mulsd %xmm2, %xmm3 /* DP P2*z */ - shrl $2, %ecx /* High 2 bytes of DP 2^n */ - pxor %xmm1, %xmm1 /* clear %xmm1 */ - addsd MO1(DP_P1), %xmm4 /* DP P3*z+P1 */ - addsd MO1(DP_P0), %xmm3 /* DP P2*z+P0 */ - pinsrw $3, %ecx, %xmm1 /* DP 2^n */ - mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */ - mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */ - addsd %xmm4, %xmm0 /* DP P(y) */ - mulsd MO2(DP_T,%eax,8), %xmm0 /* DP P(y)*T[j] */ - addsd MO2(DP_T,%eax,8), %xmm0 /* DP T[j]*(P(y)+1) */ - mulsd %xmm1, %xmm0 /* DP result=2^n*(T[j]*(P(y)+1)) */ - cvtsd2ss %xmm0, %xmm1 - - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm1, 0(%esp) /* Move result from sse... */ - flds 0(%esp) /* ...to FPU. */ - lea 4(%esp), %esp /* Return back 4 bytes of stack frame */ - ret - - .p2align 4 -L(small_arg): - /* Here if 0<=|x|<2^(-28) */ - movss 4(%esp), %xmm0 /* load x */ - addss MO1(SP_ONE), %xmm0 /* 1.0 + x */ - /* Return 1.0 with inexact raised, except for x==0 */ - jmp L(epilogue) - - .p2align 4 -L(special_paths): - /* Here if x is NaN, or Inf, or finite |x|>=700 */ - movss 4(%esp), %xmm0 /* load x */ - - cmpl $0x7f800000, %ecx /* |x| is finite ? 
*/ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=700 */ - testl $0x80000000, %eax /* sign of x nonzero ? */ - je L(res_overflow) - - /* Here if finite x<=-700 */ - movss MO1(SP_SMALL), %xmm0 /* load small value 2^(-100) */ - mulss %xmm0, %xmm0 /* Return underflowed result (zero or subnormal) */ - jmp L(epilogue) - - .p2align 4 -L(res_overflow): - /* Here if finite x>=700 */ - movss MO1(SP_LARGE), %xmm0 /* load large value 2^100 */ - mulss %xmm0, %xmm0 /* Return overflowed result (Inf or max normal) */ - jmp L(epilogue) - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(arg_nan) /* |x| is Inf ? */ - - /* Here if |x| is Inf */ - shrl $31, %eax /* Get sign bit of x */ - movss MO2(SP_INF_0,%eax,4), %xmm0/* return zero or Inf, depending on sign of x */ - jmp L(epilogue) - - .p2align 4 -L(arg_nan): - /* Here if |x| is NaN */ - addss %xmm0, %xmm0 /* Return x+x (raise invalid) */ - - .p2align 4 -L(epilogue): - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm0, 0(%esp) /* Move result from sse... */ - flds 0(%esp) /* ...to FPU. 
*/ - lea 4(%esp), %esp /* Return back 4 bytes of stack frame */ - ret -END(__ieee754_expf_sse2) - - .section .rodata, "a" - .p2align 3 -L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */ - .long 0x00000000, 0x3ff00000 - .long 0x3e778061, 0x3ff02c9a - .long 0xd3158574, 0x3ff059b0 - .long 0x18759bc8, 0x3ff08745 - .long 0x6cf9890f, 0x3ff0b558 - .long 0x32d3d1a2, 0x3ff0e3ec - .long 0xd0125b51, 0x3ff11301 - .long 0xaea92de0, 0x3ff1429a - .long 0x3c7d517b, 0x3ff172b8 - .long 0xeb6fcb75, 0x3ff1a35b - .long 0x3168b9aa, 0x3ff1d487 - .long 0x88628cd6, 0x3ff2063b - .long 0x6e756238, 0x3ff2387a - .long 0x65e27cdd, 0x3ff26b45 - .long 0xf51fdee1, 0x3ff29e9d - .long 0xa6e4030b, 0x3ff2d285 - .long 0x0a31b715, 0x3ff306fe - .long 0xb26416ff, 0x3ff33c08 - .long 0x373aa9cb, 0x3ff371a7 - .long 0x34e59ff7, 0x3ff3a7db - .long 0x4c123422, 0x3ff3dea6 - .long 0x21f72e2a, 0x3ff4160a - .long 0x6061892d, 0x3ff44e08 - .long 0xb5c13cd0, 0x3ff486a2 - .long 0xd5362a27, 0x3ff4bfda - .long 0x769d2ca7, 0x3ff4f9b2 - .long 0x569d4f82, 0x3ff5342b - .long 0x36b527da, 0x3ff56f47 - .long 0xdd485429, 0x3ff5ab07 - .long 0x15ad2148, 0x3ff5e76f - .long 0xb03a5585, 0x3ff6247e - .long 0x82552225, 0x3ff66238 - .long 0x667f3bcd, 0x3ff6a09e - .long 0x3c651a2f, 0x3ff6dfb2 - .long 0xe8ec5f74, 0x3ff71f75 - .long 0x564267c9, 0x3ff75feb - .long 0x73eb0187, 0x3ff7a114 - .long 0x36cf4e62, 0x3ff7e2f3 - .long 0x994cce13, 0x3ff82589 - .long 0x9b4492ed, 0x3ff868d9 - .long 0x422aa0db, 0x3ff8ace5 - .long 0x99157736, 0x3ff8f1ae - .long 0xb0cdc5e5, 0x3ff93737 - .long 0x9fde4e50, 0x3ff97d82 - .long 0x82a3f090, 0x3ff9c491 - .long 0x7b5de565, 0x3ffa0c66 - .long 0xb23e255d, 0x3ffa5503 - .long 0x5579fdbf, 0x3ffa9e6b - .long 0x995ad3ad, 0x3ffae89f - .long 0xb84f15fb, 0x3ffb33a2 - .long 0xf2fb5e47, 0x3ffb7f76 - .long 0x904bc1d2, 0x3ffbcc1e - .long 0xdd85529c, 0x3ffc199b - .long 0x2e57d14b, 0x3ffc67f1 - .long 0xdcef9069, 0x3ffcb720 - .long 0x4a07897c, 0x3ffd072d - .long 0xdcfba487, 0x3ffd5818 - .long 0x03db3285, 
0x3ffda9e6 - .long 0x337b9b5f, 0x3ffdfc97 - .long 0xe78b3ff6, 0x3ffe502e - .long 0xa2a490da, 0x3ffea4af - .long 0xee615a27, 0x3ffefa1b - .long 0x5b6e4540, 0x3fff5076 - .long 0x819e90d8, 0x3fffa7c1 - .type L(DP_T), @object - ASM_SIZE_DIRECTIVE(L(DP_T)) - - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3 -L(DP_KLN2): /* double precision K/log(2) */ - .long 0x652b82fe, 0x40571547 - .type L(DP_KLN2), @object - ASM_SIZE_DIRECTIVE(L(DP_KLN2)) - - .p2align 3 -L(DP_NLN2K): /* double precision -log(2)/K */ - .long 0xfefa39ef, 0xbf862e42 - .type L(DP_NLN2K), @object - ASM_SIZE_DIRECTIVE(L(DP_NLN2K)) - - .p2align 3 -L(DP_RS): /* double precision 2^23+2^22 */ - .long 0x00000000, 0x41680000 - .type L(DP_RS), @object - ASM_SIZE_DIRECTIVE(L(DP_RS)) - - .p2align 3 -L(DP_P3): /* double precision polynomial coefficient P3 */ - .long 0xeb78fa85, 0x3fa56420 - .type L(DP_P3), @object - ASM_SIZE_DIRECTIVE(L(DP_P3)) - - .p2align 3 -L(DP_P1): /* double precision polynomial coefficient P1 */ - .long 0x008d6118, 0x3fe00000 - .type L(DP_P1), @object - ASM_SIZE_DIRECTIVE(L(DP_P1)) - - .p2align 3 -L(DP_P2): /* double precision polynomial coefficient P2 */ - .long 0xda752d4f, 0x3fc55550 - .type L(DP_P2), @object - ASM_SIZE_DIRECTIVE(L(DP_P2)) - - .p2align 3 -L(DP_P0): /* double precision polynomial coefficient P0 */ - .long 0xffffe7c6, 0x3fefffff - .type L(DP_P0), @object - ASM_SIZE_DIRECTIVE(L(DP_P0)) - - .p2align 2 -L(SP_INF_0): - .long 0x7f800000 /* single precision Inf */ - .long 0 /* single precision zero */ - .type L(SP_INF_0), @object - ASM_SIZE_DIRECTIVE(L(SP_INF_0)) - - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2 -L(SP_RS): /* single precision 2^23+2^22 */ - .long 0x4b400000 - .type L(SP_RS), @object - ASM_SIZE_DIRECTIVE(L(SP_RS)) - - .p2align 2 -L(SP_SMALL): /* single precision small value 2^(-100) */ - .long 0x0d800000 - .type L(SP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(SP_SMALL)) - - .p2align 2 -L(SP_LARGE): /* single precision large value 2^100 */ - .long 0x71800000 - 
.type L(SP_LARGE), @object - ASM_SIZE_DIRECTIVE(L(SP_LARGE)) - - .p2align 2 -L(SP_ONE): /* single precision 1.0 */ - .long 0x3f800000 - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -strong_alias (__ieee754_expf_sse2, __expf_finite_sse2) diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf.c b/sysdeps/i386/i686/fpu/multiarch/e_expf.c deleted file mode 100644 index 388cf98a39..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/e_expf.c +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of expf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern double __ieee754_expf_sse2 (double); -extern double __ieee754_expf_ia32 (double); - -double __ieee754_expf (double); -libm_ifunc (__ieee754_expf, - HAS_CPU_FEATURE (SSE2) - ? __ieee754_expf_sse2 - : __ieee754_expf_ia32); - -extern double __expf_finite_sse2 (double); -extern double __expf_finite_ia32 (double); - -double __expf_finite (double); -libm_ifunc (__expf_finite, - HAS_CPU_FEATURE (SSE2) - ? 
__expf_finite_sse2 - : __expf_finite_ia32); diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps deleted file mode 100644 index 04bc23b37b..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ /dev/null @@ -1,2188 +0,0 @@ -# Begin of automatic generation - -# Maximal error of functions: -Function: "acos": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "acos_downward": -ildouble: 2 -ldouble: 2 - -Function: "acos_towardzero": -ildouble: 2 -ldouble: 2 - -Function: "acos_upward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "acosh": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 2 - -Function: "acosh_downward": -double: 1 -idouble: 1 -ildouble: 6 -ldouble: 4 - -Function: "acosh_towardzero": -double: 1 -idouble: 1 -ildouble: 6 -ldouble: 4 - -Function: "acosh_upward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 3 - -Function: "asin": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "asin_downward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "asin_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "asin_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "asinh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "asinh_downward": -double: 1 -float: 1 -idouble: 1 -ildouble: 5 -ldouble: 5 - -Function: "asinh_towardzero": -double: 1 -float: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "asinh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "atan": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_upward": -double: 1 -float: 1 -idouble: 1 
-ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atanh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "atanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 4 - -Function: "atanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 3 - -Function: "atanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "cabs": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cacos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "cacos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cacos_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Real part of "cacos_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Real part of "cacos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Real part of "cacosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 
-ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cacosh_downward": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cacosh_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cacosh_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "carg": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "casin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "casin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "casin_downward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "casin_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Real part of "casin_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "casin_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Real part of "casin_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - 
-Function: Imaginary part of "casin_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Real part of "casinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "casinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "casinh_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Imaginary part of "casinh_downward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "casinh_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "casinh_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "casinh_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "casinh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "catan": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "catan_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "catan_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "catanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 
-ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catanh": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cbrt": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cbrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "cbrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cbrt_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ccos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ccos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ccos_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccos_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccos_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccos_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: 
Imaginary part of "ccos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "ccosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ccosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ccosh_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccosh_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccosh_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccosh_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccosh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "ccosh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cexp": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "cexp": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cexp_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cexp_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cexp_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cexp_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cexp_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cexp_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 
-ldouble: 3 - -Function: Imaginary part of "clog": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog10": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "clog10": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "clog10_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 8 -ldouble: 8 - -Function: Imaginary part of "clog10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog10_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 8 -ldouble: 8 - -Function: Imaginary part of "clog10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog10_upward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "clog10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "clog_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "clog_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "clog_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cos": -ildouble: 1 -ldouble: 1 - -Function: "cos_downward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "cos_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cos_upward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh": -double: 1 -float: 1 
-idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh_downward": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 3 - -Function: "cosh_towardzero": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh_upward": -double: 4 -float: 2 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 3 - -Function: Real part of "cpow": -double: 2 -float: 5 -idouble: 2 -ifloat: 5 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cpow": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "cpow_downward": -double: 5 -float: 8 -idouble: 5 -ifloat: 8 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "cpow_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cpow_towardzero": -double: 5 -float: 8 -idouble: 5 -ifloat: 8 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "cpow_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cpow_upward": -double: 4 -float: 1 -idouble: 4 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cpow_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "csin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 - -Function: Real part of "csin_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csin_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csin_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csin_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csin_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - 
-Function: Imaginary part of "csin_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "csinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "csinh_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csqrt": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "csqrt": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csqrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "csqrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "csqrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "csqrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "csqrt_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "csqrt_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 
-ldouble: 2 - -Function: Imaginary part of "ctan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ctan_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "ctan_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "ctan_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ctan_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ctanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ctanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "ctanh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "ctanh_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "ctanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ctanh_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ctanh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "erf": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 
- -Function: "erf_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erfc": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "erfc_downward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "erfc_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "erfc_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "exp": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp2_downward": -ildouble: 1 -ldouble: 1 - -Function: "exp2_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp2_upward": -ildouble: 1 -ldouble: 1 - -Function: "exp_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "expm1": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "expm1_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "expm1_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "expm1_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "gamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "gamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - 
-Function: "gamma_towardzero": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "gamma_upward": -double: 3 -float: 4 -idouble: 3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "hypot": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "j0": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "j0_downward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "j0_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "j0_upward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "j1": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "j1_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "j1_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "j1_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: "jn": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "jn_downward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "jn_towardzero": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "jn_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "lgamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "lgamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "lgamma_towardzero": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "lgamma_upward": -double: 3 -float: 4 -idouble: 
3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "log": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "log10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "log10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log1p": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log1p_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "log1p_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "log1p_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "log2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log_downward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow_downward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "pow_towardzero": -double: 1 -idouble: 1 
-ildouble: 4 -ldouble: 4 - -Function: "pow_upward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "sin": -ildouble: 1 -ldouble: 1 - -Function: "sin_downward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sin_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "sin_upward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sincos": -ildouble: 1 -ldouble: 1 - -Function: "sincos_downward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sincos_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "sincos_upward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sinh": -double: 1 -ildouble: 2 -ldouble: 2 - -Function: "sinh_downward": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 5 - -Function: "sinh_towardzero": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 4 - -Function: "sinh_upward": -double: 4 -float: 2 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 5 - -Function: "tan": -float: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "tan_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "tan_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "tan_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "tanh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "tanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 7 -ldouble: 4 - -Function: "tanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "tanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 4 - -Function: "tgamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_downward": -double: 3 -float: 4 -idouble: 3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_towardzero": -double: 4 -float: 4 
-idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "y0": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "y0_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y0_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y0_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "y1": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "y1_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 7 -ldouble: 7 - -Function: "y1_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y1_upward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 7 -ldouble: 7 - -Function: "yn": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "yn_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "yn_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "yn_upward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -# end of automatic generation diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name deleted file mode 100644 index 193dd704b3..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name +++ /dev/null @@ -1 +0,0 @@ -i686 diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S deleted file mode 100644 index f37850d0b3..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S +++ /dev/null @@ -1,553 +0,0 @@ -/* Optimized with sse2 version of cosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x| == 0: return 1.0-|x|. - * 2) if |x| < 2^-27: return 1.0-|x|. - * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^5*DP_COS2_1. - * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3, - * t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * s = (-1.0)^((n>>2)&1) - * if(n&2 != 0) { - * using cos(t) polynomial for |t|<Pi/4, result is - * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). - * } else { - * using sin(t) polynomial for |t|<Pi/4, result is - * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3, - * t=|x|-j*Pi/4. - * 6.2) Reconstruction same as (5.2). - * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3, - * t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. 
- * - * Special cases: - * cos(+-0) = 1 not raising inexact, - * cos(subnormal) raises inexact, - * cos(min_normalized) raises inexact, - * cos(normalized) raises inexact, - * cos(Inf) = NaN, raises invalid, sets errno to EDOM, - * cos(NaN) = NaN. - */ - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) -# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) -# define PUSH(REG) pushl REG; CFI_PUSH(REG) -# define POP(REG) popl REG; CFI_POP(REG) -# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) -# define ARG_X 8(%esp) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define ENTRANCE -# define RETURN ret -# define ARG_X 4(%esp) -#endif - - .text -ENTRY(__cosf_sse2) - /* Input: single precision x on stack at address ARG_X */ - - ENTRANCE - movl ARG_X, %eax /* Bits of x */ - cvtss2sd ARG_X, %xmm0 /* DP x */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - movd %eax, %xmm3 /* SP |x| */ - andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ - movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - addl $2, %eax /* n */ - subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t */ - testl $2, %eax /* n&2 != 0? 
*/ - jz L(sin_poly) - -/*L(cos_poly):*/ - /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) - */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_C4), %xmm4 /* C4 */ - mulsd %xmm0, %xmm4 /* z*C4 */ - movsd MO1(DP_C3), %xmm3 /* C3 */ - mulsd %xmm0, %xmm3 /* z*C3 */ - addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */ - mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */ - mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ - addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ - - addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd MO1(DP_ONES), %xmm3 - - mulsd MO2(DP_ONES,%eax,8), %xmm3 /* DP result */ - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. 
*/ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(sin_poly): - /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) - */ - - movaps %xmm0, %xmm4 /* t */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_S4), %xmm2 /* S4 */ - mulsd %xmm0, %xmm2 /* z*S4 */ - movsd MO1(DP_S3), %xmm3 /* S3 */ - mulsd %xmm0, %xmm3 /* z*S3 */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */ - mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ - addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */ - mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ - addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ - mulsd MO2(DP_ONES,%eax,8), %xmm4 - addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulsd %xmm4, %xmm3 - /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23? 
*/ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - addl $2, %eax /* n */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ - subl $68, %eax - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - /* clear unneeded remainder from %ah */ - andl $0xff, %eax - - imull $28, %eax, %ecx /* j*28 */ - movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ - mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19? 
*/ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd MO1(DP_2POW52), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+MO1(DP_2POW52), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $3, %eax /* n=k+3 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5? */ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - mulsd %xmm0, %xmm0 /* y=x^2 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=x^4 */ - movsd MO1(DP_C4), %xmm3 /* C4 */ - mulsd %xmm0, %xmm3 /* z*C4 */ - movsd MO1(DP_C3), %xmm5 /* C3 */ - mulsd %xmm0, %xmm5 /* z*C3 */ - addsd MO1(DP_C2), %xmm3 /* C2+z*C4 */ - mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */ - addsd MO1(DP_C1), %xmm5 /* C1+z*C3 */ - mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */ - addsd MO1(DP_C0), %xmm3 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */ - addsd %xmm5, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd MO1(DP_ONES), %xmm3 - cvtsd2ss %xmm3, %xmm3 /* SP result */ - -L(epilogue): - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm3, 0(%esp) /* Move result from sse... 
*/ - flds 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 4(%esp), %esp - RETURN - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27? */ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - movsd MO1(DP_COS2_1), %xmm3 /* DP DP_COS2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */ - addsd MO1(DP_COS2_0), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */ - /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */ - addsd MO1(DP_ONES), %xmm3 - cvtsd2ss %xmm3, %xmm3 /* SP result */ - jmp L(epilogue) - - .p2align 4 -L(arg_less_2pn27): - /* Here if |x|<2^-27 */ - movss ARG_X, %xmm0 /* x */ - andps MO1(SP_ABS_MASK),%xmm0 /* |x| */ - movss MO1(SP_ONE), %xmm3 /* 1.0 */ - subss %xmm0, %xmm3 /* result is 1.0-|x| */ - jmp L(epilogue) - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Here if x is Inf. Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - movl $EDOM, (%eax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. 
*/ - movss ARG_X, %xmm3 /* load x */ - subss %xmm3, %xmm3 /* Result is NaN */ - jmp L(epilogue) -END(__cosf_sse2) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - .long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomial - for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5. 
*/ - .p2align 3 -L(DP_COS2_0): - .long 0xff5cc6fd,0xbfdfffff - .type L(DP_COS2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_COS2_0)) - - .p2align 3 -L(DP_COS2_1): - .long 0xb178dac5,0x3fa55514 - .type L(DP_COS2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_COS2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* 1.0 */ - .type L(DP_ZERONE),@object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomial - for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */ - .p2align 3 -L(DP_S3): - .long 0x64e6b5b4,0x3ec71d72 - .type L(DP_S3), @object - ASM_SIZE_DIRECTIVE(L(DP_S3)) - - .p2align 3 -L(DP_S1): - .long 0x10c2688b,0x3f811111 - .type L(DP_S1), @object - ASM_SIZE_DIRECTIVE(L(DP_S1)) - - .p2align 3 -L(DP_S4): - .long 0x1674b58a,0xbe5a947e - .type L(DP_S4), @object - ASM_SIZE_DIRECTIVE(L(DP_S4)) - - .p2align 3 -L(DP_S2): - .long 0x8b4bd1f9,0xbf2a019f - .type L(DP_S2), @object - ASM_SIZE_DIRECTIVE(L(DP_S2)) - - .p2align 3 -L(DP_S0): - .long 0x55551cd9,0xbfc55555 - .type L(DP_S0), @object - ASM_SIZE_DIRECTIVE(L(DP_S0)) - -/* Coefficients of polynomial - for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. 
*/ - .p2align 3 -L(DP_C3): - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_C3), @object - ASM_SIZE_DIRECTIVE(L(DP_C3)) - - .p2align 3 -L(DP_C1): - .long 0x545c50c7,0x3fa55555 - .type L(DP_C1), @object - ASM_SIZE_DIRECTIVE(L(DP_C1)) - - .p2align 3 -L(DP_C4): - .long 0xdd8844d7,0xbe923c97 - .type L(DP_C4), @object - ASM_SIZE_DIRECTIVE(L(DP_C4)) - - .p2align 3 -L(DP_C2): - .long 0x348b6874,0xbf56c16b - .type L(DP_C2), @object - ASM_SIZE_DIRECTIVE(L(DP_C2)) - - .p2align 3 -L(DP_C0): - .long 0xfffe98ae,0xbfdfffff - .type L(DP_C0), @object - ASM_SIZE_DIRECTIVE(L(DP_C0)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 */ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - - .p2align 4 -L(SP_ABS_MASK): /* Mask for getting SP absolute value */ - .long 0x7fffffff,0x7fffffff - .long 0x7fffffff,0x7fffffff - .type L(SP_ABS_MASK), @object - 
ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) - - .p2align 2 -L(SP_ONE): - .long 0x3f800000 /* 1.0 */ - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -weak_alias (__cosf, cosf) diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c deleted file mode 100644 index af588de9dc..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c +++ /dev/null @@ -1,29 +0,0 @@ -/* Multiple versions of cosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern float __cosf_sse2 (float); -extern float __cosf_ia32 (float); -float __cosf (float); - -libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32); -weak_alias (__cosf, cosf); - -#define COSF __cosf_ia32 -#include <sysdeps/ieee754/flt-32/s_cosf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S deleted file mode 100644 index f31a925522..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S +++ /dev/null @@ -1,586 +0,0 @@ -/* Optimized with sse2 version of sincosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x|==0: sin(x)=x, - * cos(x)=1. - * 2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed, - * cos(x)=1-|x|. - * 3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1, - * cos(x)=1+1*x^2*DP_COS2_0+x^5*DP_COS2_1 - * 4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))), - * cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: - * k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * sign_sin = sign(x) * (-1.0)^(( n >>2)&1) - * sign_cos = (-1.0)^(((n+2)>>2)&1) - * poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t - * poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s - * if(n&2 != 0) { - * using cos(t) and sin(t) polynomials for |t|<Pi/4, results are - * cos(x) = poly_sin * sign_cos - * sin(x) = poly_cos * sign_sin - * } else { - * sin(x) = poly_sin * sign_sin - * cos(x) = poly_cos * sign_cos - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: - * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4 - * 6.2) Reconstruction same as (5.2). 
- * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: - * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. - * - * Special cases: - * sin/cos(+-0) = +-0/1 not raising inexact/underflow, - * sin/cos(subnormal) raises inexact/underflow, - * sin/cos(min_normalized) raises inexact/underflow, - * sin/cos(normalized) raises inexact, - * sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM, - * sin/cos(NaN) = NaN. - */ - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) -# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) -# define PUSH(REG) pushl REG; CFI_PUSH(REG) -# define POP(REG) popl REG; CFI_POP(REG) -# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) -# define ARG_X 8(%esp) -# define ARG_SIN_PTR 12(%esp) -# define ARG_COS_PTR 16(%esp) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define ENTRANCE -# define RETURN ret -# define ARG_X 4(%esp) -# define ARG_SIN_PTR 8(%esp) -# define ARG_COS_PTR 12(%esp) -#endif - - .text -ENTRY(__sincosf_sse2) - /* Input: single precision x on stack at address ARG_X */ - /* pointer to sin result on stack at address ARG_SIN_PTR */ - /* pointer to cos result on stack at address ARG_COS_PTR */ - - ENTRANCE - movl ARG_X, %eax /* Bits of x */ - cvtss2sd ARG_X, %xmm0 /* DP x */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4 ? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - movd %eax, %xmm3 /* SP |x| */ - andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ - movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4 ? 
*/ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - movl ARG_X, %ecx /* Load x */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - shrl $29, %ecx /* (sign of x) << 2 */ - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - subsd MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */ - - movaps %xmm0, %xmm4 /* t */ - movhpd MO1(DP_ONES), %xmm4 /* 1|t */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - movl $2, %edx - unpcklpd %xmm0, %xmm0 /* y|y */ - addl %eax, %edx /* k+2 */ - movaps %xmm0, %xmm1 /* y|y */ - mulpd %xmm0, %xmm0 /* z=t^4|z=t^4 */ - - movaps MO1(DP_SC4), %xmm2 /* S4 */ - mulpd %xmm0, %xmm2 /* z*S4 */ - movaps MO1(DP_SC3), %xmm3 /* S3 */ - mulpd %xmm0, %xmm3 /* z*S3 */ - xorl %eax, %ecx /* (sign_x ^ (k>>2))<<2 */ - addpd MO1(DP_SC2), %xmm2 /* S2+z*S4 */ - mulpd %xmm0, %xmm2 /* z*(S2+z*S4) */ - shrl $2, %edx /* (k+2)>>2 */ - addpd MO1(DP_SC1), %xmm3 /* S1+z*S3 */ - mulpd %xmm0, %xmm3 /* z*(S1+z*S3) */ - shrl $2, %ecx /* sign_x ^ k>>2 */ - addpd MO1(DP_SC0), %xmm2 /* S0+z*(S2+z*S4) */ - andl $1, %edx /* sign_cos = ((k+2)>>2)&1 */ - mulpd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - andl $1, %ecx /* sign_sin = sign_x ^ ((k>>2)&1) */ - addpd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulpd %xmm4, %xmm3 /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/ - testl $2, %eax /* n&2 != 0 ? 
*/ - addpd %xmm4, %xmm3 /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/ - jnz L(sin_result_sin_poly) - -/*L(sin_result_cos_poly):*/ - /* - * Here if - * cos(x) = poly_sin * sign_cos - * sin(x) = poly_cos * sign_sin - */ - movsd MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */ - movhpd MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */ - mulpd %xmm4, %xmm3 /* result_cos|result_sin */ - movl ARG_SIN_PTR, %eax - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movl ARG_COS_PTR, %ecx - movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ - movss %xmm0, (%ecx) /* store cos(x) */ - RETURN - - .p2align 4 -L(sin_result_sin_poly): - /* - * Here if - * sin(x) = poly_sin * sign_sin - * cos(x) = poly_cos * sign_cos - */ - movsd MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */ - movhpd MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */ - mulpd %xmm4, %xmm3 /* result_sin|result_cos */ - movl ARG_SIN_PTR, %eax - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movl ARG_COS_PTR, %ecx - movss %xmm0, (%ecx) /* store cos(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move sin(x) to xmm0[0] */ - movss %xmm0, (%eax) /* store sin(x) */ - RETURN - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN ? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23 ? 
*/ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movl ARG_X, %ecx /* Load x */ - movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - shrl $29, %ecx /* (sign of x) << 2 */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */ - /*is exponent bias */ - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - andl $0xff, %eax /* clear unneeded remainder from %ah*/ - - imull $28, %eax, %ecx /* j*28 */ - movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */ - mulsd 0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19 ? 
*/ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd MO1(DP_2POW52), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+MO1(DP_2POW52), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - movl ARG_X, %ecx /* Load x */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd MO2(DP_ZERONE,%edx,8), %xmm3/* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - shrl $29, %ecx /* (sign of x) << 2 */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $1, %eax /* n=k+1 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5 ? 
*/ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - movaps %xmm0, %xmm3 /* DP x */ - movhpd MO1(DP_ONES), %xmm3 /* DP 1|x */ - mulsd %xmm0, %xmm0 /* DP y=x^2 */ - unpcklpd %xmm0, %xmm0 /* DP y|y */ - movaps %xmm0, %xmm1 /* y|y */ - mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */ - - movapd MO1(DP_SC4), %xmm4 /* S4 */ - mulpd %xmm0, %xmm4 /* z*S4 */ - movapd MO1(DP_SC3), %xmm5 /* S3 */ - mulpd %xmm0, %xmm5 /* z*S3 */ - addpd MO1(DP_SC2), %xmm4 /* S2+z*S4 */ - mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */ - addpd MO1(DP_SC1), %xmm5 /* S1+z*S3 */ - mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */ - addpd MO1(DP_SC0), %xmm4 /* S0+z*(S2+z*S4) */ - mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ - mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ - mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ - addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/ - movl ARG_SIN_PTR, %eax - addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/ - movl ARG_COS_PTR, %ecx - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ - movss %xmm0, (%ecx) /* store cos(x) */ - RETURN - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27 ? 
*/ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - movaps %xmm0, %xmm1 /* DP x */ - movhpd MO1(DP_ONES), %xmm1 /* DP 1|x */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */ - - movaps MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */ - mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ - addpd MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */ - mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ - mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - movl ARG_SIN_PTR, %eax - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movl ARG_COS_PTR, %ecx - movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ - movss %xmm0, (%ecx) /* store cos(x) */ - RETURN - - .p2align 4 -L(arg_less_2pn27): - movss ARG_X, %xmm7 /* SP x */ - cmpl $0, %eax /* x=0 ? */ - je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */ - /* Here if |x|<2^-27 */ - /* - * Special cases here: - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - * cos(here)=1-|x| (raising inexact) - */ - movaps %xmm0, %xmm3 /* DP x */ - mulsd MO1(DP_SMALL), %xmm0 /* DP x*DP_SMALL */ - subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */ - andps MO1(SP_ABS_MASK), %xmm7 /* SP |x| */ - cvtsd2ss %xmm3, %xmm0 /* sin(x) */ - movl ARG_SIN_PTR, %eax - movss MO1(SP_ONE), %xmm1 /* SP 1.0 */ - movss %xmm0, (%eax) /* sin(x) store */ - movl ARG_COS_PTR, %ecx - subss %xmm7, %xmm1 /* cos(x) */ - movss %xmm1, (%ecx) /* cos(x) store */ - RETURN - - .p2align 4 -L(arg_zero): - movss MO1(SP_ONE), %xmm0 /* 1.0 */ - movl ARG_SIN_PTR, %eax - movl ARG_COS_PTR, %ecx - movss %xmm7, (%eax) /* sin(+-0)==x */ - movss %xmm0, (%ecx) /* cos(+-0)==1 */ - RETURN - - .p2align 4 -L(arg_inf_or_nan): - movss ARG_X, %xmm7 /* SP x */ - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Here if x is Inf. 
Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - movl $EDOM, (%eax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. */ - subss %xmm7, %xmm7 /* x-x, result is NaN */ - movl ARG_SIN_PTR, %eax - movl ARG_COS_PTR, %ecx - movss %xmm7, (%eax) - movss %xmm7, (%ecx) - RETURN -END(__sincosf_sse2) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - .long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomials for */ -/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low DP part, */ -/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */ -/* for |x|<2^-5. 
*/ - .p2align 4 -L(DP_SINCOS2_0): - .long 0x5543d49d,0xbfc55555 - .long 0xff5cc6fd,0xbfdfffff - .type L(DP_SINCOS2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0)) - - .p2align 4 -L(DP_SINCOS2_1): - .long 0x75cec8c5,0x3f8110f4 - .long 0xb178dac5,0x3fa55514 - .type L(DP_SINCOS2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* 1.0 */ - .type L(DP_ZERONE), @object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomials for */ -/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low DP part, */ -/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */ -/* for |t|<Pi/4. */ - .p2align 4 -L(DP_SC4): - .long 0x1674b58a,0xbe5a947e - .long 0xdd8844d7,0xbe923c97 - .type L(DP_SC4), @object - ASM_SIZE_DIRECTIVE(L(DP_SC4)) - - .p2align 4 -L(DP_SC3): - .long 0x64e6b5b4,0x3ec71d72 - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_SC3), @object - ASM_SIZE_DIRECTIVE(L(DP_SC3)) - - .p2align 4 -L(DP_SC2): - .long 0x8b4bd1f9,0xbf2a019f - .long 0x348b6874,0xbf56c16b - .type L(DP_SC2), @object - ASM_SIZE_DIRECTIVE(L(DP_SC2)) - - .p2align 4 -L(DP_SC1): - .long 0x10c2688b,0x3f811111 - .long 0x545c50c7,0x3fa55555 - .type L(DP_SC1), @object - ASM_SIZE_DIRECTIVE(L(DP_SC1)) - - .p2align 4 -L(DP_SC0): - .long 0x55551cd9,0xbfc55555 - .long 0xfffe98ae,0xbfdfffff - .type L(DP_SC0), @object - ASM_SIZE_DIRECTIVE(L(DP_SC0)) - - .p2align 3 -L(DP_SMALL): - .long 0x00000000,0x3cd00000 /* 2^(-50) */ - .type L(DP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(DP_SMALL)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 
*/ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - - .p2align 4 -L(SP_ABS_MASK): /* Mask for getting SP absolute value */ - .long 0x7fffffff,0x7fffffff - .long 0x7fffffff,0x7fffffff - .type L(SP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) - - .p2align 2 -L(SP_ONE): - .long 0x3f800000 /* 1.0 */ - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -weak_alias(__sincosf, sincosf) diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c deleted file mode 100644 index 9428f9b4ea..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Multiple versions of sincosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern void __sincosf_sse2 (float, float *, float *); -extern void __sincosf_ia32 (float, float *, float *); -void __sincosf (float, float *, float *); - -libm_ifunc (__sincosf, - HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32); -weak_alias (__sincosf, sincosf); - -#define SINCOSF __sincosf_ia32 -#include <sysdeps/ieee754/flt-32/s_sincosf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S deleted file mode 100644 index ee96018061..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S +++ /dev/null @@ -1,566 +0,0 @@ -/* Optimized with sse2 version of sinf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x| == 0: return x. 
- * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed. - * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1. - * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, - * t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * s = sign(x) * (-1.0)^((n>>2)&1) - * if(n&2 != 0) { - * using cos(t) polynomial for |t|<Pi/4, result is - * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). - * } else { - * using sin(t) polynomial for |t|<Pi/4, result is - * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, - * t=|x|-j*Pi/4. - * 6.2) Reconstruction same as (5.2). - * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, - * t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. - * - * Special cases: - * sin(+-0) = +-0 not raising inexact/underflow, - * sin(subnormal) raises inexact/underflow, - * sin(min_normalized) raises inexact/underflow, - * sin(normalized) raises inexact, - * sin(Inf) = NaN, raises invalid, sets errno to EDOM, - * sin(NaN) = NaN. 
- */ - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) -# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) -# define PUSH(REG) pushl REG; CFI_PUSH(REG) -# define POP(REG) popl REG; CFI_POP(REG) -# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) -# define ARG_X 8(%esp) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define ENTRANCE -# define RETURN ret -# define ARG_X 4(%esp) -#endif - - .text -ENTRY(__sinf_sse2) - /* Input: single precision x on stack at address ARG_X */ - - ENTRANCE - movl ARG_X, %eax /* Bits of x */ - cvtss2sd ARG_X, %xmm0 /* DP x */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - movd %eax, %xmm3 /* SP |x| */ - andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ - movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - movl ARG_X, %ecx /* Load x */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - shrl $31, %ecx /* sign of x */ - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */ - testl $2, %eax /* n&2 != 0? 
*/ - jz L(sin_poly) - -/*L(cos_poly):*/ - /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) - */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_C4), %xmm4 /* C4 */ - mulsd %xmm0, %xmm4 /* z*C4 */ - xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ - movsd MO1(DP_C3), %xmm3 /* C3 */ - mulsd %xmm0, %xmm3 /* z*C3 */ - addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */ - mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */ - mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ - addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ - - addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd MO1(DP_ONES), %xmm3 - - mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */ - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. 
*/ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(sin_poly): - /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) - */ - - movaps %xmm0, %xmm4 /* t */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_S4), %xmm2 /* S4 */ - mulsd %xmm0, %xmm2 /* z*S4 */ - movsd MO1(DP_S3), %xmm3 /* S3 */ - mulsd %xmm0, %xmm3 /* z*S3 */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */ - mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ - addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */ - mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ - addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ - mulsd MO2(DP_ONES,%ecx,8), %xmm4 - addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulsd %xmm4, %xmm3 - /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23? 
*/ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movl ARG_X, %ecx /* Load x */ - movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - shrl $31, %ecx /* sign bit of x */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ - subl $68, %eax - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - /* clear unneeded remainder from %ah */ - andl $0xff, %eax - - imull $28, %eax, %ecx /* j*28 */ - movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ - mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19? 
*/ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd MO1(DP_2POW52), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+MO1(DP_2POW52), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - movl ARG_X, %ecx /* Load x */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - shrl $31, %ecx /* sign of x */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $1, %eax /* n=k+1 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5? 
*/ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - movaps %xmm0, %xmm3 /* x */ - mulsd %xmm0, %xmm0 /* y=x^2 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=x^4 */ - movsd MO1(DP_S4), %xmm4 /* S4 */ - mulsd %xmm0, %xmm4 /* z*S4 */ - movsd MO1(DP_S3), %xmm5 /* S3 */ - mulsd %xmm0, %xmm5 /* z*S3 */ - addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */ - mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */ - addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */ - mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */ - addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ - mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ - mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ - /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm5, %xmm4 - /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - cvtsd2ss %xmm3, %xmm3 /* SP result */ - -L(epilogue): - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm3, 0(%esp) /* Move result from sse... */ - flds 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 4(%esp), %esp - RETURN - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27? */ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - movaps %xmm0, %xmm1 /* DP x */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ - addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ - mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - cvtsd2ss %xmm3, %xmm3 /* SP result */ - jmp L(epilogue) - - .p2align 4 -L(arg_less_2pn27): - movss ARG_X, %xmm3 /* SP x */ - cmpl $0, %eax /* x=0? 
*/ - je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */ - /* Here if |x|<2^-27 */ - /* - * Special cases here: - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - */ - movaps %xmm0, %xmm3 /* Copy of DP x */ - mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */ - subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */ - cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */ - jmp L(epilogue) - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Here if x is Inf. Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - movl $EDOM, (%eax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. */ - movss ARG_X, %xmm3 /* load x */ - subss %xmm3, %xmm3 /* Result is NaN */ - jmp L(epilogue) -END(__sinf_sse2) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - 
.long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomial - for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */ - .p2align 3 -L(DP_SIN2_0): - .long 0x5543d49d,0xbfc55555 - .type L(DP_SIN2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_SIN2_0)) - - .p2align 3 -L(DP_SIN2_1): - .long 0x75cec8c5,0x3f8110f4 - .type L(DP_SIN2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_SIN2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* 1.0 */ - .type L(DP_ZERONE), @object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomial - for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */ - .p2align 3 -L(DP_S3): - .long 0x64e6b5b4,0x3ec71d72 - .type L(DP_S3), @object - ASM_SIZE_DIRECTIVE(L(DP_S3)) - - .p2align 3 -L(DP_S1): - .long 0x10c2688b,0x3f811111 - .type L(DP_S1), @object - ASM_SIZE_DIRECTIVE(L(DP_S1)) - - .p2align 3 -L(DP_S4): - .long 0x1674b58a,0xbe5a947e - .type L(DP_S4), @object - ASM_SIZE_DIRECTIVE(L(DP_S4)) - - .p2align 3 -L(DP_S2): - .long 0x8b4bd1f9,0xbf2a019f - .type L(DP_S2), @object - ASM_SIZE_DIRECTIVE(L(DP_S2)) - - .p2align 3 -L(DP_S0): - .long 0x55551cd9,0xbfc55555 - .type L(DP_S0), @object - ASM_SIZE_DIRECTIVE(L(DP_S0)) - - .p2align 3 -L(DP_SMALL): - .long 0x00000000,0x3cd00000 /* 2^(-50) */ - .type L(DP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(DP_SMALL)) - -/* Coefficients of polynomial - for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. 
*/ - .p2align 3 -L(DP_C3): - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_C3), @object - ASM_SIZE_DIRECTIVE(L(DP_C3)) - - .p2align 3 -L(DP_C1): - .long 0x545c50c7,0x3fa55555 - .type L(DP_C1), @object - ASM_SIZE_DIRECTIVE(L(DP_C1)) - - .p2align 3 -L(DP_C4): - .long 0xdd8844d7,0xbe923c97 - .type L(DP_C4), @object - ASM_SIZE_DIRECTIVE(L(DP_C4)) - - .p2align 3 -L(DP_C2): - .long 0x348b6874,0xbf56c16b - .type L(DP_C2), @object - ASM_SIZE_DIRECTIVE(L(DP_C2)) - - .p2align 3 -L(DP_C0): - .long 0xfffe98ae,0xbfdfffff - .type L(DP_C0), @object - ASM_SIZE_DIRECTIVE(L(DP_C0)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 */ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - -weak_alias (__sinf, sinf) diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c deleted file mode 100644 index 8ccdd2f34d..0000000000 
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Multiple versions of sinf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern float __sinf_sse2 (float); -extern float __sinf_ia32 (float); -float __sinf (float); - -libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32); -weak_alias (__sinf, sinf); -#define SINF __sinf_ia32 -#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/sysdeps/i386/i686/fpu/s_fmax.S b/sysdeps/i386/i686/fpu/s_fmax.S deleted file mode 100644 index ace8db9410..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmax.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmax) - fldl 4(%esp) // x - fldl 12(%esp) // x : y - - fucomi %st(0), %st - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fxch - - fucomi %st(1), %st - fcmovb %st(1), %st - - fstp %st(1) - - ret -END(__fmax) -weak_alias (__fmax, fmax) diff --git a/sysdeps/i386/i686/fpu/s_fmaxf.S b/sysdeps/i386/i686/fpu/s_fmaxf.S deleted file mode 100644 index 3a25951a09..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmaxf.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - - .text -ENTRY(__fmaxf) - flds 4(%esp) // x - flds 8(%esp) // x : y - - fucomi %st(0), %st - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fxch - - fucomi %st(1), %st - fcmovb %st(1), %st - - fstp %st(1) - - ret -END(__fmaxf) -weak_alias (__fmaxf, fmaxf) diff --git a/sysdeps/i386/i686/fpu/s_fmaxl.S b/sysdeps/i386/i686/fpu/s_fmaxl.S deleted file mode 100644 index 3f6c21c63d..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmaxl.S +++ /dev/null @@ -1,58 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmaxl) - fldt 4(%esp) // x - fldt 16(%esp) // x : y - - fucomi %st(1), %st - jp 2f - fcmovb %st(1), %st - - fstp %st(1) - - ret - -2: // Unordered. - fucomi %st(0), %st - jp 3f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 11(%esp) - jz 4f - fstp %st(1) - ret - -3: // st(0) is a NaN; st(1) may or may not be. - fxch - fucomi %st(0), %st - jp 4f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. 
- testb $0x40, 23(%esp) - jz 4f - fstp %st(1) - ret - -4: // Both arguments are NaNs, or one is a signaling NaN. - faddp - ret -END(__fmaxl) -weak_alias (__fmaxl, fmaxl) diff --git a/sysdeps/i386/i686/fpu/s_fmin.S b/sysdeps/i386/i686/fpu/s_fmin.S deleted file mode 100644 index 72d306fd79..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmin.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmin) - fldl 4(%esp) // x - fldl 12(%esp) // x : y - - fucomi %st(0), %st - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fucomi %st(1), %st - fcmovnb %st(1), %st - - fstp %st(1) - - ret -END(__fmin) -weak_alias (__fmin, fmin) diff --git a/sysdeps/i386/i686/fpu/s_fminf.S b/sysdeps/i386/i686/fpu/s_fminf.S deleted file mode 100644 index 52ea892bad..0000000000 --- a/sysdeps/i386/i686/fpu/s_fminf.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fminf) - flds 4(%esp) // x - flds 8(%esp) // x : y - - fucomi %st(0), %st - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fucomi %st(1), %st - fcmovnb %st(1), %st - - fstp %st(1) - - ret -END(__fminf) -weak_alias (__fminf, fminf) diff --git a/sysdeps/i386/i686/fpu/s_fminl.S b/sysdeps/i386/i686/fpu/s_fminl.S deleted file mode 100644 index e1cb83fed7..0000000000 --- a/sysdeps/i386/i686/fpu/s_fminl.S +++ /dev/null @@ -1,58 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fminl) - fldt 4(%esp) // x - fldt 16(%esp) // x : y - - fucomi %st(1), %st - jp 2f - fcmovnb %st(1), %st - - fstp %st(1) - - ret - -2: // Unordered. - fucomi %st(0), %st - jp 3f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 11(%esp) - jz 4f - fstp %st(1) - ret - -3: // st(0) is a NaN; st(1) may or may not be. - fxch - fucomi %st(0), %st - jp 4f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 23(%esp) - jz 4f - fstp %st(1) - ret - -4: // Both arguments are NaNs, or one is a signaling NaN. - faddp - ret -END(__fminl) -weak_alias (__fminl, fminl) diff --git a/sysdeps/i386/i686/hp-timing.h b/sysdeps/i386/i686/hp-timing.h deleted file mode 100644 index 1b11410feb..0000000000 --- a/sysdeps/i386/i686/hp-timing.h +++ /dev/null @@ -1,42 +0,0 @@ -/* High precision, low overhead timing functions. i686 version. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#ifndef _HP_TIMING_H -#define _HP_TIMING_H 1 - -/* We always assume having the timestamp register. */ -#define HP_TIMING_AVAIL (1) -#define HP_SMALL_TIMING_AVAIL (1) - -/* We indeed have inlined functions. */ -#define HP_TIMING_INLINE (1) - -/* We use 64bit values for the times. */ -typedef unsigned long long int hp_timing_t; - -/* That's quite simple. Use the `rdtsc' instruction. Note that the value - might not be 100% accurate since there might be some more instructions - running in this moment. This could be changed by using a barrier like - 'cpuid' right before the `rdtsc' instruciton. But we are not interested - in accurate clock cycles here so we don't do this. */ -#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var)) - -#include <hp-timing-common.h> - -#endif /* hp-timing.h */ diff --git a/sysdeps/i386/i686/init-arch.h b/sysdeps/i386/i686/init-arch.h deleted file mode 100644 index f55f80efa0..0000000000 --- a/sysdeps/i386/i686/init-arch.h +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define MINIMUM_ISA 686 -#include <sysdeps/x86/init-arch.h> diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S deleted file mode 100644 index 5140ee2145..0000000000 --- a/sysdeps/i386/i686/memcmp.S +++ /dev/null @@ -1,408 +0,0 @@ -/* Compare two memory blocks for differences in the first COUNT bytes. - Copyright (C) 2004-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* Preserve EBX. */ -#define BLK1 PARMS -#define BLK2 BLK1+4 -#define LEN BLK2+4 -#define ENTRANCE pushl %ebx; cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (ebx, 0) -#define RETURN popl %ebx; cfi_adjust_cfa_offset (-4); \ - cfi_restore (ebx); ret - -/* Load an entry in a jump table into EBX. TABLE is a jump table - with relative offsets. INDEX is a register contains the index - into the jump table. */ -#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. 
*/ \ - addl (%ebx,INDEX,4), %ebx - - .text - ALIGN (4) -ENTRY (memcmp) - ENTRANCE - - movl BLK1(%esp), %eax - movl BLK2(%esp), %edx - movl LEN(%esp), %ecx - - cmpl $1, %ecx - jne L(not_1) - movzbl (%eax), %ecx /* LEN == 1 */ - cmpb (%edx), %cl - jne L(neq) -L(bye): - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) -L(neq): - sbbl %eax, %eax - sbbl $-1, %eax - RETURN - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) -L(not_1): - jl L(bye) /* LEN == 0 */ - - pushl %esi - cfi_adjust_cfa_offset (4) - movl %eax, %esi - cfi_rel_offset (esi, 0) - cmpl $32, %ecx; - jge L(32bytesormore) /* LEN => 32 */ - - LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx) - addl %ecx, %edx - addl %ecx, %esi - jmp *%ebx - - ALIGN (4) -L(28bytes): - movl -28(%esi), %eax - movl -28(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(24bytes): - movl -24(%esi), %eax - movl -24(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(20bytes): - movl -20(%esi), %eax - movl -20(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(16bytes): - movl -16(%esi), %eax - movl -16(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(12bytes): - movl -12(%esi), %eax - movl -12(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(8bytes): - movl -8(%esi), %eax - movl -8(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(4bytes): - movl -4(%esi), %eax - movl -4(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(0bytes): - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) -L(29bytes): - movl -29(%esi), %eax - movl -29(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(25bytes): - movl -25(%esi), %eax - movl -25(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(21bytes): - movl -21(%esi), %eax - movl -21(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(17bytes): - movl -17(%esi), %eax - movl -17(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(13bytes): - 
movl -13(%esi), %eax - movl -13(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(9bytes): - movl -9(%esi), %eax - movl -9(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(5bytes): - movl -5(%esi), %eax - movl -5(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(1bytes): - movzbl -1(%esi), %eax - cmpb -1(%edx), %al - jne L(set) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) -L(30bytes): - movl -30(%esi), %eax - movl -30(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(26bytes): - movl -26(%esi), %eax - movl -26(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(22bytes): - movl -22(%esi), %eax - movl -22(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(18bytes): - movl -18(%esi), %eax - movl -18(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(14bytes): - movl -14(%esi), %eax - movl -14(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(10bytes): - movl -10(%esi), %eax - movl -10(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(6bytes): - movl -6(%esi), %eax - movl -6(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(2bytes): - movzwl -2(%esi), %eax - movzwl -2(%edx), %ecx - cmpb %cl, %al - jne L(set) - cmpl %ecx, %eax - jne L(set) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) -L(31bytes): - movl -31(%esi), %eax - movl -31(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(27bytes): - movl -27(%esi), %eax - movl -27(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(23bytes): - movl -23(%esi), %eax - movl -23(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(19bytes): - movl -19(%esi), %eax - movl -19(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(15bytes): - movl -15(%esi), %eax - movl -15(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(11bytes): - movl -11(%esi), %eax - movl -11(%edx), 
%ecx - cmpl %ecx, %eax - jne L(find_diff) -L(7bytes): - movl -7(%esi), %eax - movl -7(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(3bytes): - movzwl -3(%esi), %eax - movzwl -3(%edx), %ecx - cmpb %cl, %al - jne L(set) - cmpl %ecx, %eax - jne L(set) - movzbl -1(%esi), %eax - cmpb -1(%edx), %al - jne L(set) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) - ALIGN (4) -/* ECX >= 32. */ -L(32bytesormore): - subl $32, %ecx - - movl (%esi), %eax - cmpl (%edx), %eax - jne L(load_ecx) - - movl 4(%esi), %eax - cmpl 4(%edx), %eax - jne L(load_ecx_4) - - movl 8(%esi), %eax - cmpl 8(%edx), %eax - jne L(load_ecx_8) - - movl 12(%esi), %eax - cmpl 12(%edx), %eax - jne L(load_ecx_12) - - movl 16(%esi), %eax - cmpl 16(%edx), %eax - jne L(load_ecx_16) - - movl 20(%esi), %eax - cmpl 20(%edx), %eax - jne L(load_ecx_20) - - movl 24(%esi), %eax - cmpl 24(%edx), %eax - jne L(load_ecx_24) - - movl 28(%esi), %eax - cmpl 28(%edx), %eax - jne L(load_ecx_28) - - addl $32, %esi - addl $32, %edx - cmpl $32, %ecx - jge L(32bytesormore) - - LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx) - addl %ecx, %edx - addl %ecx, %esi - jmp *%ebx - -L(load_ecx_28): - addl $0x4, %edx -L(load_ecx_24): - addl $0x4, %edx -L(load_ecx_20): - addl $0x4, %edx -L(load_ecx_16): - addl $0x4, %edx -L(load_ecx_12): - addl $0x4, %edx -L(load_ecx_8): - addl $0x4, %edx -L(load_ecx_4): - addl $0x4, %edx -L(load_ecx): - movl (%edx), %ecx - -L(find_diff): - cmpb %cl, %al - jne L(set) - cmpb %ch, %ah - jne L(set) - shrl $16,%eax - shrl $16,%ecx - cmpb %cl, %al - jne L(set) - /* We get there only if we already know there is a - difference. 
*/ - cmpl %ecx, %eax -L(set): - sbbl %eax, %eax - sbbl $-1, %eax - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - RETURN -END (memcmp) - - .section .rodata - ALIGN (2) -L(table_32bytes) : - .long L(0bytes) - L(table_32bytes) - .long L(1bytes) - L(table_32bytes) - .long L(2bytes) - L(table_32bytes) - .long L(3bytes) - L(table_32bytes) - .long L(4bytes) - L(table_32bytes) - .long L(5bytes) - L(table_32bytes) - .long L(6bytes) - L(table_32bytes) - .long L(7bytes) - L(table_32bytes) - .long L(8bytes) - L(table_32bytes) - .long L(9bytes) - L(table_32bytes) - .long L(10bytes) - L(table_32bytes) - .long L(11bytes) - L(table_32bytes) - .long L(12bytes) - L(table_32bytes) - .long L(13bytes) - L(table_32bytes) - .long L(14bytes) - L(table_32bytes) - .long L(15bytes) - L(table_32bytes) - .long L(16bytes) - L(table_32bytes) - .long L(17bytes) - L(table_32bytes) - .long L(18bytes) - L(table_32bytes) - .long L(19bytes) - L(table_32bytes) - .long L(20bytes) - L(table_32bytes) - .long L(21bytes) - L(table_32bytes) - .long L(22bytes) - L(table_32bytes) - .long L(23bytes) - L(table_32bytes) - .long L(24bytes) - L(table_32bytes) - .long L(25bytes) - L(table_32bytes) - .long L(26bytes) - L(table_32bytes) - .long L(27bytes) - L(table_32bytes) - .long L(28bytes) - L(table_32bytes) - .long L(29bytes) - L(table_32bytes) - .long L(30bytes) - L(table_32bytes) - .long L(31bytes) - L(table_32bytes) - - -#undef bcmp -weak_alias (memcmp, bcmp) -libc_hidden_builtin_def (memcmp) diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S deleted file mode 100644 index 1d61447430..0000000000 --- a/sysdeps/i386/i686/memcpy.S +++ /dev/null @@ -1,98 +0,0 @@ -/* Copy memory block and return pointer to beginning of destination block - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 -#define LEN SRC+4 - - .text -#if defined PIC && IS_IN (libc) -ENTRY_CHK (__memcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__memcpy_chk) -#endif -ENTRY (memcpy) - - movl %edi, %eax - movl DEST(%esp), %edi - movl %esi, %edx - movl SRC(%esp), %esi - - movl %edi, %ecx - xorl %esi, %ecx - andl $3, %ecx - movl LEN(%esp), %ecx - cld - jne .Lunaligned - - cmpl $3, %ecx - jbe .Lunaligned - - testl $3, %esi - je 1f - movsb - decl %ecx - testl $3, %esi - je 1f - movsb - decl %ecx - testl $3, %esi - je 1f - movsb - decl %ecx -1: pushl %eax - movl %ecx, %eax - shrl $2, %ecx - andl $3, %eax - rep - movsl - movl %eax, %ecx - rep - movsb - popl %eax - -.Lend: movl %eax, %edi - movl %edx, %esi - movl DEST(%esp), %eax - - ret - - /* When we come here the pointers do not have the same - alignment or the length is too short. No need to optimize for - aligned memory accesses. 
*/ -.Lunaligned: - shrl $1, %ecx - jnc 1f - movsb -1: shrl $1, %ecx - jnc 2f - movsw -2: rep - movsl - jmp .Lend -END (memcpy) -libc_hidden_builtin_def (memcpy) diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S deleted file mode 100644 index f60c3d501b..0000000000 --- a/sysdeps/i386/i686/memmove.S +++ /dev/null @@ -1,120 +0,0 @@ -/* Copy memory block and return pointer to beginning of destination block - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 2003-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 2003. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* one spilled register */ -#define RTN PARMS - - .text - -#ifdef USE_AS_BCOPY -# define SRC RTN -# define DEST SRC+4 -# define LEN DEST+4 -#else -# define DEST RTN -# define SRC DEST+4 -# define LEN SRC+4 - -# if defined PIC && IS_IN (libc) -ENTRY_CHK (__memmove_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__memmove_chk) -# endif -#endif - -ENTRY (memmove) - - pushl %edi - cfi_adjust_cfa_offset (4) - - movl LEN(%esp), %ecx - movl DEST(%esp), %edi - cfi_rel_offset (edi, 0) - movl %esi, %edx - movl SRC(%esp), %esi - cfi_register (esi, edx) - - movl %edi, %eax - subl %esi, %eax - cmpl %eax, %ecx - ja 3f - - cld - shrl $1, %ecx - jnc 1f - movsb -1: shrl $1, %ecx - jnc 2f - movsw -2: rep - movsl - movl %edx, %esi - cfi_restore (esi) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -#endif - - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - cfi_register (esi, edx) - - /* Backward copying. */ -3: std - leal -1(%edi, %ecx), %edi - leal -1(%esi, %ecx), %esi - shrl $1, %ecx - jnc 1f - movsb -1: subl $1, %edi - subl $1, %esi - shrl $1, %ecx - jnc 2f - movsw -2: subl $2, %edi - subl $2, %esi - rep - movsl - movl %edx, %esi - cfi_restore (esi) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -#endif - - cld - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (memmove) -#ifndef USE_AS_BCOPY -libc_hidden_builtin_def (memmove) -#endif diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S deleted file mode 100644 index 31cb4efdb2..0000000000 --- a/sysdeps/i386/i686/mempcpy.S +++ /dev/null @@ -1,65 +0,0 @@ -/* Copy memory block and return pointer to following byte. - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 -#define LEN SRC+4 - - .text -#if defined PIC && IS_IN (libc) -ENTRY_CHK (__mempcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__mempcpy_chk) -#endif -ENTRY (__mempcpy) - - movl LEN(%esp), %ecx - movl %edi, %eax - cfi_register (edi, eax) - movl DEST(%esp), %edi - movl %esi, %edx - cfi_register (esi, edx) - movl SRC(%esp), %esi - cld - shrl $1, %ecx - jnc 1f - movsb -1: shrl $1, %ecx - jnc 2f - movsw -2: rep - movsl - xchgl %edi, %eax - cfi_restore (edi) - movl %edx, %esi - cfi_restore (esi) - - ret -END (__mempcpy) -libc_hidden_def (__mempcpy) -weak_alias (__mempcpy, mempcpy) -libc_hidden_builtin_def (mempcpy) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S deleted file mode 100644 index 24d06178d2..0000000000 --- a/sysdeps/i386/i686/memset.S +++ /dev/null @@ -1,100 +0,0 @@ -/* memset/bzero -- set memory area to CH/0 - Highly optimized version for ix86, x>=6. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -#else -# define RTN PARMS -# define DEST RTN -# define CHR DEST+4 -# define LEN CHR+4 -#endif - - .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY_CHK (__memset_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__memset_chk) -#endif -ENTRY (memset) - - cld - pushl %edi - cfi_adjust_cfa_offset (4) - movl DEST(%esp), %edx - movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* fill with 0 */ -#else - movzbl CHR(%esp), %eax -#endif - jecxz 1f - movl %edx, %edi - cfi_rel_offset (edi, 0) - andl $3, %edx - jz 2f /* aligned */ - jp 3f /* misaligned at 3, store just one byte below */ - stosb /* misaligned at 1 or 2, store two bytes */ - decl %ecx - jz 1f -3: stosb - decl %ecx - jz 1f - xorl $1, %edx - jnz 2f /* was misaligned at 2 or 3, now aligned */ - stosb /* was misaligned at 1, store third byte */ - decl %ecx -2: movl %ecx, %edx - shrl $2, %ecx - andl $3, %edx -#ifndef USE_AS_BZERO - imul $0x01010101, %eax -#endif - rep - stosl - movl %edx, %ecx - rep - stosb - -1: -#ifndef USE_AS_BZERO - movl DEST(%esp), %eax /* start address of destination is result */ -#endif - popl 
%edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (memset) -libc_hidden_builtin_def (memset) - -#if defined SHARED && IS_IN (libc) && !defined __memset_chk \ - && !defined USE_AS_BZERO -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -#endif diff --git a/sysdeps/i386/i686/memusage.h b/sysdeps/i386/i686/memusage.h deleted file mode 100644 index 77a020d7c0..0000000000 --- a/sysdeps/i386/i686/memusage.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (C) 2000-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; }) -#define GETTIME(low,high) asm ("rdtsc" : "=a" (low), "=d" (high)) - -#include <sysdeps/generic/memusage.h> diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile deleted file mode 100644 index 4a0c20c051..0000000000 --- a/sysdeps/i386/i686/multiarch/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -ifeq ($(subdir),csu) -tests += test-multiarch -endif - -ifeq ($(subdir),string) -gen-as-const-headers += locale-defines.sym -sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ - memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ - memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ - memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ - strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ - memcmp-ssse3 memcmp-sse4 varshift \ - strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ - strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \ - strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \ - strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \ - strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \ - memchr-sse2 memchr-sse2-bsf \ - memrchr-sse2 memrchr-sse2-bsf memrchr-c \ - rawmemchr-sse2 rawmemchr-sse2-bsf \ - strnlen-sse2 strnlen-c \ - strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \ - strncase_l-c strncase-c strncase_l-ssse3 \ - strcasecmp_l-sse4 strncase_l-sse4 \ - bcopy-sse2-unaligned memcpy-sse2-unaligned \ - mempcpy-sse2-unaligned memmove-sse2-unaligned \ - strcspn-c strpbrk-c strspn-c -CFLAGS-varshift.c += -msse4 -CFLAGS-strcspn-c.c += -msse4 -CFLAGS-strpbrk-c.c += -msse4 -CFLAGS-strspn-c.c += -msse4 -endif - -ifeq ($(subdir),wcsmbs) -sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \ - wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \ - wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c -endif - -ifeq ($(subdir),math) -libm-sysdep_routines += s_fma-fma s_fmaf-fma -CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse -CFLAGS-s_fmaf-fma.c += 
-mavx -mfpmath=sse -endif diff --git a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S deleted file mode 100644 index efef2a10dd..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY __bcopy_sse2_unaligned -#include "memcpy-sse2-unaligned.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S deleted file mode 100644 index cbc8b420e8..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY __bcopy_ssse3_rep -#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S deleted file mode 100644 index 36aac44b9c..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY __bcopy_ssse3 -#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S deleted file mode 100644 index 877f82c28f..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy.S +++ /dev/null @@ -1,59 +0,0 @@ -/* Multiple versions of bcopy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(bcopy) - .type bcopy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__bcopy_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__bcopy_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep) -2: ret -END(bcopy) - -# undef ENTRY -# define ENTRY(name) \ - .type __bcopy_ia32, @function; \ - .p2align 4; \ - .globl __bcopy_ia32; \ - .hidden __bcopy_ia32; \ - __bcopy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32 - -#endif - -#include "../bcopy.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S deleted file mode 100644 index 507b288bb3..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2_rep __bzero_sse2_rep -#include "memset-sse2-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S deleted file mode 100644 index 8d04512e4e..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2 __bzero_sse2 -#include "memset-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S deleted file mode 100644 index 9dac490aa2..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero.S +++ /dev/null @@ -1,62 +0,0 @@ -/* Multiple versions of bzero - All versions 
must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(__bzero) - .type __bzero, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__bzero_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX ( __bzero_sse2) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__bzero_sse2_rep) -2: ret -END(__bzero) - -# undef ENTRY -# define ENTRY(name) \ - .type __bzero_ia32, @function; \ - .p2align 4; \ - .globl __bzero_ia32; \ - .hidden __bzero_ia32; \ - __bzero_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __bzero_ia32, .-__bzero_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI___bzero; __GI___bzero = __bzero_ia32 -# endif -#endif - -#include "../bzero.S" diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c deleted file mode 100644 index e8026a2a78..0000000000 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ /dev/null @@ -1,376 +0,0 @@ -/* Enumerate available IFUNC implementations of a function. i686 version. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <assert.h> -#include <string.h> -#include <wchar.h> -#include <ifunc-impl-list.h> -#include "init-arch.h" - -/* Maximum number of IFUNC implementations. */ -#define MAX_IFUNC 4 - -/* Fill ARRAY of MAX elements with IFUNC implementations for function - NAME and return the number of valid entries. */ - -size_t -__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - size_t max) -{ - assert (max >= MAX_IFUNC); - - size_t i = 0; - - /* Support sysdeps/i386/i686/multiarch/bcopy.S. 
*/ - IFUNC_IMPL (i, name, bcopy, - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3), - __bcopy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3), - __bcopy_ssse3) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2), - __bcopy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/bzero.S. */ - IFUNC_IMPL (i, name, bzero, - IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), - __bzero_sse2_rep) - IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), - __bzero_sse2) - IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memchr.S. */ - IFUNC_IMPL (i, name, memchr, - IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2), - __memchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2), - __memchr_sse2) - IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memcmp.S. */ - IFUNC_IMPL (i, name, memcmp, - IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2), - __memcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), - __memcmp_ssse3) - IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */ - IFUNC_IMPL (i, name, __memmove_chk, - IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSSE3), - __memmove_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSSE3), - __memmove_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSE2), - __memmove_chk_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, - __memmove_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memmove.S. 
*/ - IFUNC_IMPL (i, name, memmove, - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), - __memmove_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), - __memmove_ssse3) - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2), - __memmove_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memrchr.S. */ - IFUNC_IMPL (i, name, memrchr, - IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2), - __memrchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2), - __memrchr_sse2) - IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memset_chk.S. */ - IFUNC_IMPL (i, name, __memset_chk, - IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_CPU_FEATURE (SSE2), - __memset_chk_sse2_rep) - IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_CPU_FEATURE (SSE2), - __memset_chk_sse2) - IFUNC_IMPL_ADD (array, i, __memset_chk, 1, - __memset_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memset.S. */ - IFUNC_IMPL (i, name, memset, - IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), - __memset_sse2_rep) - IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), - __memset_sse2) - IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32)) - - /* Support sysdeps/i386/i686/multiarch/rawmemchr.S. */ - IFUNC_IMPL (i, name, rawmemchr, - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2), - __rawmemchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2), - __rawmemchr_sse2) - IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/stpncpy.S. */ - IFUNC_IMPL (i, name, stpncpy, - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3), - __stpncpy_ssse3) - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2), - __stpncpy_sse2) - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/stpcpy.S. 
*/ - IFUNC_IMPL (i, name, stpcpy, - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3), - __stpcpy_ssse3) - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2), - __stpcpy_sse2) - IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */ - IFUNC_IMPL (i, name, strcasecmp, - IFUNC_IMPL_ADD (array, i, strcasecmp, - HAS_CPU_FEATURE (SSE4_2), - __strcasecmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strcasecmp, - HAS_CPU_FEATURE (SSSE3), - __strcasecmp_ssse3) - IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S. */ - IFUNC_IMPL (i, name, strcasecmp_l, - IFUNC_IMPL_ADD (array, i, strcasecmp_l, - HAS_CPU_FEATURE (SSE4_2), - __strcasecmp_l_sse4_2) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, - HAS_CPU_FEATURE (SSSE3), - __strcasecmp_l_ssse3) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, - __strcasecmp_l_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcat.S. */ - IFUNC_IMPL (i, name, strcat, - IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), - __strcat_ssse3) - IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2), - __strcat_sse2) - IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strchr.S. */ - IFUNC_IMPL (i, name, strchr, - IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), - __strchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), - __strchr_sse2) - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcmp.S. */ - IFUNC_IMPL (i, name, strcmp, - IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), - __strcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), - __strcmp_ssse3) - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcpy.S. 
*/ - IFUNC_IMPL (i, name, strcpy, - IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), - __strcpy_ssse3) - IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2), - __strcpy_sse2) - IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcspn.S. */ - IFUNC_IMPL (i, name, strcspn, - IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2), - __strcspn_sse42) - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncase.S. */ - IFUNC_IMPL (i, name, strncasecmp, - IFUNC_IMPL_ADD (array, i, strncasecmp, - HAS_CPU_FEATURE (SSE4_2), - __strncasecmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strncasecmp, - HAS_CPU_FEATURE (SSSE3), - __strncasecmp_ssse3) - IFUNC_IMPL_ADD (array, i, strncasecmp, 1, - __strncasecmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncase_l.S. */ - IFUNC_IMPL (i, name, strncasecmp_l, - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - HAS_CPU_FEATURE (SSE4_2), - __strncasecmp_l_sse4_2) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - HAS_CPU_FEATURE (SSSE3), - __strncasecmp_l_ssse3) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, - __strncasecmp_l_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncat.S. */ - IFUNC_IMPL (i, name, strncat, - IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), - __strncat_ssse3) - IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2), - __strncat_sse2) - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncpy.S. */ - IFUNC_IMPL (i, name, strncpy, - IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), - __strncpy_ssse3) - IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2), - __strncpy_sse2) - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strnlen.S. 
*/ - IFUNC_IMPL (i, name, strnlen, - IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2), - __strnlen_sse2) - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strpbrk.S. */ - IFUNC_IMPL (i, name, strpbrk, - IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2), - __strpbrk_sse42) - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strrchr.S. */ - IFUNC_IMPL (i, name, strrchr, - IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), - __strrchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), - __strrchr_sse2) - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strspn.S. */ - IFUNC_IMPL (i, name, strspn, - IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2), - __strspn_sse42) - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcschr.S. */ - IFUNC_IMPL (i, name, wcschr, - IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2), - __wcschr_sse2) - IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcscmp.S. */ - IFUNC_IMPL (i, name, wcscmp, - IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2), - __wcscmp_sse2) - IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcscpy.S. */ - IFUNC_IMPL (i, name, wcscpy, - IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3), - __wcscpy_ssse3) - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcslen.S. */ - IFUNC_IMPL (i, name, wcslen, - IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2), - __wcslen_sse2) - IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. 
*/ - IFUNC_IMPL (i, name, wcsrchr, - IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2), - __wcsrchr_sse2) - IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wmemcmp.S. */ - IFUNC_IMPL (i, name, wmemcmp, - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2), - __wmemcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), - __wmemcmp_ssse3) - IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32)) - -#ifdef SHARED - /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */ - IFUNC_IMPL (i, name, __memcpy_chk, - IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __memcpy_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __memcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSE2), - __memcpy_chk_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, - __memcpy_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memcpy.S. */ - IFUNC_IMPL (i, name, memcpy, - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), - __memcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), - __memcpy_ssse3) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2), - __memcpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */ - IFUNC_IMPL (i, name, __mempcpy_chk, - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __mempcpy_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __mempcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSE2), - __mempcpy_chk_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, - __mempcpy_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/mempcpy.S. 
*/ - IFUNC_IMPL (i, name, mempcpy, - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), - __mempcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), - __mempcpy_ssse3) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2), - __mempcpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strlen.S. */ - IFUNC_IMPL (i, name, strlen, - IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), - __strlen_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), - __strlen_sse2) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncmp.S. */ - IFUNC_IMPL (i, name, strncmp, - IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), - __strncmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), - __strncmp_ssse3) - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32)) -#endif - - return i; -} diff --git a/sysdeps/i386/i686/multiarch/locale-defines.sym b/sysdeps/i386/i686/multiarch/locale-defines.sym deleted file mode 100644 index aebff9a4f9..0000000000 --- a/sysdeps/i386/i686/multiarch/locale-defines.sym +++ /dev/null @@ -1,11 +0,0 @@ -#include <locale/localeinfo.h> -#include <langinfo.h> -#include <stddef.h> - --- - -LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales) -LC_CTYPE -_NL_CTYPE_NONASCII_CASE -LOCALE_DATA_VALUES offsetof (struct __locale_data, values) -SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S deleted file mode 100644 index dd316486e6..0000000000 --- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S +++ /dev/null @@ -1,502 +0,0 @@ -/* Optimized memchr with sse2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - -# ifndef USE_AS_RAWMEMCHR -# define LEN STR2+4 -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); -# endif - -# ifndef MEMCHR -# define MEMCHR __memchr_sse2_bsf -# endif - - .text -ENTRY (MEMCHR) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - -# ifndef USE_AS_RAWMEMCHR - mov LEN(%esp), %edx - test %edx, %edx - jz L(return_null_1) -# endif - mov %ecx, %eax - - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - - and $63, %ecx - pshufd $0, %xmm1, %xmm1 - - cmp $48, %ecx - ja L(crosscache) - - movdqu (%eax), %xmm0 - pcmpeqb %xmm1, %xmm0 -/* Check if there is a match. */ - pmovmskb %xmm0, %ecx - test %ecx, %ecx - je L(unaligned_no_match_1) -/* Check which byte is a match. 
*/ - bsf %ecx, %ecx - -# ifndef USE_AS_RAWMEMCHR - sub %ecx, %edx - jbe L(return_null_1) -# endif - add %ecx, %eax - ret - - .p2align 4 -L(unaligned_no_match_1): -# ifndef USE_AS_RAWMEMCHR - sub $16, %edx - jbe L(return_null_1) - PUSH (%edi) - lea 16(%eax), %edi - and $15, %eax - and $-16, %edi - add %eax, %edx -# else - lea 16(%eax), %edx - and $-16, %edx -# endif - jmp L(loop_prolog) - - .p2align 4 -L(return_null_1): - xor %eax, %eax - ret - -# ifndef USE_AS_RAWMEMCHR - CFI_POP (%edi) -# endif - - .p2align 4 -L(crosscache): -/* Handle unaligned string. */ - -# ifndef USE_AS_RAWMEMCHR - PUSH (%edi) - mov %eax, %edi - and $15, %ecx - and $-16, %edi - movdqa (%edi), %xmm0 -# else - mov %eax, %edx - and $15, %ecx - and $-16, %edx - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 -/* Check if there is a match. */ - pmovmskb %xmm0, %eax -/* Remove the leading bytes. */ - sar %cl, %eax - test %eax, %eax - je L(unaligned_no_match) -/* Check which byte is a match. */ - bsf %eax, %eax - -# ifndef USE_AS_RAWMEMCHR - sub %eax, %edx - jbe L(return_null) - add %edi, %eax - add %ecx, %eax - RETURN -# else - add %edx, %eax - add %ecx, %eax - ret -# endif - - .p2align 4 -L(unaligned_no_match): -# ifndef USE_AS_RAWMEMCHR - /* Calculate the last acceptable address and check for possible - addition overflow by using satured math: - edx = ecx + edx - edx |= -(edx < ecx) */ - add %ecx, %edx - sbb %eax, %eax - or %eax, %edx - sub $16, %edx - jbe L(return_null) - add $16, %edi -# else - add $16, %edx -# endif - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop_prolog): -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 -# else - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm4 -# else - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm4 - -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches0) - -# ifndef USE_AS_RAWMEMCHR - test $0x3f, %edi -# else - test $0x3f, %edx -# endif - jz L(align64_loop) - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 -# else - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm3 -# else - movdqa 48(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - test %eax, %eax - jnz L(matches0) - -# ifndef USE_AS_RAWMEMCHR - mov %edi, %ecx - and $-64, %edi - and $63, %ecx - add %ecx, %edx -# else - and $-64, %edx -# endif - - .p2align 4 -L(align64_loop): -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - 
movdqa (%edi), %xmm0 - movdqa 16(%edi), %xmm2 - movdqa 32(%edi), %xmm3 - movdqa 48(%edi), %xmm4 -# else - movdqa (%edx), %xmm0 - movdqa 16(%edx), %xmm2 - movdqa 32(%edx), %xmm3 - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm0, %xmm3 - pmaxub %xmm2, %xmm4 - pmaxub %xmm3, %xmm4 - pmovmskb %xmm4, %eax - -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - - test %eax, %eax - jz L(align64_loop) - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edi -# else - sub $64, %edx -# endif - - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - - pcmpeqb %xmm1, %xmm3 - -# ifndef USE_AS_RAWMEMCHR - pcmpeqb 48(%edi), %xmm1 -# else - pcmpeqb 48(%edx), %xmm1 -# endif - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - pmovmskb %xmm1, %eax - bsf %eax, %eax - -# ifndef USE_AS_RAWMEMCHR - lea 48(%edi, %eax), %eax - RETURN -# else - lea 48(%edx, %eax), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - - movdqa 16(%edi), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - movdqa 32(%edi), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb 48(%edi), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches48_1) - xor %eax, %eax - RETURN - - .p2align 4 -L(exit_loop_32): - movdqa (%edi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 16(%edi), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz 
L(matches16_1) - xor %eax, %eax - RETURN -# endif - .p2align 4 -L(matches0): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - lea -16(%eax, %edi), %eax - RETURN -# else - lea -16(%eax, %edx), %eax - ret -# endif - - .p2align 4 -L(matches): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - add %edi, %eax - RETURN -# else - add %edx, %eax - ret -# endif - - .p2align 4 -L(matches16): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - lea 16(%eax, %edi), %eax - RETURN -# else - lea 16(%eax, %edx), %eax - ret -# endif - - .p2align 4 -L(matches32): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - lea 32(%eax, %edi), %eax - RETURN -# else - lea 32(%eax, %edx), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(matches_1): - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - add %edi, %eax - RETURN - - .p2align 4 -L(matches16_1): - sub $16, %edx - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - lea 16(%edi, %eax), %eax - RETURN - - .p2align 4 -L(matches32_1): - sub $32, %edx - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - lea 32(%edi, %eax), %eax - RETURN - - .p2align 4 -L(matches48_1): - sub $48, %edx - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - lea 48(%edi, %eax), %eax - RETURN -# endif - .p2align 4 -L(return_null): - xor %eax, %eax -# ifndef USE_AS_RAWMEMCHR - RETURN -# else - ret -# endif - -END (MEMCHR) -#endif diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S deleted file mode 100644 index 172d70de13..0000000000 --- a/sysdeps/i386/i686/multiarch/memchr-sse2.S +++ /dev/null @@ -1,709 +0,0 @@ -/* Optimized memchr with sse2 without bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef USE_AS_RAWMEMCHR -# define ENTRANCE PUSH(%edi); -# define PARMS 8 -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); -# else -# define ENTRANCE -# define PARMS 4 -# endif - -# define STR1 PARMS -# define STR2 STR1+4 - -# ifndef USE_AS_RAWMEMCHR -# define LEN STR2+4 -# endif - -# ifndef MEMCHR -# define MEMCHR __memchr_sse2 -# endif - - atom_text_section -ENTRY (MEMCHR) - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 -# ifndef USE_AS_RAWMEMCHR - mov LEN(%esp), %edx - test %edx, %edx - jz L(return_null) -# endif - - punpcklbw %xmm1, %xmm1 -# ifndef USE_AS_RAWMEMCHR - mov %ecx, %edi -# else - mov %ecx, %edx -# endif - punpcklbw %xmm1, %xmm1 - - and $63, %ecx - pshufd $0, %xmm1, %xmm1 - cmp $48, %ecx - ja L(crosscache) - -# ifndef USE_AS_RAWMEMCHR - movdqu (%edi), %xmm0 -# else - movdqu (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax -# ifndef USE_AS_RAWMEMCHR - jnz L(match_case2_prolog) - - sub $16, %edx - jbe L(return_null) - lea 16(%edi), %edi - and 
$15, %ecx - and $-16, %edi - add %ecx, %edx -# else - jnz L(match_case1_prolog) - lea 16(%edx), %edx - and $-16, %edx -# endif - jmp L(loop_prolog) - - .p2align 4 -L(crosscache): - and $15, %ecx -# ifndef USE_AS_RAWMEMCHR - and $-16, %edi - movdqa (%edi), %xmm0 -# else - and $-16, %edx - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - sar %cl, %eax - test %eax, %eax - -# ifndef USE_AS_RAWMEMCHR - jnz L(match_case2_prolog1) - /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using - "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to void - possible addition overflow. */ - neg %ecx - add $16, %ecx - sub %ecx, %edx - jbe L(return_null) - lea 16(%edi), %edi -# else - jnz L(match_case1_prolog1) - lea 16(%edx), %edx -# endif - - .p2align 4 -L(loop_prolog): -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 -# else - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - xor %ecx, %ecx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif - pcmpeqb %xmm1, %xmm2 - lea 16(%ecx), %ecx - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - lea 16(%ecx), %ecx - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm4 -# else - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm4 - lea 16(%ecx), %ecx - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - lea 64(%edi), %edi - sub $64, %edx - jbe L(exit_loop) - - movdqa (%edi), %xmm0 -# else - lea 64(%edx), %edx - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - xor %ecx, %ecx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif 
- pcmpeqb %xmm1, %xmm2 - lea 16(%ecx), %ecx - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - lea 16(%ecx), %ecx - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm4 -# else - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm4 - lea 16(%ecx), %ecx - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - lea 64(%edi), %edi - mov %edi, %ecx - and $-64, %edi - and $63, %ecx - add %ecx, %edx -# else - lea 64(%edx), %edx - and $-64, %edx -# endif - - .p2align 4 -L(align64_loop): - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 - movdqa 16(%edi), %xmm2 - movdqa 32(%edi), %xmm3 - movdqa 48(%edi), %xmm4 -# else - movdqa (%edx), %xmm0 - movdqa 16(%edx), %xmm2 - movdqa 32(%edx), %xmm3 - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm0, %xmm3 - pmaxub %xmm2, %xmm4 - pmaxub %xmm3, %xmm4 -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - pmovmskb %xmm4, %eax - - test %eax, %eax - jz L(align64_loop) - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edi -# else - sub $64, %edx -# endif - - pmovmskb %xmm0, %eax - xor %ecx, %ecx - test %eax, %eax - jnz L(match_case1) - - pmovmskb %xmm2, %eax - lea 16(%ecx), %ecx - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - lea 16(%ecx), %ecx - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - pcmpeqb 48(%edi), %xmm1 -# else - pcmpeqb 48(%edx), %xmm1 -# endif - pmovmskb %xmm1, %eax - lea 16(%ecx), %ecx - - .p2align 4 -L(match_case1): -# ifndef USE_AS_RAWMEMCHR - add %ecx, %edi -# else -L(match_case1_prolog1): - add 
%ecx, %edx -L(match_case1_prolog): -# endif - test %al, %al - jz L(match_case1_high) - mov %al, %cl - and $15, %cl - jz L(match_case1_8) - test $0x01, %al - jnz L(ExitCase1_1) - test $0x02, %al - jnz L(ExitCase1_2) - test $0x04, %al - jnz L(ExitCase1_3) -# ifndef USE_AS_RAWMEMCHR - lea 3(%edi), %eax - RETURN -# else - lea 3(%edx), %eax - ret -# endif - - .p2align 4 -L(match_case1_8): - test $0x10, %al - jnz L(ExitCase1_5) - test $0x20, %al - jnz L(ExitCase1_6) - test $0x40, %al - jnz L(ExitCase1_7) -# ifndef USE_AS_RAWMEMCHR - lea 7(%edi), %eax - RETURN -# else - lea 7(%edx), %eax - ret -# endif - - .p2align 4 -L(match_case1_high): - mov %ah, %ch - and $15, %ch - jz L(match_case1_high_8) - test $0x01, %ah - jnz L(ExitCase1_9) - test $0x02, %ah - jnz L(ExitCase1_10) - test $0x04, %ah - jnz L(ExitCase1_11) -# ifndef USE_AS_RAWMEMCHR - lea 11(%edi), %eax - RETURN -# else - lea 11(%edx), %eax - ret -# endif - - .p2align 4 -L(match_case1_high_8): - test $0x10, %ah - jnz L(ExitCase1_13) - test $0x20, %ah - jnz L(ExitCase1_14) - test $0x40, %ah - jnz L(ExitCase1_15) -# ifndef USE_AS_RAWMEMCHR - lea 15(%edi), %eax - RETURN -# else - lea 15(%edx), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(exit_loop): - add $64, %edx - - movdqa (%edi), %xmm0 - pcmpeqb %xmm1, %xmm0 - xor %ecx, %ecx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(match_case2) - cmp $16, %edx - jbe L(return_null) - - movdqa 16(%edi), %xmm2 - pcmpeqb %xmm1, %xmm2 - lea 16(%ecx), %ecx - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(match_case2) - cmp $32, %edx - jbe L(return_null) - - movdqa 32(%edi), %xmm3 - pcmpeqb %xmm1, %xmm3 - lea 16(%ecx), %ecx - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(match_case2) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb 48(%edi), %xmm1 - lea 16(%ecx), %ecx - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(match_case2) - - xor %eax, %eax - RETURN -# endif - - .p2align 4 -L(ExitCase1_1): -# ifndef USE_AS_RAWMEMCHR - mov %edi, %eax - RETURN -# else - 
mov %edx, %eax - ret -# endif - - .p2align 4 -L(ExitCase1_2): -# ifndef USE_AS_RAWMEMCHR - lea 1(%edi), %eax - RETURN -# else - lea 1(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_3): -# ifndef USE_AS_RAWMEMCHR - lea 2(%edi), %eax - RETURN -# else - lea 2(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_5): -# ifndef USE_AS_RAWMEMCHR - lea 4(%edi), %eax - RETURN -# else - lea 4(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_6): -# ifndef USE_AS_RAWMEMCHR - lea 5(%edi), %eax - RETURN -# else - lea 5(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_7): -# ifndef USE_AS_RAWMEMCHR - lea 6(%edi), %eax - RETURN -# else - lea 6(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_9): -# ifndef USE_AS_RAWMEMCHR - lea 8(%edi), %eax - RETURN -# else - lea 8(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_10): -# ifndef USE_AS_RAWMEMCHR - lea 9(%edi), %eax - RETURN -# else - lea 9(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_11): -# ifndef USE_AS_RAWMEMCHR - lea 10(%edi), %eax - RETURN -# else - lea 10(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_13): -# ifndef USE_AS_RAWMEMCHR - lea 12(%edi), %eax - RETURN -# else - lea 12(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_14): -# ifndef USE_AS_RAWMEMCHR - lea 13(%edi), %eax - RETURN -# else - lea 13(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_15): -# ifndef USE_AS_RAWMEMCHR - lea 14(%edi), %eax - RETURN -# else - lea 14(%edx), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(match_case2): - sub %ecx, %edx -L(match_case2_prolog1): - add %ecx, %edi -L(match_case2_prolog): - test %al, %al - jz L(match_case2_high) - mov %al, %cl - and $15, %cl - jz L(match_case2_8) - test $0x01, %al - jnz L(ExitCase2_1) - test $0x02, %al - jnz L(ExitCase2_2) - test $0x04, %al - jnz L(ExitCase2_3) - sub $4, %edx - jb L(return_null) - lea 3(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_8): - test $0x10, %al - jnz L(ExitCase2_5) - test $0x20, %al - jnz 
L(ExitCase2_6) - test $0x40, %al - jnz L(ExitCase2_7) - sub $8, %edx - jb L(return_null) - lea 7(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_high): - mov %ah, %ch - and $15, %ch - jz L(match_case2_high_8) - test $0x01, %ah - jnz L(ExitCase2_9) - test $0x02, %ah - jnz L(ExitCase2_10) - test $0x04, %ah - jnz L(ExitCase2_11) - sub $12, %edx - jb L(return_null) - lea 11(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_high_8): - test $0x10, %ah - jnz L(ExitCase2_13) - test $0x20, %ah - jnz L(ExitCase2_14) - test $0x40, %ah - jnz L(ExitCase2_15) - sub $16, %edx - jb L(return_null) - lea 15(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_1): - mov %edi, %eax - RETURN - - .p2align 4 -L(ExitCase2_2): - sub $2, %edx - jb L(return_null) - lea 1(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_3): - sub $3, %edx - jb L(return_null) - lea 2(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_5): - sub $5, %edx - jb L(return_null) - lea 4(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_6): - sub $6, %edx - jb L(return_null) - lea 5(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_7): - sub $7, %edx - jb L(return_null) - lea 6(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_9): - sub $9, %edx - jb L(return_null) - lea 8(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_10): - sub $10, %edx - jb L(return_null) - lea 9(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_11): - sub $11, %edx - jb L(return_null) - lea 10(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_13): - sub $13, %edx - jb L(return_null) - lea 12(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_14): - sub $14, %edx - jb L(return_null) - lea 13(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_15): - sub $15, %edx - jb L(return_null) - lea 14(%edi), %eax - RETURN -# endif - - .p2align 4 -L(return_null): - xor %eax, %eax -# ifndef USE_AS_RAWMEMCHR - RETURN -# else - ret -# endif - -END (MEMCHR) -#endif diff --git a/sysdeps/i386/i686/multiarch/memchr.S b/sysdeps/i386/i686/multiarch/memchr.S deleted file mode 
100644 index bd0dace290..0000000000 --- a/sysdeps/i386/i686/multiarch/memchr.S +++ /dev/null @@ -1,65 +0,0 @@ -/* Multiple versions of memchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__memchr) - .type __memchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - HAS_CPU_FEATURE (SSE2) - jz 2f - HAS_ARCH_FEATURE (Slow_BSF) - jz 3f - - LOAD_FUNC_GOT_EAX ( __memchr_sse2) - ret - -2: LOAD_FUNC_GOT_EAX (__memchr_ia32) - ret - -3: LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf) - ret -END(__memchr) - -weak_alias(__memchr, memchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __memchr_ia32, @function; \ - .globl __memchr_ia32; \ - .p2align 4; \ - __memchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memchr_ia32, .-__memchr_ia32 - -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memchr; __GI_memchr = __memchr_ia32 - -#endif -#include "../../memchr.S" diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S deleted file mode 100644 index 2aa13048b2..0000000000 --- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S +++ /dev/null @@ -1,1225 +0,0 @@ -/* memcmp with SSE4.2, wmemcmp with SSE4.2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# ifndef MEMCMP -# define MEMCMP __memcmp_sse4_2 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define BLK1 PARMS -# define BLK2 BLK1 + 4 -# define LEN BLK2 + 4 -# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx) - - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ -/* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ -/* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ -/* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ebx,INDEX,SCALE), %ebx; \ -/* We loaded the jump table and adjusted EDX/ESI. Go. */ \ - jmp *%ebx -# else -# define JMPTBL(I, B) I - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - - -/* Warning! - wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. -*/ - - .section .text.sse4.2,"ax",@progbits -ENTRY (MEMCMP) - movl BLK1(%esp), %eax - movl BLK2(%esp), %edx - movl LEN(%esp), %ecx - -# ifdef USE_AS_WMEMCMP - shl $2, %ecx - test %ecx, %ecx - jz L(return0) -# else - cmp $1, %ecx - jbe L(less1bytes) -# endif - - pxor %xmm0, %xmm0 - cmp $64, %ecx - ja L(64bytesormore) - cmp $8, %ecx - -# ifndef USE_AS_WMEMCMP - PUSH (%ebx) - jb L(less8bytes) -# else - jb L(less8bytes) - PUSH (%ebx) -# endif - - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less8bytes): - mov (%eax), %bl - cmpb (%edx), %bl - jne L(nonzero) - - mov 1(%eax), %bl - cmpb 1(%edx), %bl - jne L(nonzero) - - cmp $2, %ecx - jz L(0bytes) - - mov 2(%eax), %bl - cmpb 2(%edx), %bl - jne L(nonzero) - - cmp $3, %ecx - jz L(0bytes) - - mov 3(%eax), %bl - cmpb 3(%edx), %bl - jne L(nonzero) - - cmp $4, %ecx - jz L(0bytes) - - mov 4(%eax), %bl - cmpb 4(%edx), %bl - jne L(nonzero) - - cmp $5, %ecx - jz L(0bytes) - - mov 5(%eax), %bl - cmpb 5(%edx), %bl - jne L(nonzero) - - cmp $6, %ecx - jz L(0bytes) - - mov 6(%eax), %bl - cmpb 6(%edx), %bl - je L(0bytes) - -L(nonzero): 
- POP (%ebx) - mov $1, %eax - ja L(above) - neg %eax -L(above): - ret - CFI_PUSH (%ebx) -# endif - - .p2align 4 -L(0bytes): - POP (%ebx) - xor %eax, %eax - ret - -# ifdef USE_AS_WMEMCMP - -/* for wmemcmp, case N == 1 */ - - .p2align 4 -L(less8bytes): - mov (%eax), %ecx - cmp (%edx), %ecx - je L(return0) - mov $1, %eax - jg L(find_diff_bigger) - neg %eax - ret - - .p2align 4 -L(find_diff_bigger): - ret - - .p2align 4 -L(return0): - xor %eax, %eax - ret -# endif - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less1bytes): - jb L(0bytesend) - movzbl (%eax), %eax - movzbl (%edx), %edx - sub %edx, %eax - ret - - .p2align 4 -L(0bytesend): - xor %eax, %eax - ret -# endif - .p2align 4 -L(64bytesormore): - PUSH (%ebx) - mov %ecx, %ebx - mov $64, %ecx - sub $64, %ebx -L(64bytesormore_loop): - movdqu (%eax), %xmm1 - movdqu (%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_16diff) - - movdqu 16(%eax), %xmm1 - movdqu 16(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_32diff) - - movdqu 32(%eax), %xmm1 - movdqu 32(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_48diff) - - movdqu 48(%eax), %xmm1 - movdqu 48(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_64diff) - add %ecx, %eax - add %ecx, %edx - sub %ecx, %ebx - jae L(64bytesormore_loop) - add %ebx, %ecx - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) - -# ifdef USE_AS_WMEMCMP - -/* Label needs only for table_64bytes filling */ -L(unreal_case): -/* no code here */ - -# endif - .p2align 4 -L(find_16diff): - sub $16, %ecx -L(find_32diff): - sub $16, %ecx -L(find_48diff): - sub $16, %ecx -L(find_64diff): - add %ecx, %edx - add %ecx, %eax - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(16bytes): - mov -16(%eax), %ecx - mov -16(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - mov -12(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - mov -8(%edx), %ebx - cmp %ebx, 
%ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - mov -4(%edx), %ebx - cmp %ebx, %ecx - mov $0, %eax - jne L(find_diff) - RETURN -# else - .p2align 4 -L(16bytes): - mov -16(%eax), %ecx - cmp -16(%edx), %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - cmp -12(%edx), %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - cmp -8(%edx), %ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - cmp -4(%edx), %ecx - mov $0, %eax - jne L(find_diff) - RETURN -# endif - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(49bytes): - movdqu -49(%eax), %xmm1 - movdqu -49(%edx), %xmm2 - mov $-49, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(33bytes): - movdqu -33(%eax), %xmm1 - movdqu -33(%edx), %xmm2 - mov $-33, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(17bytes): - mov -17(%eax), %ecx - mov -17(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(13bytes): - mov -13(%eax), %ecx - mov -13(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(9bytes): - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(5bytes): - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(50bytes): - mov $-50, %ebx - movdqu -50(%eax), %xmm1 - movdqu -50(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(34bytes): - mov $-34, %ebx - movdqu -34(%eax), %xmm1 - movdqu -34(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(18bytes): - mov -18(%eax), %ecx - mov -18(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(14bytes): - mov -14(%eax), %ecx - mov -14(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(10bytes): - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(6bytes): - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(2bytes): - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - 
cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(51bytes): - mov $-51, %ebx - movdqu -51(%eax), %xmm1 - movdqu -51(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(35bytes): - mov $-35, %ebx - movdqu -35(%eax), %xmm1 - movdqu -35(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(19bytes): - movl -19(%eax), %ecx - movl -19(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(15bytes): - movl -15(%eax), %ecx - movl -15(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(11bytes): - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(7bytes): - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(3bytes): - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) -L(1bytes): - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# endif - .p2align 4 -L(52bytes): - movdqu -52(%eax), %xmm1 - movdqu -52(%edx), %xmm2 - mov $-52, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(36bytes): - movdqu -36(%eax), %xmm1 - movdqu -36(%edx), %xmm2 - mov $-36, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(20bytes): - movdqu -20(%eax), %xmm1 - movdqu -20(%edx), %xmm2 - mov $-20, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(53bytes): - movdqu -53(%eax), %xmm1 - movdqu -53(%edx), %xmm2 - mov $-53, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(37bytes): - mov $-37, %ebx - movdqu -37(%eax), %xmm1 - movdqu -37(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(21bytes): - mov $-21, %ebx - movdqu -21(%eax), %xmm1 - movdqu -21(%edx), %xmm2 - 
pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(54bytes): - movdqu -54(%eax), %xmm1 - movdqu -54(%edx), %xmm2 - mov $-54, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(38bytes): - mov $-38, %ebx - movdqu -38(%eax), %xmm1 - movdqu -38(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(22bytes): - mov $-22, %ebx - movdqu -22(%eax), %xmm1 - movdqu -22(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(55bytes): - movdqu -55(%eax), %xmm1 - movdqu -55(%edx), %xmm2 - mov $-55, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(39bytes): - mov $-39, %ebx - movdqu -39(%eax), %xmm1 - movdqu -39(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(23bytes): - mov $-23, %ebx - movdqu -23(%eax), %xmm1 - movdqu -23(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# endif - .p2align 4 -L(56bytes): - movdqu -56(%eax), %xmm1 - movdqu -56(%edx), %xmm2 - mov $-56, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(40bytes): - mov $-40, %ebx - movdqu -40(%eax), %xmm1 - movdqu -40(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(24bytes): - mov $-24, %ebx - movdqu -24(%eax), %xmm1 - movdqu -24(%edx), %xmm2 - pxor %xmm1, %xmm2 
- ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -8(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -8(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -8(%edx), %ecx -# endif - jne L(find_diff) - - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(57bytes): - movdqu -57(%eax), %xmm1 - movdqu -57(%edx), %xmm2 - mov $-57, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(41bytes): - mov $-41, %ebx - movdqu -41(%eax), %xmm1 - movdqu -41(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(25bytes): - mov $-25, %ebx - movdqu -25(%eax), %xmm1 - movdqu -25(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(58bytes): - movdqu -58(%eax), %xmm1 - movdqu -58(%edx), %xmm2 - mov $-58, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(42bytes): - mov $-42, %ebx - movdqu -42(%eax), %xmm1 - movdqu -42(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(26bytes): - mov $-26, %ebx - movdqu -26(%eax), %xmm1 - movdqu -26(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(59bytes): - movdqu -59(%eax), %xmm1 - movdqu -59(%edx), %xmm2 - mov $-59, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(43bytes): - mov $-43, %ebx - movdqu 
-43(%eax), %xmm1 - movdqu -43(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(27bytes): - mov $-27, %ebx - movdqu -27(%eax), %xmm1 - movdqu -27(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# endif - .p2align 4 -L(60bytes): - movdqu -60(%eax), %xmm1 - movdqu -60(%edx), %xmm2 - mov $-60, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(44bytes): - mov $-44, %ebx - movdqu -44(%eax), %xmm1 - movdqu -44(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(28bytes): - mov $-28, %ebx - movdqu -28(%eax), %xmm1 - movdqu -28(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -12(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -12(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -12(%edx), %ecx -# endif - jne L(find_diff) - - mov -8(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -8(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -8(%edx), %ecx -# endif - jne L(find_diff) - - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(61bytes): - movdqu -61(%eax), %xmm1 - movdqu -61(%edx), %xmm2 - mov $-61, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(45bytes): - mov $-45, %ebx - movdqu -45(%eax), %xmm1 - movdqu -45(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(29bytes): - mov $-29, %ebx - movdqu -29(%eax), %xmm1 - movdqu -29(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -13(%eax), 
%ecx - mov -13(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(62bytes): - movdqu -62(%eax), %xmm1 - movdqu -62(%edx), %xmm2 - mov $-62, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(46bytes): - mov $-46, %ebx - movdqu -46(%eax), %xmm1 - movdqu -46(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(30bytes): - mov $-30, %ebx - movdqu -30(%eax), %xmm1 - movdqu -30(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -14(%eax), %ecx - mov -14(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(63bytes): - movdqu -63(%eax), %xmm1 - movdqu -63(%edx), %xmm2 - mov $-63, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(47bytes): - mov $-47, %ebx - movdqu -47(%eax), %xmm1 - movdqu -47(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(31bytes): - mov $-31, %ebx - movdqu -31(%eax), %xmm1 - movdqu -31(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - movl -15(%eax), %ecx - movl -15(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# 
endif - - .p2align 4 -L(64bytes): - movdqu -64(%eax), %xmm1 - movdqu -64(%edx), %xmm2 - mov $-64, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(48bytes): - movdqu -48(%eax), %xmm1 - movdqu -48(%edx), %xmm2 - mov $-48, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(32bytes): - movdqu -32(%eax), %xmm1 - movdqu -32(%edx), %xmm2 - mov $-32, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -16(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -16(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -16(%edx), %ecx -# endif - jne L(find_diff) - - mov -12(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -12(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -12(%edx), %ecx -# endif - jne L(find_diff) - - mov -8(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -8(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -8(%edx), %ecx -# endif - jne L(find_diff) - - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less16bytes): - add %ebx, %eax - add %ebx, %edx - - mov (%eax), %ecx - mov (%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov 4(%eax), %ecx - mov 4(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov 8(%eax), %ecx - mov 8(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov 12(%eax), %ecx - mov 12(%edx), %ebx - cmp %ebx, %ecx - mov $0, %eax - jne L(find_diff) - RETURN -# else - .p2align 4 -L(less16bytes): - add %ebx, %eax - add %ebx, %edx - - mov (%eax), %ecx - cmp (%edx), %ecx - jne L(find_diff) - - mov 4(%eax), %ecx - cmp 4(%edx), %ecx - jne L(find_diff) - - mov 8(%eax), %ecx - cmp 8(%edx), %ecx - jne L(find_diff) - - mov 12(%eax), %ecx - cmp 12(%edx), %ecx - - mov $0, %eax - jne L(find_diff) - RETURN -# endif - - .p2align 4 -L(find_diff): -# ifndef USE_AS_WMEMCMP - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - shr $16,%ecx - shr $16,%ebx - cmp %bl, %cl - jne 
L(end) - cmp %bx, %cx -L(end): - POP (%ebx) - mov $1, %eax - ja L(bigger) - neg %eax -L(bigger): - ret -# else - POP (%ebx) - mov $1, %eax - jg L(bigger) - neg %eax - ret - - .p2align 4 -L(bigger): - ret -# endif -END (MEMCMP) - - .section .rodata.sse4.2,"a",@progbits - .p2align 2 - .type L(table_64bytes), @object -# ifndef USE_AS_WMEMCMP -L(table_64bytes): - .int JMPTBL (L(0bytes), L(table_64bytes)) - .int JMPTBL (L(1bytes), L(table_64bytes)) - .int JMPTBL (L(2bytes), L(table_64bytes)) - .int JMPTBL (L(3bytes), L(table_64bytes)) - .int JMPTBL (L(4bytes), L(table_64bytes)) - .int JMPTBL (L(5bytes), L(table_64bytes)) - .int JMPTBL (L(6bytes), L(table_64bytes)) - .int JMPTBL (L(7bytes), L(table_64bytes)) - .int JMPTBL (L(8bytes), L(table_64bytes)) - .int JMPTBL (L(9bytes), L(table_64bytes)) - .int JMPTBL (L(10bytes), L(table_64bytes)) - .int JMPTBL (L(11bytes), L(table_64bytes)) - .int JMPTBL (L(12bytes), L(table_64bytes)) - .int JMPTBL (L(13bytes), L(table_64bytes)) - .int JMPTBL (L(14bytes), L(table_64bytes)) - .int JMPTBL (L(15bytes), L(table_64bytes)) - .int JMPTBL (L(16bytes), L(table_64bytes)) - .int JMPTBL (L(17bytes), L(table_64bytes)) - .int JMPTBL (L(18bytes), L(table_64bytes)) - .int JMPTBL (L(19bytes), L(table_64bytes)) - .int JMPTBL (L(20bytes), L(table_64bytes)) - .int JMPTBL (L(21bytes), L(table_64bytes)) - .int JMPTBL (L(22bytes), L(table_64bytes)) - .int JMPTBL (L(23bytes), L(table_64bytes)) - .int JMPTBL (L(24bytes), L(table_64bytes)) - .int JMPTBL (L(25bytes), L(table_64bytes)) - .int JMPTBL (L(26bytes), L(table_64bytes)) - .int JMPTBL (L(27bytes), L(table_64bytes)) - .int JMPTBL (L(28bytes), L(table_64bytes)) - .int JMPTBL (L(29bytes), L(table_64bytes)) - .int JMPTBL (L(30bytes), L(table_64bytes)) - .int JMPTBL (L(31bytes), L(table_64bytes)) - .int JMPTBL (L(32bytes), L(table_64bytes)) - .int JMPTBL (L(33bytes), L(table_64bytes)) - .int JMPTBL (L(34bytes), L(table_64bytes)) - .int JMPTBL (L(35bytes), L(table_64bytes)) - .int JMPTBL (L(36bytes), 
L(table_64bytes)) - .int JMPTBL (L(37bytes), L(table_64bytes)) - .int JMPTBL (L(38bytes), L(table_64bytes)) - .int JMPTBL (L(39bytes), L(table_64bytes)) - .int JMPTBL (L(40bytes), L(table_64bytes)) - .int JMPTBL (L(41bytes), L(table_64bytes)) - .int JMPTBL (L(42bytes), L(table_64bytes)) - .int JMPTBL (L(43bytes), L(table_64bytes)) - .int JMPTBL (L(44bytes), L(table_64bytes)) - .int JMPTBL (L(45bytes), L(table_64bytes)) - .int JMPTBL (L(46bytes), L(table_64bytes)) - .int JMPTBL (L(47bytes), L(table_64bytes)) - .int JMPTBL (L(48bytes), L(table_64bytes)) - .int JMPTBL (L(49bytes), L(table_64bytes)) - .int JMPTBL (L(50bytes), L(table_64bytes)) - .int JMPTBL (L(51bytes), L(table_64bytes)) - .int JMPTBL (L(52bytes), L(table_64bytes)) - .int JMPTBL (L(53bytes), L(table_64bytes)) - .int JMPTBL (L(54bytes), L(table_64bytes)) - .int JMPTBL (L(55bytes), L(table_64bytes)) - .int JMPTBL (L(56bytes), L(table_64bytes)) - .int JMPTBL (L(57bytes), L(table_64bytes)) - .int JMPTBL (L(58bytes), L(table_64bytes)) - .int JMPTBL (L(59bytes), L(table_64bytes)) - .int JMPTBL (L(60bytes), L(table_64bytes)) - .int JMPTBL (L(61bytes), L(table_64bytes)) - .int JMPTBL (L(62bytes), L(table_64bytes)) - .int JMPTBL (L(63bytes), L(table_64bytes)) - .int JMPTBL (L(64bytes), L(table_64bytes)) -# else -L(table_64bytes): - .int JMPTBL (L(0bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(4bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(8bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(12bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL 
(L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(16bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(20bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(24bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(28bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(32bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(36bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(40bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(44bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(48bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(52bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), 
L(table_64bytes)) - .int JMPTBL (L(56bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(60bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(64bytes), L(table_64bytes)) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S deleted file mode 100644 index 5ebf5a4d73..0000000000 --- a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S +++ /dev/null @@ -1,2157 +0,0 @@ -/* memcmp with SSSE3, wmemcmp with SSSE3 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) - -# include <sysdep.h> - -# ifndef MEMCMP -# define MEMCMP __memcmp_ssse3 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define BLK1 PARMS -# define BLK2 BLK1+4 -# define LEN BLK2+4 -# define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret -# define RETURN RETURN_END; cfi_restore_state; cfi_remember_state - -/* Warning! - wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. -*/ - - atom_text_section -ENTRY (MEMCMP) - movl LEN(%esp), %ecx - -# ifdef USE_AS_WMEMCMP - shl $2, %ecx - test %ecx, %ecx - jz L(zero) -# endif - - movl BLK1(%esp), %eax - cmp $48, %ecx - movl BLK2(%esp), %edx - jae L(48bytesormore) - -# ifndef USE_AS_WMEMCMP - cmp $1, %ecx - jbe L(less1bytes) -# endif - - PUSH (%ebx) - add %ecx, %edx - add %ecx, %eax - jmp L(less48bytes) - - CFI_POP (%ebx) - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less1bytes): - jb L(zero) - movb (%eax), %cl - cmp (%edx), %cl - je L(zero) - mov $1, %eax - ja L(1bytesend) - neg %eax -L(1bytesend): - ret -# endif - - .p2align 4 -L(zero): - xor %eax, %eax - ret - - .p2align 4 -L(48bytesormore): - PUSH (%ebx) - PUSH (%esi) - PUSH (%edi) - cfi_remember_state - movdqu (%eax), %xmm3 - movdqu (%edx), %xmm0 - movl %eax, %edi - movl %edx, %esi - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - lea 16(%edi), %edi - - sub $0xffff, %edx - lea 16(%esi), %esi - jnz L(less16bytes) - mov %edi, %edx - and $0xf, %edx - xor %edx, %edi - sub %edx, %esi - add %edx, %ecx - mov %esi, %edx - and $0xf, %edx - jz L(shr_0) - xor %edx, %esi - -# ifndef USE_AS_WMEMCMP - cmp $8, %edx - jae L(next_unaligned_table) - cmp $0, %edx - je L(shr_0) - cmp $1, %edx - je L(shr_1) - cmp $2, %edx - je L(shr_2) - cmp $3, %edx - je L(shr_3) - cmp $4, %edx - je 
L(shr_4) - cmp $5, %edx - je L(shr_5) - cmp $6, %edx - je L(shr_6) - jmp L(shr_7) - - .p2align 2 -L(next_unaligned_table): - cmp $8, %edx - je L(shr_8) - cmp $9, %edx - je L(shr_9) - cmp $10, %edx - je L(shr_10) - cmp $11, %edx - je L(shr_11) - cmp $12, %edx - je L(shr_12) - cmp $13, %edx - je L(shr_13) - cmp $14, %edx - je L(shr_14) - jmp L(shr_15) -# else - cmp $0, %edx - je L(shr_0) - cmp $4, %edx - je L(shr_4) - cmp $8, %edx - je L(shr_8) - jmp L(shr_12) -# endif - - .p2align 4 -L(shr_0): - cmp $80, %ecx - jae L(shr_0_gobble) - lea -48(%ecx), %ecx - xor %eax, %eax - movaps (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - movaps 16(%esi), %xmm2 - pcmpeqb 16(%edi), %xmm2 - pand %xmm1, %xmm2 - pmovmskb %xmm2, %edx - add $32, %edi - add $32, %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea (%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_0_gobble): - lea -48(%ecx), %ecx - movdqa (%esi), %xmm0 - xor %eax, %eax - pcmpeqb (%edi), %xmm0 - sub $32, %ecx - movdqa 16(%esi), %xmm2 - pcmpeqb 16(%edi), %xmm2 -L(shr_0_gobble_loop): - pand %xmm0, %xmm2 - sub $32, %ecx - pmovmskb %xmm2, %edx - movdqa %xmm0, %xmm1 - movdqa 32(%esi), %xmm0 - movdqa 48(%esi), %xmm2 - sbb $0xffff, %edx - pcmpeqb 32(%edi), %xmm0 - pcmpeqb 48(%edi), %xmm2 - lea 32(%edi), %edi - lea 32(%esi), %esi - jz L(shr_0_gobble_loop) - - pand %xmm0, %xmm2 - cmp $0, %ecx - jge L(shr_0_gobble_loop_next) - inc %edx - add $32, %ecx -L(shr_0_gobble_loop_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm2, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea (%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_1): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_1_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, 
%xmm2 - palignr $1,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $1,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 1(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_1_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $1,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $1,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_1_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $1,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $1,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_1_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_1_gobble_next) - inc %edx - add $32, %ecx -L(shr_1_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 1(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_2): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_2_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $2,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $2,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 2(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_2_gobble): - sub $32, %ecx - movdqa 16(%esi), 
%xmm0 - palignr $2,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $2,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_2_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $2,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $2,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_2_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_2_gobble_next) - inc %edx - add $32, %ecx -L(shr_2_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 2(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_3): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_3_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $3,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $3,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 3(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_3_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $3,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $3,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_3_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $3,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $3,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - 
jz L(shr_3_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_3_gobble_next) - inc %edx - add $32, %ecx -L(shr_3_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 3(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_4): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_4_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $4,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $4,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 4(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_4_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $4,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $4,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_4_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $4,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $4,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_4_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_4_gobble_next) - inc %edx - add $32, %ecx -L(shr_4_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 4(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state 
- .p2align 4 -L(shr_5): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_5_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $5,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $5,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 5(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_5_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $5,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $5,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_5_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $5,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $5,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_5_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_5_gobble_next) - inc %edx - add $32, %ecx -L(shr_5_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 5(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_6): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_6_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $6,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $6,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 6(%ecx, %esi,1), %edx - POP (%edi) - POP 
(%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_6_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $6,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $6,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_6_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $6,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $6,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_6_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_6_gobble_next) - inc %edx - add $32, %ecx -L(shr_6_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 6(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_7): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_7_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $7,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $7,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 7(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_7_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $7,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $7,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_7_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $7,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 
48(%esi), %xmm0 - palignr $7,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_7_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_7_gobble_next) - inc %edx - add $32, %ecx -L(shr_7_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 7(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_8): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_8_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $8,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $8,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 8(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_8_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $8,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $8,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_8_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $8,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $8,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_8_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_8_gobble_next) - inc %edx - add $32, %ecx -L(shr_8_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax 
- lea 8(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_9): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_9_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $9,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $9,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 9(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_9_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $9,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $9,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_9_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $9,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $9,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_9_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_9_gobble_next) - inc %edx - add $32, %ecx -L(shr_9_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 9(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_10): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_10_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $10, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $10,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - 
lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 10(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_10_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $10, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $10, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_10_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $10,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $10,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_10_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_10_gobble_next) - inc %edx - add $32, %ecx -L(shr_10_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 10(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_11): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_11_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $11, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $11, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 11(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_11_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $11, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $11, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_11_gobble_loop): 
- pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $11,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $11,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_11_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_11_gobble_next) - inc %edx - add $32, %ecx -L(shr_11_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 11(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_12): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_12_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $12, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $12, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 12(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_12_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $12, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $12, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_12_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $12,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $12,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_12_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_12_gobble_next) - inc %edx - add $32, %ecx 
-L(shr_12_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 12(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_13): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_13_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $13, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $13, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 13(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_13_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $13, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $13, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_13_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $13,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $13,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_13_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_13_gobble_next) - inc %edx - add $32, %ecx -L(shr_13_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 13(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_14): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_14_gobble) 
- - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $14, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $14, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 14(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_14_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $14, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $14, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_14_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $14,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $14,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_14_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_14_gobble_next) - inc %edx - add $32, %ecx -L(shr_14_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 14(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_15): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_15_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $15, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $15, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 15(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - 
.p2align 4 -L(shr_15_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $15, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $15, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_15_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $15,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $15,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_15_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_15_gobble_next) - inc %edx - add $32, %ecx -L(shr_15_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 15(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(exit): - pmovmskb %xmm1, %ebx - sub $0xffff, %ebx - jz L(first16bytes) - lea -16(%esi), %esi - lea -16(%edi), %edi - mov %ebx, %edx - -L(first16bytes): - add %eax, %esi -L(less16bytes): - -# ifndef USE_AS_WMEMCMP - test %dl, %dl - jz L(next_24_bytes) - - test $0x01, %dl - jnz L(Byte16) - - test $0x02, %dl - jnz L(Byte17) - - test $0x04, %dl - jnz L(Byte18) - - test $0x08, %dl - jnz L(Byte19) - - test $0x10, %dl - jnz L(Byte20) - - test $0x20, %dl - jnz L(Byte21) - - test $0x40, %dl - jnz L(Byte22) -L(Byte23): - movzbl -9(%edi), %eax - movzbl -9(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte16): - movzbl -16(%edi), %eax - movzbl -16(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte17): - movzbl -15(%edi), %eax - movzbl -15(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte18): - movzbl -14(%edi), %eax - movzbl -14(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte19): - movzbl -13(%edi), %eax - movzbl -13(%esi), %edx - sub 
%edx, %eax - RETURN - - .p2align 4 -L(Byte20): - movzbl -12(%edi), %eax - movzbl -12(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte21): - movzbl -11(%edi), %eax - movzbl -11(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte22): - movzbl -10(%edi), %eax - movzbl -10(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(next_24_bytes): - lea 8(%edi), %edi - lea 8(%esi), %esi - test $0x01, %dh - jnz L(Byte16) - - test $0x02, %dh - jnz L(Byte17) - - test $0x04, %dh - jnz L(Byte18) - - test $0x08, %dh - jnz L(Byte19) - - test $0x10, %dh - jnz L(Byte20) - - test $0x20, %dh - jnz L(Byte21) - - test $0x40, %dh - jnz L(Byte22) - - .p2align 4 -L(Byte31): - movzbl -9(%edi), %eax - movzbl -9(%esi), %edx - sub %edx, %eax - RETURN_END -# else - -/* special for wmemcmp */ - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words) - and $15, %dl - jz L(second_double_word) - mov -16(%edi), %eax - cmp -16(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word): - mov -12(%edi), %eax - cmp -12(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words): - and $15, %dh - jz L(fourth_double_word) - mov -8(%edi), %eax - cmp -8(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word): - mov -4(%edi), %eax - cmp -4(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(nequal): - mov $1, %eax - jg L(nequal_bigger) - neg %eax - RETURN - - .p2align 4 -L(nequal_bigger): - RETURN_END -# endif - - CFI_PUSH (%ebx) - - .p2align 4 -L(more8bytes): - cmp $16, %ecx - jae L(more16bytes) - cmp $8, %ecx - je L(8bytes) -# ifndef USE_AS_WMEMCMP - cmp $9, %ecx - je L(9bytes) - cmp $10, %ecx - je L(10bytes) - cmp $11, %ecx - je L(11bytes) - cmp $12, %ecx - je L(12bytes) - cmp $13, %ecx - je L(13bytes) - cmp $14, %ecx - je L(14bytes) - jmp L(15bytes) -# else - jmp L(12bytes) -# endif - - .p2align 4 -L(more16bytes): - cmp $24, %ecx - jae L(more24bytes) - cmp $16, %ecx - je L(16bytes) -# ifndef USE_AS_WMEMCMP - cmp $17, %ecx 
- je L(17bytes) - cmp $18, %ecx - je L(18bytes) - cmp $19, %ecx - je L(19bytes) - cmp $20, %ecx - je L(20bytes) - cmp $21, %ecx - je L(21bytes) - cmp $22, %ecx - je L(22bytes) - jmp L(23bytes) -# else - jmp L(20bytes) -# endif - - .p2align 4 -L(more24bytes): - cmp $32, %ecx - jae L(more32bytes) - cmp $24, %ecx - je L(24bytes) -# ifndef USE_AS_WMEMCMP - cmp $25, %ecx - je L(25bytes) - cmp $26, %ecx - je L(26bytes) - cmp $27, %ecx - je L(27bytes) - cmp $28, %ecx - je L(28bytes) - cmp $29, %ecx - je L(29bytes) - cmp $30, %ecx - je L(30bytes) - jmp L(31bytes) -# else - jmp L(28bytes) -# endif - - .p2align 4 -L(more32bytes): - cmp $40, %ecx - jae L(more40bytes) - cmp $32, %ecx - je L(32bytes) -# ifndef USE_AS_WMEMCMP - cmp $33, %ecx - je L(33bytes) - cmp $34, %ecx - je L(34bytes) - cmp $35, %ecx - je L(35bytes) - cmp $36, %ecx - je L(36bytes) - cmp $37, %ecx - je L(37bytes) - cmp $38, %ecx - je L(38bytes) - jmp L(39bytes) -# else - jmp L(36bytes) -# endif - - .p2align 4 -L(less48bytes): - cmp $8, %ecx - jae L(more8bytes) -# ifndef USE_AS_WMEMCMP - cmp $2, %ecx - je L(2bytes) - cmp $3, %ecx - je L(3bytes) - cmp $4, %ecx - je L(4bytes) - cmp $5, %ecx - je L(5bytes) - cmp $6, %ecx - je L(6bytes) - jmp L(7bytes) -# else - jmp L(4bytes) -# endif - - .p2align 4 -L(more40bytes): - cmp $40, %ecx - je L(40bytes) -# ifndef USE_AS_WMEMCMP - cmp $41, %ecx - je L(41bytes) - cmp $42, %ecx - je L(42bytes) - cmp $43, %ecx - je L(43bytes) - cmp $44, %ecx - je L(44bytes) - cmp $45, %ecx - je L(45bytes) - cmp $46, %ecx - je L(46bytes) - jmp L(47bytes) - - .p2align 4 -L(44bytes): - mov -44(%eax), %ecx - mov -44(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(40bytes): - mov -40(%eax), %ecx - mov -40(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(36bytes): - mov -36(%eax), %ecx - mov -36(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(32bytes): - mov -32(%eax), %ecx - mov -32(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(28bytes): - mov -28(%eax), %ecx - mov -28(%edx), %ebx - 
cmp %ebx, %ecx - jne L(find_diff) -L(24bytes): - mov -24(%eax), %ecx - mov -24(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(20bytes): - mov -20(%eax), %ecx - mov -20(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(16bytes): - mov -16(%eax), %ecx - mov -16(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - mov -12(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - mov -8(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - mov -4(%edx), %ebx - cmp %ebx, %ecx - mov $0, %eax - jne L(find_diff) - POP (%ebx) - ret - CFI_PUSH (%ebx) -# else - .p2align 4 -L(44bytes): - mov -44(%eax), %ecx - cmp -44(%edx), %ecx - jne L(find_diff) -L(40bytes): - mov -40(%eax), %ecx - cmp -40(%edx), %ecx - jne L(find_diff) -L(36bytes): - mov -36(%eax), %ecx - cmp -36(%edx), %ecx - jne L(find_diff) -L(32bytes): - mov -32(%eax), %ecx - cmp -32(%edx), %ecx - jne L(find_diff) -L(28bytes): - mov -28(%eax), %ecx - cmp -28(%edx), %ecx - jne L(find_diff) -L(24bytes): - mov -24(%eax), %ecx - cmp -24(%edx), %ecx - jne L(find_diff) -L(20bytes): - mov -20(%eax), %ecx - cmp -20(%edx), %ecx - jne L(find_diff) -L(16bytes): - mov -16(%eax), %ecx - cmp -16(%edx), %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - cmp -12(%edx), %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - cmp -8(%edx), %ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - xor %eax, %eax - cmp -4(%edx), %ecx - jne L(find_diff) - POP (%ebx) - ret - CFI_PUSH (%ebx) -# endif - -# ifndef USE_AS_WMEMCMP - - .p2align 4 -L(45bytes): - mov -45(%eax), %ecx - mov -45(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(41bytes): - mov -41(%eax), %ecx - mov -41(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(37bytes): - mov -37(%eax), %ecx - mov -37(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(33bytes): - mov -33(%eax), %ecx - mov -33(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(29bytes): - mov -29(%eax), 
%ecx - mov -29(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(25bytes): - mov -25(%eax), %ecx - mov -25(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(21bytes): - mov -21(%eax), %ecx - mov -21(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(17bytes): - mov -17(%eax), %ecx - mov -17(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(13bytes): - mov -13(%eax), %ecx - mov -13(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(9bytes): - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(5bytes): - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 -L(46bytes): - mov -46(%eax), %ecx - mov -46(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(42bytes): - mov -42(%eax), %ecx - mov -42(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(38bytes): - mov -38(%eax), %ecx - mov -38(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(34bytes): - mov -34(%eax), %ecx - mov -34(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(30bytes): - mov -30(%eax), %ecx - mov -30(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(26bytes): - mov -26(%eax), %ecx - mov -26(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(22bytes): - mov -22(%eax), %ecx - mov -22(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(18bytes): - mov -18(%eax), %ecx - mov -18(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(14bytes): - mov -14(%eax), %ecx - mov -14(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(10bytes): - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(6bytes): - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(2bytes): - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 -L(47bytes): - movl -47(%eax), %ecx - movl -47(%edx), 
%ebx - cmp %ebx, %ecx - jne L(find_diff) -L(43bytes): - movl -43(%eax), %ecx - movl -43(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(39bytes): - movl -39(%eax), %ecx - movl -39(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(35bytes): - movl -35(%eax), %ecx - movl -35(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(31bytes): - movl -31(%eax), %ecx - movl -31(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(27bytes): - movl -27(%eax), %ecx - movl -27(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(23bytes): - movl -23(%eax), %ecx - movl -23(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(19bytes): - movl -19(%eax), %ecx - movl -19(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(15bytes): - movl -15(%eax), %ecx - movl -15(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(11bytes): - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(7bytes): - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(3bytes): - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 -L(find_diff): - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - shr $16,%ecx - shr $16,%ebx - cmp %bl, %cl - jne L(end) - cmp %bx, %cx - - .p2align 4 -L(end): - POP (%ebx) - mov $1, %eax - ja L(bigger) - neg %eax -L(bigger): - ret -# else - -/* for wmemcmp */ - .p2align 4 -L(find_diff): - POP (%ebx) - mov $1, %eax - jg L(find_diff_bigger) - neg %eax - ret - - .p2align 4 -L(find_diff_bigger): - ret - -# endif -END (MEMCMP) -#endif diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S deleted file mode 100644 index 1fc5994a17..0000000000 --- a/sysdeps/i386/i686/multiarch/memcmp.S +++ /dev/null @@ -1,62 +0,0 @@ -/* Multiple versions of memcmp - All versions must be listed in ifunc-impl-list.c. 
- Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) - .text -ENTRY(memcmp) - .type memcmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memcmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memcmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (__memcmp_sse4_2) -2: ret -END(memcmp) - -# undef ENTRY -# define ENTRY(name) \ - .type __memcmp_ia32, @function; \ - .p2align 4; \ - .globl __memcmp_ia32; \ - .hidden __memcmp_ia32; \ - __memcmp_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memcmp; __GI_memcmp = __memcmp_ia32 -# endif -#endif - -#include "../memcmp.S" diff --git a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S deleted file mode 100644 index 2fe2072cb1..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S +++ /dev/null @@ -1,681 +0,0 @@ -/* memcpy optimized with SSE2 unaligned memory access instructions. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) - -# include <sysdep.h> -# include "asm-syntax.h" - -# ifndef MEMCPY -# define MEMCPY __memcpy_sse2_unaligned -# define MEMCPY_CHK __memcpy_chk_sse2_unaligned -# endif - -# ifdef USE_AS_BCOPY -# define SRC PARMS -# define DEST SRC+4 -# define LEN DEST+4 -# else -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 /* Preserve EBX. 
*/
/* ENTRANCE saves EBX, which the code below uses as a scratch register
   and, in PIC builds, as the GOT pointer.  RETURN_END restores it and
   returns.  RETURN does the same and then re-asserts the "EBX is
   pushed" unwind state for the code that follows the `ret' in the
   instruction stream, since that code still runs with EBX on the
   stack.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)

	.section .text.sse2,"ax",@progbits
# if !defined USE_AS_BCOPY
/* Checked entry point.  On entry (nothing pushed yet) 12(%esp) is the
   third argument and 16(%esp) the fourth -- the copy length and the
   destination object size in the __memcpy_chk convention.  If the
   destination size is below the length (unsigned compare) control goes
   to __chk_fail; otherwise execution falls straight through into
   MEMCPY, which is laid out immediately after this stub.  */
ENTRY (MEMCPY_CHK)
	movl	12(%esp), %eax
	cmpl	%eax, 16(%esp)
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
# endif

/* MEMCPY (dest, src, len); argument order is (src, dest, len) when
   built with USE_AS_BCOPY.

   Register use after the prologue:
     ECX = len, EAX = src, EDX = dest (EDX is never modified, so it can
     be used for the return value at L(return)).

   Return value: dest in EAX, or dest + len when built with
   USE_AS_MEMPCPY.

   With USE_AS_MEMMOVE the buffers may overlap.  The dispatch is:
     src <= dest            -> "backward" paths (L(mm_*_backward)),
     src >  dest, overlap   -> "forward" overlap-safe paths
                               (L(mm_*_forward)),
     no overlap             -> generic L(forward) path.
   The small overlap-safe paths load every byte into registers before
   storing anything; the >128-byte ones keep the end blocks in registers
   or on the stack while the loop runs.

   All address and length comparisons must be UNSIGNED (ja/jbe/jae/jb).
   Addresses and size_t lengths cover the whole 32-bit range; signed
   conditions (jg/jle) misclassify buffers that straddle 0x80000000 and
   lengths of 2 GiB or more, which selects the wrong copy direction and
   corrupts overlapping copies.  */
ENTRY (MEMCPY)
	ENTRANCE
	movl	LEN(%esp), %ecx
	movl	SRC(%esp), %eax
	movl	DEST(%esp), %edx
	cmp	%edx, %eax		/* flags = src - dest */

# ifdef USE_AS_MEMMOVE
	/* src > dest (unsigned): go and see whether a forward copy is safe.  */
	ja	L(check_forward)

L(mm_len_0_or_more_backward):
/* src <= dest.  Dispatch on the length: [0..16], (16..32], (32..64],
   (64..128] are handled separately; anything longer goes to
   L(mm_len_128_or_more_backward).  */
	cmp	$16, %ecx
	jbe	L(mm_len_0_16_bytes_backward)

	cmpl	$32, %ecx
	ja	L(mm_len_32_or_more_backward)

/* Copy (16..32] bytes and return: first and last 16 bytes, which may
   overlap each other; both loads precede both stores.  */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jmp	L(return)

L(mm_len_32_or_more_backward):
	cmpl	$64, %ecx
	ja	L(mm_len_64_or_more_backward)

/* Copy (32..64] bytes and return: first and last 32 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	-16(%eax, %ecx), %xmm2
	movdqu	-32(%eax, %ecx), %xmm3
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, -16(%edx, %ecx)
	movdqu	%xmm3, -32(%edx, %ecx)
	jmp	L(return)

L(mm_len_64_or_more_backward):
	cmpl	$128, %ecx
	ja	L(mm_len_128_or_more_backward)

/* Copy (64..128] bytes and return: first and last 64 bytes.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)
	jmp	L(return)

L(mm_len_128_or_more_backward):
	/* If src + len <= dest (unsigned) the buffers do not overlap and
	   the generic forward path can be used.  EAX is reloaded with src
	   before branching; movl leaves the flags from the cmp intact.  */
	add	%ecx, %eax
	cmp	%edx, %eax
	movl	SRC(%esp), %eax
	jbe	L(forward)
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)

/* Aligning the address of destination.
   Kept aside for the epilogue: XMM4-XMM7 = first 64 source bytes, and
   the last 16 source bytes are spilled to a 16-byte stack slot.
   ESI = dest + len, EDI = len,
   ECX = (dest + len) & -16  (16-byte aligned end of the destination),
   EBX = ECX - dest, EAX = src + EBX (source position matching ECX),
   then EBX >>= 6 gives the number of 64-byte loop iterations.  */
	movdqu	(%eax), %xmm4
	movdqu	16(%eax), %xmm5
	movdqu	32(%eax), %xmm6
	movdqu	48(%eax), %xmm7
	leal	(%edx, %ecx), %esi
	movdqu	-16(%eax, %ecx), %xmm0
	subl	$16, %esp
	movdqu	%xmm0, (%esp)
	mov	%ecx, %edi
	movl	%esi, %ecx
	andl	$-16, %ecx
	leal	(%ecx), %ebx
	subl	%edx, %ebx
	leal	(%eax, %ebx), %eax
	shrl	$6, %ebx

	/* Lengths at or above half the shared cache size use the
	   non-temporal loop.  In the PIC case EBX is borrowed as GOT
	   pointer; pushl/popl do not change the flags set by cmp.  */
# ifdef SHARED_CACHE_SIZE_HALF
	cmp	$SHARED_CACHE_SIZE_HALF, %edi
# else
#  ifdef SHARED
	PUSH (%ebx)
	SETUP_PIC_REG (bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %edi
	POP (%ebx)
#  else
	cmp	__x86_shared_cache_size_half, %edi
#  endif
# endif
	jae	L(mm_large_page_loop_backward)

	.p2align 4
L(mm_main_loop_backward):
	/* 64 bytes per iteration, moving downwards; stores are aligned.  */

	prefetcht0 -128(%eax)

	movdqu	-64(%eax), %xmm0
	movdqu	-48(%eax), %xmm1
	movdqu	-32(%eax), %xmm2
	movdqu	-16(%eax), %xmm3
	movaps	%xmm0, -64(%ecx)
	subl	$64, %eax
	movaps	%xmm1, -48(%ecx)
	movaps	%xmm2, -32(%ecx)
	movaps	%xmm3, -16(%ecx)
	subl	$64, %ecx
	sub	$1, %ebx
	jnz	L(mm_main_loop_backward)
	/* Epilogue: write the spilled last 16 bytes and the saved first
	   64 bytes, then release the stack slot.  */
	movdqu	(%esp), %xmm0
	addl	$16, %esp
	movdqu	%xmm0, -16(%esi)
	movdqu	%xmm4, (%edx)
	movdqu	%xmm5, 16(%edx)
	movdqu	%xmm6, 32(%edx)
	movdqu	%xmm7, 48(%edx)
	POP (%ebx)
	jmp	L(mm_return_pop_all)

/* Copy [0..16] bytes and return.  Dispatch on the bits of the length:
   bits 3/4 set -> 8..16, bit 2 -> 4..7, zero -> nothing, bit 1 -> 2..3,
   otherwise exactly 1 byte.  */
L(mm_len_0_16_bytes_backward):
	testb	$24, %cl
	jnz	L(mm_len_9_16_bytes_backward)
	testb	$4, %cl
	.p2align 4,,5
	jnz	L(mm_len_5_8_bytes_backward)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(return)
	testb	$2, %cl
	.p2align 4,,1
	jne	L(mm_len_3_4_bytes_backward)
	movzbl	-1(%eax,%ecx), %ebx
	movzbl	(%eax), %eax
	movb	%bl, -1(%edx,%ecx)
	movb	%al, (%edx)
	jmp	L(return)

L(mm_len_3_4_bytes_backward):
	movzwl	-2(%eax,%ecx), %ebx
	movzwl	(%eax), %eax
	movw	%bx, -2(%edx,%ecx)
	movw	%ax, (%edx)
	jmp	L(return)

L(mm_len_9_16_bytes_backward):
	/* Move the top 8 bytes, then re-dispatch on the remaining
	   len - 8 (0..8) bytes.  */
	PUSH (%esi)
	movl	-4(%eax,%ecx), %ebx
	movl	-8(%eax,%ecx), %esi
	movl	%ebx, -4(%edx,%ecx)
	movl	%esi, -8(%edx,%ecx)
	subl	$8, %ecx
	POP (%esi)
	jmp	L(mm_len_0_16_bytes_backward)

L(mm_len_5_8_bytes_backward):
	movl	(%eax), %ebx
	movl	-4(%eax,%ecx), %eax
	movl	%ebx, (%edx)
	movl	%eax, -4(%edx,%ecx)
	jmp	L(return)

/* Big length copy backward part: same loop as
   L(mm_main_loop_backward) but with non-temporal stores, followed by
   an sfence before the ordinary epilogue stores.  */
	.p2align 4
L(mm_large_page_loop_backward):
	movdqu	-64(%eax), %xmm0
	movdqu	-48(%eax), %xmm1
	movdqu	-32(%eax), %xmm2
	movdqu	-16(%eax), %xmm3
	movntdq	%xmm0, -64(%ecx)
	subl	$64, %eax
	movntdq	%xmm1, -48(%ecx)
	movntdq	%xmm2, -32(%ecx)
	movntdq	%xmm3, -16(%ecx)
	subl	$64, %ecx
	sub	$1, %ebx
	jnz	L(mm_large_page_loop_backward)
	sfence
	movdqu	(%esp), %xmm0
	addl	$16, %esp
	movdqu	%xmm0, -16(%esi)
	movdqu	%xmm4, (%edx)
	movdqu	%xmm5, 16(%edx)
	movdqu	%xmm6, 32(%edx)
	movdqu	%xmm7, 48(%edx)
	POP (%ebx)
	jmp	L(mm_return_pop_all)

L(check_forward):
	/* src > dest.  If dest + len <= src (unsigned) the buffers do not
	   overlap and the generic forward path can be used.  ECX is
	   reloaded with len before branching; movl keeps the flags.  */
	add	%edx, %ecx
	cmp	%eax, %ecx
	movl	LEN(%esp), %ecx
	jbe	L(forward)

/* Overlapping forward copy.  Dispatch on the length: [0..16],
   (16..32], (32..64], (64..128] are handled separately; anything
   longer goes to L(mm_len_128_or_more_forward).  */
	cmp	$16, %ecx
	jbe	L(mm_len_0_16_bytes_forward)

	cmpl	$32, %ecx
	ja	L(mm_len_32_or_more_forward)

/* Copy (16..32] bytes and return.  */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jmp	L(return)

L(mm_len_32_or_more_forward):
	cmpl	$64, %ecx
	ja	L(mm_len_64_or_more_forward)

/* Copy (32..64] bytes and return.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	-16(%eax, %ecx), %xmm2
	movdqu	-32(%eax, %ecx), %xmm3
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, -16(%edx, %ecx)
	movdqu	%xmm3, -32(%edx, %ecx)
	jmp	L(return)

L(mm_len_64_or_more_forward):
	cmpl	$128, %ecx
	ja	L(mm_len_128_or_more_forward)

/* Copy (64..128] bytes and return.  */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)
	jmp	L(return)

L(mm_len_128_or_more_forward):
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)

/* Aligning the address of destination.
   Kept aside for the epilogue: XMM4-XMM7 = last 64 source bytes, and
   the first 16 source bytes are spilled to a 16-byte stack slot.
   ESI = dest + len, EDI = len,
   ECX = (dest + 16) & -16  (first 16-byte boundary above dest),
   EAX = src + (ECX - dest) (source position matching ECX),
   EBX = (ESI - ECX) >> 6   (number of 64-byte loop iterations).  */
	movdqu	-16(%eax, %ecx), %xmm4
	movdqu	-32(%eax, %ecx), %xmm5
	movdqu	-48(%eax, %ecx), %xmm6
	movdqu	-64(%eax, %ecx), %xmm7
	leal	(%edx, %ecx), %esi
	movdqu	(%eax), %xmm0
	subl	$16, %esp
	movdqu	%xmm0, (%esp)
	mov	%ecx, %edi
	leal	16(%edx), %ecx
	andl	$-16, %ecx
	movl	%ecx, %ebx
	subl	%edx, %ebx
	addl	%ebx, %eax
	movl	%esi, %ebx
	subl	%ecx, %ebx
	shrl	$6, %ebx

	/* Same cache-size test as in the backward path.  */
# ifdef SHARED_CACHE_SIZE_HALF
	cmp	$SHARED_CACHE_SIZE_HALF, %edi
# else
#  ifdef SHARED
	PUSH (%ebx)
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %edi
	POP (%ebx)
#  else
	cmp	__x86_shared_cache_size_half, %edi
#  endif
# endif
	jae	L(mm_large_page_loop_forward)

	.p2align 4
L(mm_main_loop_forward):
	/* 64 bytes per iteration, moving upwards; stores are aligned.  */

	prefetcht0 128(%eax)

	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqa	%xmm0, (%ecx)
	addl	$64, %eax
	movaps	%xmm1, 16(%ecx)
	movaps	%xmm2, 32(%ecx)
	movaps	%xmm3, 48(%ecx)
	addl	$64, %ecx
	sub	$1, %ebx
	jnz	L(mm_main_loop_forward)
	/* Epilogue: write the spilled first 16 bytes and the saved last
	   64 bytes, then release the stack slot.  */
	movdqu	(%esp), %xmm0
	addl	$16, %esp
	movdqu	%xmm0, (%edx)
	movdqu	%xmm4, -16(%esi)
	movdqu	%xmm5, -32(%esi)
	movdqu	%xmm6, -48(%esi)
	movdqu	%xmm7, -64(%esi)
	POP (%ebx)
	jmp	L(mm_return_pop_all)

/* Copy [0..16] bytes (overlapping forward case) and return; same
   bit-test dispatch as L(mm_len_0_16_bytes_backward).  */
L(mm_len_0_16_bytes_forward):
	testb	$24, %cl
	jne	L(mm_len_9_16_bytes_forward)
	testb	$4, %cl
	.p2align 4,,5
	jne	L(mm_len_5_8_bytes_forward)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(return)
	testb	$2, %cl
	.p2align 4,,1
	jne	L(mm_len_2_4_bytes_forward)
	movzbl	-1(%eax,%ecx), %ebx
	movzbl	(%eax), %eax
	movb	%bl, -1(%edx,%ecx)
	movb	%al, (%edx)
	jmp	L(return)

L(mm_len_2_4_bytes_forward):
	movzwl	-2(%eax,%ecx), %ebx
	movzwl	(%eax), %eax
	movw	%bx, -2(%edx,%ecx)
	movw	%ax, (%edx)
	jmp	L(return)

L(mm_len_5_8_bytes_forward):
	movl	(%eax), %ebx
	movl	-4(%eax,%ecx), %eax
	movl	%ebx, (%edx)
	movl	%eax, -4(%edx,%ecx)
	jmp	L(return)

L(mm_len_9_16_bytes_forward):
	movq	(%eax), %xmm0
	movq	-8(%eax, %ecx), %xmm1
	movq	%xmm0, (%edx)
	movq	%xmm1, -8(%edx, %ecx)
	jmp	L(return)

/* Common exit of the >128-byte memmove paths: return dest and undo the
   ESI/EDI pushes (EBX pushed there has already been popped).  */
L(mm_return_pop_all):
	movl	%edx, %eax
	POP (%edi)
	POP (%esi)
	RETURN

/* Big length copy forward part: same loop as L(mm_main_loop_forward)
   but with non-temporal stores, followed by an sfence.  */
	.p2align 4
L(mm_large_page_loop_forward):
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movntdq	%xmm0, (%ecx)
	addl	$64, %eax
	movntdq	%xmm1, 16(%ecx)
	movntdq	%xmm2, 32(%ecx)
	movntdq	%xmm3, 48(%ecx)
	addl	$64, %ecx
	sub	$1, %ebx
	jnz	L(mm_large_page_loop_forward)
	sfence
	movdqu	(%esp), %xmm0
	addl	$16, %esp
	movdqu	%xmm0, (%edx)
	movdqu	%xmm4, -16(%esi)
	movdqu	%xmm5, -32(%esi)
	movdqu	%xmm6, -48(%esi)
	movdqu	%xmm7, -64(%esi)
	POP (%ebx)
	jmp	L(mm_return_pop_all)
# endif

/* Generic forward copy.  Reached directly for memcpy/mempcpy/bcopy and
   from the memmove dispatch once the buffers are known not to overlap.
   Stores are interleaved with later loads here, so this path must not
   be used for overlapping buffers.  */
L(forward):
	cmp	$16, %ecx
	jbe	L(len_0_16_bytes)

	/* In the PIC case EBX (already saved by ENTRANCE) is clobbered
	   with the GOT pointer; it is not needed again before being
	   reassigned below.  */
# ifdef SHARED_CACHE_SIZE_HALF
	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
# else
#  ifdef SHARED
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
#  else
	cmp	__x86_shared_cache_size_half, %ecx
#  endif
# endif
	jae	L(large_page)

	/* Copy the first and last 16 bytes; done if len <= 32.  The cmpl
	   is issued before the stores, which do not affect the flags.  */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	cmpl	$32, %ecx
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jbe	L(return)

	/* Next 16 bytes from each end; done if len <= 64.  */
	movdqu	16(%eax), %xmm0
	movdqu	-32(%eax, %ecx), %xmm1
	cmpl	$64, %ecx
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm1, -32(%edx, %ecx)
	jbe	L(return)

	/* Next 32 bytes from each end; done if len <= 128.  */
	movdqu	32(%eax), %xmm0
	movdqu	48(%eax), %xmm1
	movdqu	-48(%eax, %ecx), %xmm2
	movdqu	-64(%eax, %ecx), %xmm3
	cmpl	$128, %ecx
	movdqu	%xmm0, 32(%edx)
	movdqu	%xmm1, 48(%edx)
	movdqu	%xmm2, -48(%edx, %ecx)
	movdqu	%xmm3, -64(%edx, %ecx)
	jbe	L(return)

/* Now the main loop: we align the address of the destination.
   The first and last 64 bytes are already written, so the loop only
   has to cover the 64-byte aligned interior:
   EBX = (dest + 64) & -64   (current aligned destination),
   ECX = (dest + len) & -64  (aligned end of the destination),
   EAX = src - dest, so (%ebx, %eax) is the source byte matching EBX.  */
	leal	64(%edx), %ebx
	andl	$-64, %ebx

	addl	%edx, %ecx
	andl	$-64, %ecx

	subl	%edx, %eax

/* We should stop two iterations before the termination
   (in order not to misprefetch).  */
	subl	$64, %ecx
	cmpl	%ebx, %ecx
	je	L(main_loop_just_one_iteration)

	subl	$64, %ecx
	cmpl	%ebx, %ecx
	je	L(main_loop_last_two_iterations)

	.p2align 4
L(main_loop_cache):

	prefetcht0 128(%ebx, %eax)

	movdqu	(%ebx, %eax), %xmm0
	movdqu	16(%ebx, %eax), %xmm1
	movdqu	32(%ebx, %eax), %xmm2
	movdqu	48(%ebx, %eax), %xmm3
	movdqa	%xmm0, (%ebx)
	movaps	%xmm1, 16(%ebx)
	movaps	%xmm2, 32(%ebx)
	movaps	%xmm3, 48(%ebx)
	lea	64(%ebx), %ebx
	cmpl	%ebx, %ecx
	jne	L(main_loop_cache)

/* Final 128 aligned bytes, without prefetching.  */
L(main_loop_last_two_iterations):
	movdqu	(%ebx, %eax), %xmm0
	movdqu	16(%ebx, %eax), %xmm1
	movdqu	32(%ebx, %eax), %xmm2
	movdqu	48(%ebx, %eax), %xmm3
	movdqu	64(%ebx, %eax), %xmm4
	movdqu	80(%ebx, %eax), %xmm5
	movdqu	96(%ebx, %eax), %xmm6
	movdqu	112(%ebx, %eax), %xmm7
	movdqa	%xmm0, (%ebx)
	movaps	%xmm1, 16(%ebx)
	movaps	%xmm2, 32(%ebx)
	movaps	%xmm3, 48(%ebx)
	movaps	%xmm4, 64(%ebx)
	movaps	%xmm5, 80(%ebx)
	movaps	%xmm6, 96(%ebx)
	movaps	%xmm7, 112(%ebx)
	jmp	L(return)

/* Only one aligned 64-byte block lies between the two ends.  */
L(main_loop_just_one_iteration):
	movdqu	(%ebx, %eax), %xmm0
	movdqu	16(%ebx, %eax), %xmm1
	movdqu	32(%ebx, %eax), %xmm2
	movdqu	48(%ebx, %eax), %xmm3
	movdqa	%xmm0, (%ebx)
	movaps	%xmm1, 16(%ebx)
	movaps	%xmm2, 32(%ebx)
	movaps	%xmm3, 48(%ebx)
	jmp	L(return)

/* len >= half the shared cache size: copy the first and last 128 bytes
   with ordinary unaligned stores, then stream the 128-byte aligned
   interior with non-temporal stores.  */
L(large_page):
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)

	movdqu	64(%eax), %xmm0
	movdqu	80(%eax), %xmm1
	movdqu	96(%eax), %xmm2
	movdqu	112(%eax), %xmm3
	movdqu	-128(%eax, %ecx), %xmm4
	movdqu	-112(%eax, %ecx), %xmm5
	movdqu	-96(%eax, %ecx), %xmm6
	movdqu	-80(%eax, %ecx), %xmm7
	movdqu	%xmm0, 64(%edx)
	movdqu	%xmm1, 80(%edx)
	movdqu	%xmm2, 96(%edx)
	movdqu	%xmm3, 112(%edx)
	movdqu	%xmm4, -128(%edx, %ecx)
	movdqu	%xmm5, -112(%edx, %ecx)
	movdqu	%xmm6, -96(%edx, %ecx)
	movdqu	%xmm7, -80(%edx, %ecx)

/* Now the main loop with non temporal stores. We align
   the address of the destination.
   EBX = (dest + 128) & -128, ECX = (dest + len) & -128,
   EAX = src - dest.  */
	leal	128(%edx), %ebx
	andl	$-128, %ebx

	addl	%edx, %ecx
	andl	$-128, %ecx

	subl	%edx, %eax

	.p2align 4
L(main_loop_large_page):
	movdqu	(%ebx, %eax), %xmm0
	movdqu	16(%ebx, %eax), %xmm1
	movdqu	32(%ebx, %eax), %xmm2
	movdqu	48(%ebx, %eax), %xmm3
	movdqu	64(%ebx, %eax), %xmm4
	movdqu	80(%ebx, %eax), %xmm5
	movdqu	96(%ebx, %eax), %xmm6
	movdqu	112(%ebx, %eax), %xmm7
	movntdq	%xmm0, (%ebx)
	movntdq	%xmm1, 16(%ebx)
	movntdq	%xmm2, 32(%ebx)
	movntdq	%xmm3, 48(%ebx)
	movntdq	%xmm4, 64(%ebx)
	movntdq	%xmm5, 80(%ebx)
	movntdq	%xmm6, 96(%ebx)
	movntdq	%xmm7, 112(%ebx)
	lea	128(%ebx), %ebx
	cmpl	%ebx, %ecx
	jne	L(main_loop_large_page)
	sfence
	jmp	L(return)

/* Copy [0..16] bytes (non-overlapping case).  For 1..3 bytes the first
   byte is stored unconditionally and, when bit 1 of the length is set,
   the last two bytes are stored as one word.  */
L(len_0_16_bytes):
	testb	$24, %cl
	jne	L(len_9_16_bytes)
	testb	$4, %cl
	.p2align 4,,5
	jne	L(len_5_8_bytes)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(return)
	movzbl	(%eax), %ebx
	testb	$2, %cl
	movb	%bl, (%edx)
	je	L(return)
	movzwl	-2(%eax,%ecx), %ebx
	movw	%bx, -2(%edx,%ecx)
	jmp	L(return)

L(len_9_16_bytes):
	movq	(%eax), %xmm0
	movq	-8(%eax, %ecx), %xmm1
	movq	%xmm0, (%edx)
	movq	%xmm1, -8(%edx, %ecx)
	jmp	L(return)

L(len_5_8_bytes):
	movl	(%eax), %ebx
	movl	%ebx, (%edx)
	movl	-4(%eax,%ecx), %ebx
	movl	%ebx, -4(%edx,%ecx)
	/* Falls through to L(return).  */

/* Common exit: EAX = dest, or dest + len for the mempcpy build; LEN is
   reloaded from the stack because ECX may have been modified.  */
L(return):
	movl	%edx, %eax
# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
	RETURN

END (MEMCPY)
#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S deleted file mode 100644 index 687e083147..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S +++ /dev/null @@ -1,1809 +0,0 @@
/* memcpy with
SSSE3 and REP string. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) - -#include "asm-syntax.h" - -#ifndef MEMCPY -# define MEMCPY __memcpy_ssse3_rep -# define MEMCPY_CHK __memcpy_chk_ssse3_rep -#endif - -#ifdef USE_AS_BCOPY -# define SRC PARMS -# define DEST SRC+4 -# define LEN DEST+4 -#else -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 -#endif - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef SHARED -# define PARMS 8 /* Preserve EBX. */ -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into EBX. 
*/ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ebx,INDEX,SCALE), %ebx; \ - /* We loaded the jump table. Go. */ \ - jmp *%ebx - -# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \ - addl $(TABLE - .), %ebx - -# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ - addl (%ebx,INDEX,SCALE), %ebx; \ - /* We loaded the jump table. Go. */ \ - jmp *%ebx -#else -# define PARMS 4 -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) - -# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) - -# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -#endif - - .section .text.ssse3,"ax",@progbits -#if !defined USE_AS_BCOPY -ENTRY (MEMCPY_CHK) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (MEMCPY_CHK) -#endif -ENTRY (MEMCPY) - ENTRANCE - movl LEN(%esp), %ecx - movl SRC(%esp), %eax - movl DEST(%esp), %edx - -#ifdef USE_AS_MEMMOVE - cmp %eax, %edx - jb L(copy_forward) - je L(fwd_write_0bytes) - cmp $48, %ecx - jb L(bk_write_less48bytes) - add %ecx, %eax - cmp %eax, %edx - movl SRC(%esp), %eax - jb L(copy_backward) - -L(copy_forward): -#endif - cmp $48, %ecx - jae L(48bytesormore) - -L(fwd_write_less32bytes): -#ifndef USE_AS_MEMMOVE - cmp %dl, %al - jb L(bk_write) -#endif - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -#ifndef USE_AS_MEMMOVE -L(bk_write): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) -#endif - - ALIGN (4) -/* ECX > 32 and EDX is 4 byte aligned. 
*/ -L(48bytesormore): - movdqu (%eax), %xmm0 - PUSH (%edi) - movl %edx, %edi - and $-16, %edx - PUSH (%esi) - cfi_remember_state - add $16, %edx - movl %edi, %esi - sub %edx, %edi - add %edi, %ecx - sub %edi, %eax - -#ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %ecx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_shared_cache_size_half, %ecx -# endif -#endif - - mov %eax, %edi - jae L(large_page) - and $0xf, %edi - jz L(shl_0) - - BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) - - ALIGN (4) -L(shl_0): - movdqu %xmm0, (%esi) - xor %edi, %edi - cmp $127, %ecx - ja L(shl_0_gobble) - lea -32(%ecx), %ecx -L(shl_0_loop): - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi -L(shl_0_end): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - add %edi, %eax - POP (%esi) - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state -L(shl_0_gobble): - -#ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi -# else - mov __x86_data_cache_size_half, %edi -# endif -#endif - mov %edi, %esi - shr $3, %esi - sub %esi, %edi - cmp %edi, %ecx - jae 
L(shl_0_gobble_mem_start) - sub $128, %ecx - ALIGN (4) -L(shl_0_gobble_cache_loop): - movdqa (%eax), %xmm0 - movaps 0x10(%eax), %xmm1 - movaps 0x20(%eax), %xmm2 - movaps 0x30(%eax), %xmm3 - movaps 0x40(%eax), %xmm4 - movaps 0x50(%eax), %xmm5 - movaps 0x60(%eax), %xmm6 - movaps 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - sub $128, %ecx - movdqa %xmm0, (%edx) - movaps %xmm1, 0x10(%edx) - movaps %xmm2, 0x20(%edx) - movaps %xmm3, 0x30(%edx) - movaps %xmm4, 0x40(%edx) - movaps %xmm5, 0x50(%edx) - movaps %xmm6, 0x60(%edx) - movaps %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_cache_loop) - add $0x80, %ecx - cmp $0x40, %ecx - jb L(shl_0_cache_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), %xmm1 - add $0x40, %eax - - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx -L(shl_0_cache_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_cache_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx -L(shl_0_cache_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_cache_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - movdqa %xmm0, (%edx) - add $0x10, %edx -L(shl_0_cache_less_16bytes): - add %ecx, %edx - add %ecx, %eax - POP (%esi) - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_0_gobble_mem_start): - cmp %al, %dl - je L(copy_page_by_rep) - sub $128, %ecx -L(shl_0_gobble_mem_loop): - prefetchnta 0x1c0(%eax) - prefetchnta 0x280(%eax) - prefetchnta 0x1c0(%edx) - prefetchnta 0x280(%edx) - - movdqa (%eax), %xmm0 - movaps 0x10(%eax), %xmm1 - movaps 0x20(%eax), %xmm2 - movaps 0x30(%eax), %xmm3 - movaps 0x40(%eax), %xmm4 - movaps 0x50(%eax), %xmm5 - movaps 0x60(%eax), %xmm6 - movaps 0x70(%eax), %xmm7 - lea 
0x80(%eax), %eax - sub $0x80, %ecx - movdqa %xmm0, (%edx) - movaps %xmm1, 0x10(%edx) - movaps %xmm2, 0x20(%edx) - movaps %xmm3, 0x30(%edx) - movaps %xmm4, 0x40(%edx) - movaps %xmm5, 0x50(%edx) - movaps %xmm6, 0x60(%edx) - movaps %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_mem_loop) - add $0x80, %ecx - cmp $0x40, %ecx - jb L(shl_0_mem_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), %xmm1 - add $0x40, %eax - - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx -L(shl_0_mem_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_mem_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx -L(shl_0_mem_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_mem_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - movdqa %xmm0, (%edx) - add $0x10, %edx -L(shl_0_mem_less_16bytes): - add %ecx, %edx - add %ecx, %eax - POP (%esi) - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_1): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $1, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_1_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_1_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_1_loop) - -L(shl_1_end): - add $32, %ecx - add 
%ecx, %edi - add %edi, %edx - lea 1(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_2): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $2, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_2_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_2_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_2_loop) - -L(shl_2_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 2(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_3): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $3, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_3_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_3_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_3_loop) - -L(shl_3_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 3(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - 
cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_4): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $4, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_4_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_4_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_4_loop) - -L(shl_4_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 4(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_5): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $5, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_5_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_5_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_5_loop) - -L(shl_5_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 5(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_6): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $6, %eax - 
movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_6_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_6_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_6_loop) - -L(shl_6_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 6(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_7): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $7, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_7_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_7_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_7_loop) - -L(shl_7_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 7(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_8): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $8, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_8_loop): - - movdqa 16(%eax, %edi), 
%xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_8_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_8_loop) - -L(shl_8_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 8(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_9): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $9, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_9_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_9_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_9_loop) - -L(shl_9_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 9(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_10): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $10, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_10_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm1, %xmm2 
- lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_10_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_10_loop) - -L(shl_10_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 10(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_11): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $11, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_11_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_11_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_11_loop) - -L(shl_11_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 11(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_12): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $12, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_12_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_12_end) - - 
movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_12_loop) - -L(shl_12_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 12(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_13): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $13, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_13_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_13_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_13_loop) - -L(shl_13_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 13(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_14): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $14, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_14_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_14_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr 
$14, %xmm2, %xmm3 - palignr $14, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_14_loop) - -L(shl_14_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 14(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_15): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $15, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_15_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_15_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_15_loop) - -L(shl_15_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 15(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - - ALIGN (4) -L(fwd_write_44bytes): - movl -44(%eax), %ecx - movl %ecx, -44(%edx) -L(fwd_write_40bytes): - movl -40(%eax), %ecx - movl %ecx, -40(%edx) -L(fwd_write_36bytes): - movl -36(%eax), %ecx - movl %ecx, -36(%edx) -L(fwd_write_32bytes): - movl -32(%eax), %ecx - movl %ecx, -32(%edx) -L(fwd_write_28bytes): - movl -28(%eax), %ecx - movl %ecx, -28(%edx) -L(fwd_write_24bytes): - movl -24(%eax), %ecx - movl %ecx, -24(%edx) -L(fwd_write_20bytes): - movl -20(%eax), %ecx - movl %ecx, -20(%edx) -L(fwd_write_16bytes): - movl -16(%eax), %ecx - movl %ecx, -16(%edx) -L(fwd_write_12bytes): - movl -12(%eax), %ecx - movl %ecx, -12(%edx) -L(fwd_write_8bytes): - movl -8(%eax), %ecx - movl %ecx, -8(%edx) -L(fwd_write_4bytes): - movl 
-4(%eax), %ecx - movl %ecx, -4(%edx) -L(fwd_write_0bytes): -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_5bytes): - movl -5(%eax), %ecx - movl -4(%eax), %eax - movl %ecx, -5(%edx) - movl %eax, -4(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_45bytes): - movl -45(%eax), %ecx - movl %ecx, -45(%edx) -L(fwd_write_41bytes): - movl -41(%eax), %ecx - movl %ecx, -41(%edx) -L(fwd_write_37bytes): - movl -37(%eax), %ecx - movl %ecx, -37(%edx) -L(fwd_write_33bytes): - movl -33(%eax), %ecx - movl %ecx, -33(%edx) -L(fwd_write_29bytes): - movl -29(%eax), %ecx - movl %ecx, -29(%edx) -L(fwd_write_25bytes): - movl -25(%eax), %ecx - movl %ecx, -25(%edx) -L(fwd_write_21bytes): - movl -21(%eax), %ecx - movl %ecx, -21(%edx) -L(fwd_write_17bytes): - movl -17(%eax), %ecx - movl %ecx, -17(%edx) -L(fwd_write_13bytes): - movl -13(%eax), %ecx - movl %ecx, -13(%edx) -L(fwd_write_9bytes): - movl -9(%eax), %ecx - movl %ecx, -9(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) -L(fwd_write_1bytes): - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_46bytes): - movl -46(%eax), %ecx - movl %ecx, -46(%edx) -L(fwd_write_42bytes): - movl -42(%eax), %ecx - movl %ecx, -42(%edx) -L(fwd_write_38bytes): - movl -38(%eax), %ecx - movl %ecx, -38(%edx) -L(fwd_write_34bytes): - movl -34(%eax), %ecx - movl %ecx, -34(%edx) -L(fwd_write_30bytes): - movl -30(%eax), %ecx - movl %ecx, -30(%edx) -L(fwd_write_26bytes): - movl -26(%eax), %ecx - movl %ecx, -26(%edx) -L(fwd_write_22bytes): - movl -22(%eax), %ecx - movl %ecx, -22(%edx) -L(fwd_write_18bytes): - movl -18(%eax), %ecx - movl %ecx, -18(%edx) -L(fwd_write_14bytes): - movl -14(%eax), %ecx - movl %ecx, -14(%edx) 
-L(fwd_write_10bytes): - movl -10(%eax), %ecx - movl %ecx, -10(%edx) -L(fwd_write_6bytes): - movl -6(%eax), %ecx - movl %ecx, -6(%edx) -L(fwd_write_2bytes): - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_47bytes): - movl -47(%eax), %ecx - movl %ecx, -47(%edx) -L(fwd_write_43bytes): - movl -43(%eax), %ecx - movl %ecx, -43(%edx) -L(fwd_write_39bytes): - movl -39(%eax), %ecx - movl %ecx, -39(%edx) -L(fwd_write_35bytes): - movl -35(%eax), %ecx - movl %ecx, -35(%edx) -L(fwd_write_31bytes): - movl -31(%eax), %ecx - movl %ecx, -31(%edx) -L(fwd_write_27bytes): - movl -27(%eax), %ecx - movl %ecx, -27(%edx) -L(fwd_write_23bytes): - movl -23(%eax), %ecx - movl %ecx, -23(%edx) -L(fwd_write_19bytes): - movl -19(%eax), %ecx - movl %ecx, -19(%edx) -L(fwd_write_15bytes): - movl -15(%eax), %ecx - movl %ecx, -15(%edx) -L(fwd_write_11bytes): - movl -11(%eax), %ecx - movl %ecx, -11(%edx) -L(fwd_write_7bytes): - movl -7(%eax), %ecx - movl %ecx, -7(%edx) -L(fwd_write_3bytes): - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN_END - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(large_page): - movdqu (%eax), %xmm1 - movdqu %xmm0, (%esi) - movntdq %xmm1, (%edx) - add $0x10, %eax - add $0x10, %edx - sub $0x10, %ecx - cmp %al, %dl - je L(copy_page_by_rep) -L(large_page_loop_init): - POP (%esi) - sub $0x80, %ecx - POP (%edi) -L(large_page_loop): - prefetchnta 0x1c0(%eax) - prefetchnta 0x280(%eax) - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - movdqu 0x40(%eax), %xmm4 - movdqu 0x50(%eax), %xmm5 - movdqu 0x60(%eax), %xmm6 - movdqu 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - lfence - sub $0x80, %ecx - movntdq %xmm0, (%edx) - 
movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - movntdq %xmm4, 0x40(%edx) - movntdq %xmm5, 0x50(%edx) - movntdq %xmm6, 0x60(%edx) - movntdq %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - jae L(large_page_loop) - add $0x80, %ecx - cmp $0x40, %ecx - jb L(large_page_less_64bytes) - - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - lea 0x40(%eax), %eax - - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - lea 0x40(%edx), %edx - sub $0x40, %ecx -L(large_page_less_64bytes): - cmp $32, %ecx - jb L(large_page_less_32bytes) - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - lea 0x20(%eax), %eax - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - lea 0x20(%edx), %edx - sub $0x20, %ecx -L(large_page_less_32bytes): - add %ecx, %edx - add %ecx, %eax - sfence - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(copy_page_by_rep): - mov %eax, %esi - mov %edx, %edi - mov %ecx, %edx - shr $2, %ecx - and $3, %edx - rep movsl - jz L(copy_page_by_rep_exit) - cmp $2, %edx - jb L(copy_page_by_rep_left_1) - movzwl (%esi), %eax - movw %ax, (%edi) - add $2, %esi - add $2, %edi - sub $2, %edx - jz L(copy_page_by_rep_exit) -L(copy_page_by_rep_left_1): - movzbl (%esi), %eax - movb %al, (%edi) -L(copy_page_by_rep_exit): - POP (%esi) - POP (%edi) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_44bytes): - movl 40(%eax), %ecx - movl %ecx, 40(%edx) -L(bk_write_40bytes): - movl 36(%eax), %ecx - movl %ecx, 36(%edx) -L(bk_write_36bytes): - movl 32(%eax), %ecx - movl %ecx, 32(%edx) -L(bk_write_32bytes): - movl 28(%eax), %ecx - movl %ecx, 28(%edx) -L(bk_write_28bytes): - movl 24(%eax), %ecx - movl %ecx, 24(%edx) -L(bk_write_24bytes): - movl 20(%eax), %ecx - movl %ecx, 20(%edx) 
-L(bk_write_20bytes): - movl 16(%eax), %ecx - movl %ecx, 16(%edx) -L(bk_write_16bytes): - movl 12(%eax), %ecx - movl %ecx, 12(%edx) -L(bk_write_12bytes): - movl 8(%eax), %ecx - movl %ecx, 8(%edx) -L(bk_write_8bytes): - movl 4(%eax), %ecx - movl %ecx, 4(%edx) -L(bk_write_4bytes): - movl (%eax), %ecx - movl %ecx, (%edx) -L(bk_write_0bytes): -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_45bytes): - movl 41(%eax), %ecx - movl %ecx, 41(%edx) -L(bk_write_41bytes): - movl 37(%eax), %ecx - movl %ecx, 37(%edx) -L(bk_write_37bytes): - movl 33(%eax), %ecx - movl %ecx, 33(%edx) -L(bk_write_33bytes): - movl 29(%eax), %ecx - movl %ecx, 29(%edx) -L(bk_write_29bytes): - movl 25(%eax), %ecx - movl %ecx, 25(%edx) -L(bk_write_25bytes): - movl 21(%eax), %ecx - movl %ecx, 21(%edx) -L(bk_write_21bytes): - movl 17(%eax), %ecx - movl %ecx, 17(%edx) -L(bk_write_17bytes): - movl 13(%eax), %ecx - movl %ecx, 13(%edx) -L(bk_write_13bytes): - movl 9(%eax), %ecx - movl %ecx, 9(%edx) -L(bk_write_9bytes): - movl 5(%eax), %ecx - movl %ecx, 5(%edx) -L(bk_write_5bytes): - movl 1(%eax), %ecx - movl %ecx, 1(%edx) -L(bk_write_1bytes): - movzbl (%eax), %ecx - movb %cl, (%edx) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_46bytes): - movl 42(%eax), %ecx - movl %ecx, 42(%edx) -L(bk_write_42bytes): - movl 38(%eax), %ecx - movl %ecx, 38(%edx) -L(bk_write_38bytes): - movl 34(%eax), %ecx - movl %ecx, 34(%edx) -L(bk_write_34bytes): - movl 30(%eax), %ecx - movl %ecx, 30(%edx) -L(bk_write_30bytes): - movl 26(%eax), %ecx - movl %ecx, 26(%edx) -L(bk_write_26bytes): - movl 22(%eax), %ecx - movl %ecx, 22(%edx) -L(bk_write_22bytes): - movl 18(%eax), %ecx - movl %ecx, 18(%edx) -L(bk_write_18bytes): - movl 14(%eax), %ecx - movl %ecx, 14(%edx) -L(bk_write_14bytes): - movl 10(%eax), %ecx - movl 
%ecx, 10(%edx) -L(bk_write_10bytes): - movl 6(%eax), %ecx - movl %ecx, 6(%edx) -L(bk_write_6bytes): - movl 2(%eax), %ecx - movl %ecx, 2(%edx) -L(bk_write_2bytes): - movzwl (%eax), %ecx - movw %cx, (%edx) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_47bytes): - movl 43(%eax), %ecx - movl %ecx, 43(%edx) -L(bk_write_43bytes): - movl 39(%eax), %ecx - movl %ecx, 39(%edx) -L(bk_write_39bytes): - movl 35(%eax), %ecx - movl %ecx, 35(%edx) -L(bk_write_35bytes): - movl 31(%eax), %ecx - movl %ecx, 31(%edx) -L(bk_write_31bytes): - movl 27(%eax), %ecx - movl %ecx, 27(%edx) -L(bk_write_27bytes): - movl 23(%eax), %ecx - movl %ecx, 23(%edx) -L(bk_write_23bytes): - movl 19(%eax), %ecx - movl %ecx, 19(%edx) -L(bk_write_19bytes): - movl 15(%eax), %ecx - movl %ecx, 15(%edx) -L(bk_write_15bytes): - movl 11(%eax), %ecx - movl %ecx, 11(%edx) -L(bk_write_11bytes): - movl 7(%eax), %ecx - movl %ecx, 7(%edx) -L(bk_write_7bytes): - movl 3(%eax), %ecx - movl %ecx, 3(%edx) -L(bk_write_3bytes): - movzwl 1(%eax), %ecx - movw %cx, 1(%edx) - movzbl (%eax), %eax - movb %al, (%edx) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN_END - - - .pushsection .rodata.ssse3,"a",@progbits - ALIGN (2) -L(table_48bytes_fwd): - .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) - .int JMPTBL 
(L(fwd_write_10bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) - .int 
JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd)) - - ALIGN (2) -L(shl_table): - .int JMPTBL (L(shl_0), L(shl_table)) - .int JMPTBL (L(shl_1), L(shl_table)) - .int JMPTBL (L(shl_2), L(shl_table)) - .int JMPTBL (L(shl_3), L(shl_table)) - .int JMPTBL (L(shl_4), L(shl_table)) - .int JMPTBL (L(shl_5), L(shl_table)) - .int JMPTBL (L(shl_6), L(shl_table)) - .int JMPTBL (L(shl_7), L(shl_table)) - .int JMPTBL (L(shl_8), L(shl_table)) - .int JMPTBL (L(shl_9), L(shl_table)) - .int JMPTBL (L(shl_10), L(shl_table)) - .int JMPTBL (L(shl_11), L(shl_table)) - .int JMPTBL (L(shl_12), L(shl_table)) - .int JMPTBL (L(shl_13), L(shl_table)) - .int JMPTBL (L(shl_14), L(shl_table)) - .int JMPTBL (L(shl_15), L(shl_table)) - - ALIGN (2) -L(table_48_bytes_bwd): - .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) - .int JMPTBL 
(L(bk_write_18bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd)) - - .popsection - -#ifdef USE_AS_MEMMOVE - ALIGN (4) -L(copy_backward): - PUSH (%esi) - movl %eax, %esi - add %ecx, %edx - add %ecx, %esi - testl $0x3, %edx - jnz L(bk_align) - -L(bk_aligned_4): - cmp $64, %ecx - jae L(bk_write_more64bytes) - 
-L(bk_write_64bytesless): - cmp $32, %ecx - jb L(bk_write_less32bytes) - -L(bk_write_more32bytes): - /* Copy 32 bytes at a time. */ - sub $32, %ecx - movl -4(%esi), %eax - movl %eax, -4(%edx) - movl -8(%esi), %eax - movl %eax, -8(%edx) - movl -12(%esi), %eax - movl %eax, -12(%edx) - movl -16(%esi), %eax - movl %eax, -16(%edx) - movl -20(%esi), %eax - movl %eax, -20(%edx) - movl -24(%esi), %eax - movl %eax, -24(%edx) - movl -28(%esi), %eax - movl %eax, -28(%edx) - movl -32(%esi), %eax - movl %eax, -32(%edx) - sub $32, %edx - sub $32, %esi - -L(bk_write_less32bytes): - movl %esi, %eax - sub %ecx, %edx - sub %ecx, %eax - POP (%esi) -L(bk_write_less48bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) - - CFI_PUSH (%esi) - ALIGN (4) -L(bk_align): - cmp $8, %ecx - jbe L(bk_write_less32bytes) - testl $1, %edx - /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, - then (EDX & 2) must be != 0. */ - jz L(bk_got2) - sub $1, %esi - sub $1, %ecx - sub $1, %edx - movzbl (%esi), %eax - movb %al, (%edx) - - testl $2, %edx - jz L(bk_aligned_4) - -L(bk_got2): - sub $2, %esi - sub $2, %ecx - sub $2, %edx - movzwl (%esi), %eax - movw %ax, (%edx) - jmp L(bk_aligned_4) - - ALIGN (4) -L(bk_write_more64bytes): - /* Check alignment of last byte. */ - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - -/* EDX is aligned 4 bytes, but not 16 bytes. 
*/ -L(bk_ssse3_align): - sub $4, %esi - sub $4, %ecx - sub $4, %edx - movl (%esi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %esi - sub $4, %ecx - sub $4, %edx - movl (%esi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %esi - sub $4, %ecx - sub $4, %edx - movl (%esi), %eax - movl %eax, (%edx) - -L(bk_ssse3_cpy_pre): - cmp $64, %ecx - jb L(bk_write_more32bytes) - -L(bk_ssse3_cpy): - sub $64, %esi - sub $64, %ecx - sub $64, %edx - movdqu 0x30(%esi), %xmm3 - movdqa %xmm3, 0x30(%edx) - movdqu 0x20(%esi), %xmm2 - movdqa %xmm2, 0x20(%edx) - movdqu 0x10(%esi), %xmm1 - movdqa %xmm1, 0x10(%edx) - movdqu (%esi), %xmm0 - movdqa %xmm0, (%edx) - cmp $64, %ecx - jae L(bk_ssse3_cpy) - jmp L(bk_write_64bytesless) - -#endif - -END (MEMCPY) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S deleted file mode 100644 index 53e8a6ca1d..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S +++ /dev/null @@ -1,3162 +0,0 @@ -/* memcpy with SSSE3 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) - -# include <sysdep.h> -# include "asm-syntax.h" - -# ifndef MEMCPY -# define MEMCPY __memcpy_ssse3 -# define MEMCPY_CHK __memcpy_chk_ssse3 -# endif - -# ifdef USE_AS_BCOPY -# define SRC PARMS -# define DEST SRC+4 -# define LEN DEST+4 -# else -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifdef SHARED -# define PARMS 8 /* Preserve EBX. */ -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ebx, INDEX, SCALE), %ebx; \ - /* We loaded the jump table. Go. */ \ - jmp *%ebx -# else - -# define PARMS 4 -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(, INDEX, SCALE) -# endif - - .section .text.ssse3,"ax",@progbits -# if !defined USE_AS_BCOPY -ENTRY (MEMCPY_CHK) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (MEMCPY_CHK) -# endif -ENTRY (MEMCPY) - ENTRANCE - movl LEN(%esp), %ecx - movl SRC(%esp), %eax - movl DEST(%esp), %edx - -# ifdef USE_AS_MEMMOVE - cmp %eax, %edx - jb L(copy_forward) - je L(fwd_write_0bytes) - cmp $32, %ecx - jae L(memmove_bwd) - jmp L(bk_write_less32bytes_2) - - .p2align 4 -L(memmove_bwd): - add %ecx, %eax - cmp %eax, %edx - movl SRC(%esp), %eax - jb L(copy_backward) - -L(copy_forward): -# endif - cmp $48, %ecx - jae L(48bytesormore) - -L(fwd_write_less32bytes): -# ifndef USE_AS_MEMMOVE - cmp %dl, %al - jb L(bk_write) -# endif - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -# ifndef USE_AS_MEMMOVE - .p2align 4 -L(bk_write): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) -# endif - - .p2align 4 -L(48bytesormore): -# ifndef USE_AS_MEMMOVE - movlpd (%eax), %xmm0 - movlpd 8(%eax), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) -# else - movdqu (%eax), %xmm0 -# endif - PUSH (%edi) - movl %edx, %edi - and $-16, %edx - add $16, %edx - sub %edx, %edi - add %edi, %ecx - sub %edi, %eax - -# ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_shared_cache_size_half, %ecx -# endif -# endif - - mov %eax, %edi - jae L(large_page) - and $0xf, %edi - jz L(shl_0) - BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) - - .p2align 4 -L(shl_0): -# ifdef USE_AS_MEMMOVE - movl DEST+4(%esp), %edi - movdqu %xmm0, (%edi) -# endif - xor %edi, %edi - cmp $127, %ecx - ja L(shl_0_gobble) - lea -32(%ecx), %ecx - - .p2align 4 -L(shl_0_loop): - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, 
%edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - -L(shl_0_end): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - add %edi, %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_0_gobble): -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - POP (%edi) - lea -128(%ecx), %ecx - jae L(shl_0_gobble_mem_loop) - - .p2align 4 -L(shl_0_gobble_cache_loop): - movdqa (%eax), %xmm0 - movdqa 0x10(%eax), %xmm1 - movdqa 0x20(%eax), %xmm2 - movdqa 0x30(%eax), %xmm3 - movdqa 0x40(%eax), %xmm4 - movdqa 0x50(%eax), %xmm5 - movdqa 0x60(%eax), %xmm6 - movdqa 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - movdqa %xmm2, 0x20(%edx) - movdqa %xmm3, 0x30(%edx) - movdqa %xmm4, 0x40(%edx) - movdqa %xmm5, 0x50(%edx) - movdqa %xmm6, 0x60(%edx) - movdqa %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_cache_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_cache_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), 
%xmm1 - add $0x40, %eax - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx - -L(shl_0_cache_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_cache_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx - -L(shl_0_cache_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_cache_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - movdqa %xmm0, (%edx) - add $0x10, %edx - -L(shl_0_cache_less_16bytes): - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - .p2align 4 -L(shl_0_gobble_mem_loop): - prefetcht0 0x1c0(%eax) - prefetcht0 0x280(%eax) - prefetcht0 0x1c0(%edx) - - movdqa (%eax), %xmm0 - movdqa 0x10(%eax), %xmm1 - movdqa 0x20(%eax), %xmm2 - movdqa 0x30(%eax), %xmm3 - movdqa 0x40(%eax), %xmm4 - movdqa 0x50(%eax), %xmm5 - movdqa 0x60(%eax), %xmm6 - movdqa 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - sub $0x80, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - movdqa %xmm2, 0x20(%edx) - movdqa %xmm3, 0x30(%edx) - movdqa %xmm4, 0x40(%edx) - movdqa %xmm5, 0x50(%edx) - movdqa %xmm6, 0x60(%edx) - movdqa %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_mem_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_mem_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), %xmm1 - add $0x40, %eax - - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx - -L(shl_0_mem_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_mem_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx - -L(shl_0_mem_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_mem_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - 
movdqa %xmm0, (%edx) - add $0x10, %edx - -L(shl_0_mem_less_16bytes): - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4) - - .p2align 4 -L(shl_1): -# ifndef USE_AS_MEMMOVE - movaps -1(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -1(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_1_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl1LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 15(%eax), %xmm2 - movaps 31(%eax), %xmm3 - movaps 47(%eax), %xmm4 - movaps 63(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $1, %xmm4, %xmm5 - palignr $1, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $1, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $1, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl1LoopStart) - -L(Shl1LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 15(%eax), %xmm2 - movaps 31(%eax), %xmm3 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_1_no_prefetch): - lea -32(%ecx), %ecx - lea -1(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_1_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_1_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), 
%xmm3 - movdqa %xmm3, %xmm1 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_1_no_prefetch_loop) - -L(sh_1_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 1(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_2): -# ifndef USE_AS_MEMMOVE - movaps -2(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -2(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_2_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl2LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 14(%eax), %xmm2 - movaps 30(%eax), %xmm3 - movaps 46(%eax), %xmm4 - movaps 62(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $2, %xmm4, %xmm5 - palignr $2, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $2, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $2, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl2LoopStart) - -L(Shl2LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 14(%eax), %xmm2 - movaps 30(%eax), %xmm3 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_2_no_prefetch): - lea -32(%ecx), %ecx - lea -2(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_2_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $2, 
%xmm2, %xmm3 - palignr $2, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_2_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_2_no_prefetch_loop) - -L(sh_2_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 2(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_3): -# ifndef USE_AS_MEMMOVE - movaps -3(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -3(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_3_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl3LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 13(%eax), %xmm2 - movaps 29(%eax), %xmm3 - movaps 45(%eax), %xmm4 - movaps 61(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $3, %xmm4, %xmm5 - palignr $3, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $3, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $3, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl3LoopStart) - -L(Shl3LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 13(%eax), %xmm2 - movaps 29(%eax), %xmm3 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 
-L(sh_3_no_prefetch): - lea -32(%ecx), %ecx - lea -3(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_3_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_3_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_3_no_prefetch_loop) - -L(sh_3_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 3(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_4): -# ifndef USE_AS_MEMMOVE - movaps -4(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -4(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_4_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl4LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 12(%eax), %xmm2 - movaps 28(%eax), %xmm3 - movaps 44(%eax), %xmm4 - movaps 60(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $4, %xmm4, %xmm5 - palignr $4, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $4, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $4, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl4LoopStart) - -L(Shl4LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 12(%eax), %xmm2 - movaps 28(%eax), %xmm3 - palignr $4, %xmm2, 
%xmm3 - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_4_no_prefetch): - lea -32(%ecx), %ecx - lea -4(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_4_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_4_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_4_no_prefetch_loop) - -L(sh_4_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 4(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_5): -# ifndef USE_AS_MEMMOVE - movaps -5(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -5(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_5_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl5LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 11(%eax), %xmm2 - movaps 27(%eax), %xmm3 - movaps 43(%eax), %xmm4 - movaps 59(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $5, %xmm4, %xmm5 - palignr $5, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $5, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $5, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 
16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl5LoopStart) - -L(Shl5LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 11(%eax), %xmm2 - movaps 27(%eax), %xmm3 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_5_no_prefetch): - lea -32(%ecx), %ecx - lea -5(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_5_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_5_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_5_no_prefetch_loop) - -L(sh_5_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 5(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_6): -# ifndef USE_AS_MEMMOVE - movaps -6(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -6(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_6_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl6LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 10(%eax), %xmm2 - movaps 26(%eax), %xmm3 - movaps 42(%eax), %xmm4 - 
movaps 58(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $6, %xmm4, %xmm5 - palignr $6, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $6, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $6, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl6LoopStart) - -L(Shl6LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 10(%eax), %xmm2 - movaps 26(%eax), %xmm3 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_6_no_prefetch): - lea -32(%ecx), %ecx - lea -6(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_6_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_6_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_6_no_prefetch_loop) - -L(sh_6_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 6(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_7): -# ifndef USE_AS_MEMMOVE - movaps -7(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -7(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp 
__x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_7_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl7LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 9(%eax), %xmm2 - movaps 25(%eax), %xmm3 - movaps 41(%eax), %xmm4 - movaps 57(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $7, %xmm4, %xmm5 - palignr $7, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $7, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $7, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl7LoopStart) - -L(Shl7LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 9(%eax), %xmm2 - movaps 25(%eax), %xmm3 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_7_no_prefetch): - lea -32(%ecx), %ecx - lea -7(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_7_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_7_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_7_no_prefetch_loop) - -L(sh_7_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 7(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_8): -# ifndef USE_AS_MEMMOVE - movaps -8(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -8(%eax), %xmm1 - movdqu 
%xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_8_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl8LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 8(%eax), %xmm2 - movaps 24(%eax), %xmm3 - movaps 40(%eax), %xmm4 - movaps 56(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $8, %xmm4, %xmm5 - palignr $8, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $8, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $8, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl8LoopStart) - -L(LoopLeave8): - add $32, %ecx - jle L(shl_end_0) - - movaps 8(%eax), %xmm2 - movaps 24(%eax), %xmm3 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_8_no_prefetch): - lea -32(%ecx), %ecx - lea -8(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_8_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_8_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_8_no_prefetch_loop) - -L(sh_8_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 8(%edi, %eax), %eax - POP 
(%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_9): -# ifndef USE_AS_MEMMOVE - movaps -9(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -9(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_9_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl9LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 7(%eax), %xmm2 - movaps 23(%eax), %xmm3 - movaps 39(%eax), %xmm4 - movaps 55(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $9, %xmm4, %xmm5 - palignr $9, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $9, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $9, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl9LoopStart) - -L(Shl9LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 7(%eax), %xmm2 - movaps 23(%eax), %xmm3 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_9_no_prefetch): - lea -32(%ecx), %ecx - lea -9(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_9_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_9_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm4, %xmm2 - lea 
32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_9_no_prefetch_loop) - -L(sh_9_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 9(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_10): -# ifndef USE_AS_MEMMOVE - movaps -10(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -10(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_10_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl10LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 6(%eax), %xmm2 - movaps 22(%eax), %xmm3 - movaps 38(%eax), %xmm4 - movaps 54(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $10, %xmm4, %xmm5 - palignr $10, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $10, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $10, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl10LoopStart) - -L(Shl10LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 6(%eax), %xmm2 - movaps 22(%eax), %xmm3 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_10_no_prefetch): - lea -32(%ecx), %ecx - lea -10(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_10_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa 
%xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_10_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_10_no_prefetch_loop) - -L(sh_10_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 10(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_11): -# ifndef USE_AS_MEMMOVE - movaps -11(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -11(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_11_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl11LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 5(%eax), %xmm2 - movaps 21(%eax), %xmm3 - movaps 37(%eax), %xmm4 - movaps 53(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $11, %xmm4, %xmm5 - palignr $11, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $11, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $11, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl11LoopStart) - -L(Shl11LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 5(%eax), %xmm2 - movaps 21(%eax), %xmm3 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_11_no_prefetch): - lea -32(%ecx), %ecx - lea 
-11(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_11_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_11_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_11_no_prefetch_loop) - -L(sh_11_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 11(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_12): -# ifndef USE_AS_MEMMOVE - movaps -12(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -12(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_12_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl12LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 4(%eax), %xmm2 - movaps 20(%eax), %xmm3 - movaps 36(%eax), %xmm4 - movaps 52(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $12, %xmm4, %xmm5 - palignr $12, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $12, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $12, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl12LoopStart) - -L(Shl12LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 4(%eax), %xmm2 - movaps 20(%eax), %xmm3 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm1, %xmm2 - - 
movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_12_no_prefetch): - lea -32(%ecx), %ecx - lea -12(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_12_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_12_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_12_no_prefetch_loop) - -L(sh_12_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 12(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_13): -# ifndef USE_AS_MEMMOVE - movaps -13(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -13(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_13_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl13LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 3(%eax), %xmm2 - movaps 19(%eax), %xmm3 - movaps 35(%eax), %xmm4 - movaps 51(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $13, %xmm4, %xmm5 - palignr $13, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $13, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $13, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, 
%xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl13LoopStart) - -L(Shl13LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 3(%eax), %xmm2 - movaps 19(%eax), %xmm3 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_13_no_prefetch): - lea -32(%ecx), %ecx - lea -13(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_13_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_13_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_13_no_prefetch_loop) - -L(sh_13_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 13(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_14): -# ifndef USE_AS_MEMMOVE - movaps -14(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -14(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_14_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl14LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 2(%eax), %xmm2 - movaps 18(%eax), %xmm3 - movaps 34(%eax), %xmm4 - movaps 
50(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $14, %xmm4, %xmm5 - palignr $14, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $14, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $14, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl14LoopStart) - -L(Shl14LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 2(%eax), %xmm2 - movaps 18(%eax), %xmm3 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_14_no_prefetch): - lea -32(%ecx), %ecx - lea -14(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_14_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_14_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_14_no_prefetch_loop) - -L(sh_14_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 14(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_15): -# ifndef USE_AS_MEMMOVE - movaps -15(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -15(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp 
__x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_15_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl15LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 1(%eax), %xmm2 - movaps 17(%eax), %xmm3 - movaps 33(%eax), %xmm4 - movaps 49(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $15, %xmm4, %xmm5 - palignr $15, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $15, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $15, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl15LoopStart) - -L(Shl15LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 1(%eax), %xmm2 - movaps 17(%eax), %xmm3 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_15_no_prefetch): - lea -32(%ecx), %ecx - lea -15(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_15_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_15_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_15_no_prefetch_loop) - -L(sh_15_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 15(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_end_0): - lea 32(%ecx), %ecx - lea (%edx, %ecx), %edx - lea (%eax, %ecx), %eax - POP (%edi) - 
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - .p2align 4 -L(fwd_write_44bytes): - movq -44(%eax), %xmm0 - movq %xmm0, -44(%edx) -L(fwd_write_36bytes): - movq -36(%eax), %xmm0 - movq %xmm0, -36(%edx) -L(fwd_write_28bytes): - movq -28(%eax), %xmm0 - movq %xmm0, -28(%edx) -L(fwd_write_20bytes): - movq -20(%eax), %xmm0 - movq %xmm0, -20(%edx) -L(fwd_write_12bytes): - movq -12(%eax), %xmm0 - movq %xmm0, -12(%edx) -L(fwd_write_4bytes): - movl -4(%eax), %ecx - movl %ecx, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_40bytes): - movq -40(%eax), %xmm0 - movq %xmm0, -40(%edx) -L(fwd_write_32bytes): - movq -32(%eax), %xmm0 - movq %xmm0, -32(%edx) -L(fwd_write_24bytes): - movq -24(%eax), %xmm0 - movq %xmm0, -24(%edx) -L(fwd_write_16bytes): - movq -16(%eax), %xmm0 - movq %xmm0, -16(%edx) -L(fwd_write_8bytes): - movq -8(%eax), %xmm0 - movq %xmm0, -8(%edx) -L(fwd_write_0bytes): -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_5bytes): - movl -5(%eax), %ecx - movl -4(%eax), %eax - movl %ecx, -5(%edx) - movl %eax, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_45bytes): - movq -45(%eax), %xmm0 - movq %xmm0, -45(%edx) -L(fwd_write_37bytes): - movq -37(%eax), %xmm0 - movq %xmm0, -37(%edx) -L(fwd_write_29bytes): - movq -29(%eax), %xmm0 - movq %xmm0, -29(%edx) -L(fwd_write_21bytes): - movq -21(%eax), %xmm0 - movq %xmm0, -21(%edx) -L(fwd_write_13bytes): - movq -13(%eax), %xmm0 - movq %xmm0, -13(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_41bytes): - 
movq -41(%eax), %xmm0 - movq %xmm0, -41(%edx) -L(fwd_write_33bytes): - movq -33(%eax), %xmm0 - movq %xmm0, -33(%edx) -L(fwd_write_25bytes): - movq -25(%eax), %xmm0 - movq %xmm0, -25(%edx) -L(fwd_write_17bytes): - movq -17(%eax), %xmm0 - movq %xmm0, -17(%edx) -L(fwd_write_9bytes): - movq -9(%eax), %xmm0 - movq %xmm0, -9(%edx) -L(fwd_write_1bytes): - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_46bytes): - movq -46(%eax), %xmm0 - movq %xmm0, -46(%edx) -L(fwd_write_38bytes): - movq -38(%eax), %xmm0 - movq %xmm0, -38(%edx) -L(fwd_write_30bytes): - movq -30(%eax), %xmm0 - movq %xmm0, -30(%edx) -L(fwd_write_22bytes): - movq -22(%eax), %xmm0 - movq %xmm0, -22(%edx) -L(fwd_write_14bytes): - movq -14(%eax), %xmm0 - movq %xmm0, -14(%edx) -L(fwd_write_6bytes): - movl -6(%eax), %ecx - movl %ecx, -6(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_42bytes): - movq -42(%eax), %xmm0 - movq %xmm0, -42(%edx) -L(fwd_write_34bytes): - movq -34(%eax), %xmm0 - movq %xmm0, -34(%edx) -L(fwd_write_26bytes): - movq -26(%eax), %xmm0 - movq %xmm0, -26(%edx) -L(fwd_write_18bytes): - movq -18(%eax), %xmm0 - movq %xmm0, -18(%edx) -L(fwd_write_10bytes): - movq -10(%eax), %xmm0 - movq %xmm0, -10(%edx) -L(fwd_write_2bytes): - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_47bytes): - movq -47(%eax), %xmm0 - movq %xmm0, -47(%edx) -L(fwd_write_39bytes): - movq -39(%eax), %xmm0 - movq %xmm0, -39(%edx) -L(fwd_write_31bytes): - movq -31(%eax), %xmm0 - movq %xmm0, -31(%edx) -L(fwd_write_23bytes): - movq -23(%eax), %xmm0 - movq %xmm0, -23(%edx) 
-L(fwd_write_15bytes): - movq -15(%eax), %xmm0 - movq %xmm0, -15(%edx) -L(fwd_write_7bytes): - movl -7(%eax), %ecx - movl %ecx, -7(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_43bytes): - movq -43(%eax), %xmm0 - movq %xmm0, -43(%edx) -L(fwd_write_35bytes): - movq -35(%eax), %xmm0 - movq %xmm0, -35(%edx) -L(fwd_write_27bytes): - movq -27(%eax), %xmm0 - movq %xmm0, -27(%edx) -L(fwd_write_19bytes): - movq -19(%eax), %xmm0 - movq %xmm0, -19(%edx) -L(fwd_write_11bytes): - movq -11(%eax), %xmm0 - movq %xmm0, -11(%edx) -L(fwd_write_3bytes): - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_40bytes_align): - movdqa -40(%eax), %xmm0 - movdqa %xmm0, -40(%edx) -L(fwd_write_24bytes_align): - movdqa -24(%eax), %xmm0 - movdqa %xmm0, -24(%edx) -L(fwd_write_8bytes_align): - movq -8(%eax), %xmm0 - movq %xmm0, -8(%edx) -L(fwd_write_0bytes_align): -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_32bytes_align): - movdqa -32(%eax), %xmm0 - movdqa %xmm0, -32(%edx) -L(fwd_write_16bytes_align): - movdqa -16(%eax), %xmm0 - movdqa %xmm0, -16(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_5bytes_align): - movl -5(%eax), %ecx - movl -4(%eax), %eax - movl %ecx, -5(%edx) - movl %eax, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_45bytes_align): - movdqa -45(%eax), %xmm0 - movdqa 
%xmm0, -45(%edx) -L(fwd_write_29bytes_align): - movdqa -29(%eax), %xmm0 - movdqa %xmm0, -29(%edx) -L(fwd_write_13bytes_align): - movq -13(%eax), %xmm0 - movq %xmm0, -13(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_37bytes_align): - movdqa -37(%eax), %xmm0 - movdqa %xmm0, -37(%edx) -L(fwd_write_21bytes_align): - movdqa -21(%eax), %xmm0 - movdqa %xmm0, -21(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_41bytes_align): - movdqa -41(%eax), %xmm0 - movdqa %xmm0, -41(%edx) -L(fwd_write_25bytes_align): - movdqa -25(%eax), %xmm0 - movdqa %xmm0, -25(%edx) -L(fwd_write_9bytes_align): - movq -9(%eax), %xmm0 - movq %xmm0, -9(%edx) -L(fwd_write_1bytes_align): - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_33bytes_align): - movdqa -33(%eax), %xmm0 - movdqa %xmm0, -33(%edx) -L(fwd_write_17bytes_align): - movdqa -17(%eax), %xmm0 - movdqa %xmm0, -17(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_46bytes_align): - movdqa -46(%eax), %xmm0 - movdqa %xmm0, -46(%edx) -L(fwd_write_30bytes_align): - movdqa -30(%eax), %xmm0 - movdqa %xmm0, -30(%edx) -L(fwd_write_14bytes_align): - movq -14(%eax), %xmm0 - movq %xmm0, -14(%edx) -L(fwd_write_6bytes_align): - movl -6(%eax), %ecx - movl %ecx, -6(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl 
%edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_38bytes_align): - movdqa -38(%eax), %xmm0 - movdqa %xmm0, -38(%edx) -L(fwd_write_22bytes_align): - movdqa -22(%eax), %xmm0 - movdqa %xmm0, -22(%edx) - movl -6(%eax), %ecx - movl %ecx, -6(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_42bytes_align): - movdqa -42(%eax), %xmm0 - movdqa %xmm0, -42(%edx) -L(fwd_write_26bytes_align): - movdqa -26(%eax), %xmm0 - movdqa %xmm0, -26(%edx) -L(fwd_write_10bytes_align): - movq -10(%eax), %xmm0 - movq %xmm0, -10(%edx) -L(fwd_write_2bytes_align): - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_34bytes_align): - movdqa -34(%eax), %xmm0 - movdqa %xmm0, -34(%edx) -L(fwd_write_18bytes_align): - movdqa -18(%eax), %xmm0 - movdqa %xmm0, -18(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_47bytes_align): - movdqa -47(%eax), %xmm0 - movdqa %xmm0, -47(%edx) -L(fwd_write_31bytes_align): - movdqa -31(%eax), %xmm0 - movdqa %xmm0, -31(%edx) -L(fwd_write_15bytes_align): - movq -15(%eax), %xmm0 - movq %xmm0, -15(%edx) -L(fwd_write_7bytes_align): - movl -7(%eax), %ecx - movl %ecx, -7(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_39bytes_align): - movdqa -39(%eax), %xmm0 - movdqa %xmm0, -39(%edx) -L(fwd_write_23bytes_align): - movdqa -23(%eax), %xmm0 - movdqa %xmm0, -23(%edx) - movl -7(%eax), %ecx - 
movl %ecx, -7(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_43bytes_align): - movdqa -43(%eax), %xmm0 - movdqa %xmm0, -43(%edx) -L(fwd_write_27bytes_align): - movdqa -27(%eax), %xmm0 - movdqa %xmm0, -27(%edx) -L(fwd_write_11bytes_align): - movq -11(%eax), %xmm0 - movq %xmm0, -11(%edx) -L(fwd_write_3bytes_align): - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_35bytes_align): - movdqa -35(%eax), %xmm0 - movdqa %xmm0, -35(%edx) -L(fwd_write_19bytes_align): - movdqa -19(%eax), %xmm0 - movdqa %xmm0, -19(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_44bytes_align): - movdqa -44(%eax), %xmm0 - movdqa %xmm0, -44(%edx) -L(fwd_write_28bytes_align): - movdqa -28(%eax), %xmm0 - movdqa %xmm0, -28(%edx) -L(fwd_write_12bytes_align): - movq -12(%eax), %xmm0 - movq %xmm0, -12(%edx) -L(fwd_write_4bytes_align): - movl -4(%eax), %ecx - movl %ecx, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_36bytes_align): - movdqa -36(%eax), %xmm0 - movdqa %xmm0, -36(%edx) -L(fwd_write_20bytes_align): - movdqa -20(%eax), %xmm0 - movdqa %xmm0, -20(%edx) - movl -4(%eax), %ecx - movl %ecx, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN_END - - CFI_PUSH (%edi) - - .p2align 4 -L(large_page): - movdqu (%eax), %xmm1 -# 
ifdef USE_AS_MEMMOVE - movl DEST+4(%esp), %edi - movdqu %xmm0, (%edi) -# endif - lea 16(%eax), %eax - movntdq %xmm1, (%edx) - lea 16(%edx), %edx - lea -0x90(%ecx), %ecx - POP (%edi) - - .p2align 4 -L(large_page_loop): - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - movdqu 0x40(%eax), %xmm4 - movdqu 0x50(%eax), %xmm5 - movdqu 0x60(%eax), %xmm6 - movdqu 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - - sub $0x80, %ecx - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - movntdq %xmm4, 0x40(%edx) - movntdq %xmm5, 0x50(%edx) - movntdq %xmm6, 0x60(%edx) - movntdq %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - jae L(large_page_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(large_page_less_64bytes) - - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - lea 0x40(%eax), %eax - - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - lea 0x40(%edx), %edx - sub $0x40, %ecx -L(large_page_less_64bytes): - cmp $32, %ecx - jb L(large_page_less_32bytes) - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - lea 0x20(%eax), %eax - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - lea 0x20(%edx), %edx - sub $0x20, %ecx -L(large_page_less_32bytes): - add %ecx, %edx - add %ecx, %eax - sfence - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - .p2align 4 -L(bk_write_44bytes): - movq 36(%eax), %xmm0 - movq %xmm0, 36(%edx) -L(bk_write_36bytes): - movq 28(%eax), %xmm0 - movq %xmm0, 28(%edx) -L(bk_write_28bytes): - movq 20(%eax), %xmm0 - movq %xmm0, 20(%edx) -L(bk_write_20bytes): - movq 12(%eax), %xmm0 - movq %xmm0, 12(%edx) -L(bk_write_12bytes): - movq 4(%eax), %xmm0 - movq %xmm0, 4(%edx) -L(bk_write_4bytes): - movl (%eax), %ecx - movl %ecx, (%edx) -L(bk_write_0bytes): -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - 
add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_40bytes): - movq 32(%eax), %xmm0 - movq %xmm0, 32(%edx) -L(bk_write_32bytes): - movq 24(%eax), %xmm0 - movq %xmm0, 24(%edx) -L(bk_write_24bytes): - movq 16(%eax), %xmm0 - movq %xmm0, 16(%edx) -L(bk_write_16bytes): - movq 8(%eax), %xmm0 - movq %xmm0, 8(%edx) -L(bk_write_8bytes): - movq (%eax), %xmm0 - movq %xmm0, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_45bytes): - movq 37(%eax), %xmm0 - movq %xmm0, 37(%edx) -L(bk_write_37bytes): - movq 29(%eax), %xmm0 - movq %xmm0, 29(%edx) -L(bk_write_29bytes): - movq 21(%eax), %xmm0 - movq %xmm0, 21(%edx) -L(bk_write_21bytes): - movq 13(%eax), %xmm0 - movq %xmm0, 13(%edx) -L(bk_write_13bytes): - movq 5(%eax), %xmm0 - movq %xmm0, 5(%edx) -L(bk_write_5bytes): - movl 1(%eax), %ecx - movl %ecx, 1(%edx) -L(bk_write_1bytes): - movzbl (%eax), %ecx - movb %cl, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_41bytes): - movq 33(%eax), %xmm0 - movq %xmm0, 33(%edx) -L(bk_write_33bytes): - movq 25(%eax), %xmm0 - movq %xmm0, 25(%edx) -L(bk_write_25bytes): - movq 17(%eax), %xmm0 - movq %xmm0, 17(%edx) -L(bk_write_17bytes): - movq 9(%eax), %xmm0 - movq %xmm0, 9(%edx) -L(bk_write_9bytes): - movq 1(%eax), %xmm0 - movq %xmm0, 1(%edx) - movzbl (%eax), %ecx - movb %cl, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_46bytes): - movq 38(%eax), %xmm0 - movq %xmm0, 38(%edx) -L(bk_write_38bytes): - movq 30(%eax), %xmm0 - movq %xmm0, 30(%edx) -L(bk_write_30bytes): - movq 22(%eax), %xmm0 - movq %xmm0, 22(%edx) -L(bk_write_22bytes): - movq 14(%eax), %xmm0 - movq %xmm0, 14(%edx) -L(bk_write_14bytes): - movq 6(%eax), 
%xmm0 - movq %xmm0, 6(%edx) -L(bk_write_6bytes): - movl 2(%eax), %ecx - movl %ecx, 2(%edx) - movzwl (%eax), %ecx - movw %cx, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_42bytes): - movq 34(%eax), %xmm0 - movq %xmm0, 34(%edx) -L(bk_write_34bytes): - movq 26(%eax), %xmm0 - movq %xmm0, 26(%edx) -L(bk_write_26bytes): - movq 18(%eax), %xmm0 - movq %xmm0, 18(%edx) -L(bk_write_18bytes): - movq 10(%eax), %xmm0 - movq %xmm0, 10(%edx) -L(bk_write_10bytes): - movq 2(%eax), %xmm0 - movq %xmm0, 2(%edx) -L(bk_write_2bytes): - movzwl (%eax), %ecx - movw %cx, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_47bytes): - movq 39(%eax), %xmm0 - movq %xmm0, 39(%edx) -L(bk_write_39bytes): - movq 31(%eax), %xmm0 - movq %xmm0, 31(%edx) -L(bk_write_31bytes): - movq 23(%eax), %xmm0 - movq %xmm0, 23(%edx) -L(bk_write_23bytes): - movq 15(%eax), %xmm0 - movq %xmm0, 15(%edx) -L(bk_write_15bytes): - movq 7(%eax), %xmm0 - movq %xmm0, 7(%edx) -L(bk_write_7bytes): - movl 3(%eax), %ecx - movl %ecx, 3(%edx) - movzwl 1(%eax), %ecx - movw %cx, 1(%edx) - movzbl (%eax), %eax - movb %al, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_43bytes): - movq 35(%eax), %xmm0 - movq %xmm0, 35(%edx) -L(bk_write_35bytes): - movq 27(%eax), %xmm0 - movq %xmm0, 27(%edx) -L(bk_write_27bytes): - movq 19(%eax), %xmm0 - movq %xmm0, 19(%edx) -L(bk_write_19bytes): - movq 11(%eax), %xmm0 - movq %xmm0, 11(%edx) -L(bk_write_11bytes): - movq 3(%eax), %xmm0 - movq %xmm0, 3(%edx) -L(bk_write_3bytes): - movzwl 1(%eax), %ecx - movw %cx, 1(%edx) - movzbl (%eax), %eax - movb %al, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl 
LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN_END - - - .pushsection .rodata.ssse3,"a",@progbits - .p2align 2 -L(table_48bytes_fwd): - .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) - .int JMPTBL 
(L(fwd_write_31bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd)) - - .p2align 2 -L(table_48bytes_fwd_align): - .int JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_13bytes_align), 
L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_41bytes_align), 
L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align)) - - .p2align 2 -L(shl_table): - .int JMPTBL (L(shl_0), L(shl_table)) - .int JMPTBL (L(shl_1), L(shl_table)) - .int JMPTBL (L(shl_2), L(shl_table)) - .int JMPTBL (L(shl_3), L(shl_table)) - .int JMPTBL (L(shl_4), L(shl_table)) - .int JMPTBL (L(shl_5), L(shl_table)) - .int JMPTBL (L(shl_6), L(shl_table)) - .int JMPTBL (L(shl_7), L(shl_table)) - .int JMPTBL (L(shl_8), L(shl_table)) - .int JMPTBL (L(shl_9), L(shl_table)) - .int JMPTBL (L(shl_10), L(shl_table)) - .int JMPTBL (L(shl_11), L(shl_table)) - .int JMPTBL (L(shl_12), L(shl_table)) - .int JMPTBL (L(shl_13), L(shl_table)) - .int JMPTBL (L(shl_14), L(shl_table)) - .int JMPTBL (L(shl_15), L(shl_table)) - - .p2align 2 -L(table_48_bytes_bwd): - .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) - .int JMPTBL 
(L(bk_write_14bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd)) - - 
.popsection - -# ifdef USE_AS_MEMMOVE - .p2align 4 -L(copy_backward): - PUSH (%edi) - movl %eax, %edi - lea (%ecx,%edx,1),%edx - lea (%ecx,%edi,1),%edi - testl $0x3, %edx - jnz L(bk_align) - -L(bk_aligned_4): - cmp $64, %ecx - jae L(bk_write_more64bytes) - -L(bk_write_64bytesless): - cmp $32, %ecx - jb L(bk_write_less32bytes) - -L(bk_write_more32bytes): - /* Copy 32 bytes at a time. */ - sub $32, %ecx - movq -8(%edi), %xmm0 - movq %xmm0, -8(%edx) - movq -16(%edi), %xmm0 - movq %xmm0, -16(%edx) - movq -24(%edi), %xmm0 - movq %xmm0, -24(%edx) - movq -32(%edi), %xmm0 - movq %xmm0, -32(%edx) - sub $32, %edx - sub $32, %edi - -L(bk_write_less32bytes): - movl %edi, %eax - sub %ecx, %edx - sub %ecx, %eax - POP (%edi) -L(bk_write_less32bytes_2): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(bk_align): - cmp $8, %ecx - jbe L(bk_write_less32bytes) - testl $1, %edx - /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, - then (EDX & 2) must be != 0. */ - jz L(bk_got2) - sub $1, %edi - sub $1, %ecx - sub $1, %edx - movzbl (%edi), %eax - movb %al, (%edx) - - testl $2, %edx - jz L(bk_aligned_4) - -L(bk_got2): - sub $2, %edi - sub $2, %ecx - sub $2, %edx - movzwl (%edi), %eax - movw %ax, (%edx) - jmp L(bk_aligned_4) - - .p2align 4 -L(bk_write_more64bytes): - /* Check alignment of last byte. */ - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - -/* EDX is aligned 4 bytes, but not 16 bytes. 
*/ -L(bk_ssse3_align): - sub $4, %edi - sub $4, %ecx - sub $4, %edx - movl (%edi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %edi - sub $4, %ecx - sub $4, %edx - movl (%edi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %edi - sub $4, %ecx - sub $4, %edx - movl (%edi), %eax - movl %eax, (%edx) - -L(bk_ssse3_cpy_pre): - cmp $64, %ecx - jb L(bk_write_more32bytes) - - .p2align 4 -L(bk_ssse3_cpy): - sub $64, %edi - sub $64, %ecx - sub $64, %edx - movdqu 0x30(%edi), %xmm3 - movdqa %xmm3, 0x30(%edx) - movdqu 0x20(%edi), %xmm2 - movdqa %xmm2, 0x20(%edx) - movdqu 0x10(%edi), %xmm1 - movdqa %xmm1, 0x10(%edx) - movdqu (%edi), %xmm0 - movdqa %xmm0, (%edx) - cmp $64, %ecx - jae L(bk_ssse3_cpy) - jmp L(bk_write_64bytesless) - -# endif - -END (MEMCPY) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S deleted file mode 100644 index f725944620..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy.S +++ /dev/null @@ -1,78 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need memcpy before the initialization - happened. */ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(memcpy) - .type memcpy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memcpy_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep) -2: ret -END(memcpy) - -# undef ENTRY -# define ENTRY(name) \ - .type __memcpy_ia32, @function; \ - .p2align 4; \ - .globl __memcpy_ia32; \ - .hidden __memcpy_ia32; \ - __memcpy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memcpy_chk_ia32, @function; \ - .globl __memcpy_chk_ia32; \ - .p2align 4; \ - __memcpy_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32 - -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memcpy; __GI_memcpy = __memcpy_ia32 -#endif - -#include "../memcpy.S" diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S deleted file mode 100644 index 1b4fbe2e6f..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy_chk.S +++ /dev/null @@ -1,50 +0,0 @@ -/* Multiple versions of __memcpy_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. There are no multiarch memcpy functions for static binaries. - */ -#if IS_IN (libc) -# ifdef SHARED - .text -ENTRY(__memcpy_chk) - .type __memcpy_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep) -2: ret -END(__memcpy_chk) -# else -# include "../memcpy_chk.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S deleted file mode 100644 index 3873594cb2..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define MEMCPY __memmove_sse2_unaligned -#define MEMCPY_CHK __memmove_chk_sse2_unaligned -#include "memcpy-sse2-unaligned.S" diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S deleted 
file mode 100644 index d202fc4a13..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define MEMCPY __memmove_ssse3_rep -#define MEMCPY_CHK __memmove_chk_ssse3_rep -#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3.S b/sysdeps/i386/i686/multiarch/memmove-ssse3.S deleted file mode 100644 index 295430b1ef..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define MEMCPY __memmove_ssse3 -#define MEMCPY_CHK __memmove_chk_ssse3 -#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S deleted file mode 100644 index 6eb418ca7f..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove.S +++ /dev/null @@ -1,89 +0,0 @@ -/* Multiple versions of memmove - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#if IS_IN (libc) - .text -ENTRY(memmove) - .type memmove, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memmove_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep) -2: ret -END(memmove) - -# ifdef SHARED -# undef ENTRY -# define ENTRY(name) \ - .type __memmove_ia32, @function; \ - .p2align 4; \ - .globl __memmove_ia32; \ - .hidden __memmove_ia32; \ - __memmove_ia32: cfi_startproc; \ - CALL_MCOUNT -# else -# undef ENTRY -# define ENTRY(name) \ - .type __memmove_ia32, @function; \ - .globl __memmove_ia32; \ - .p2align 4; \ - __memmove_ia32: cfi_startproc; \ - CALL_MCOUNT -# endif - -# undef END -# define END(name) \ - cfi_endproc; .size __memmove_ia32, .-__memmove_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memmove_chk_ia32, @function; \ - .globl __memmove_chk_ia32; \ - .p2align 4; \ - __memmove_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memmove; __GI_memmove = __memmove_ia32 -# endif -#endif - -#include "../memmove.S" diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S deleted file mode 100644 index 314834c4c6..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove_chk.S +++ /dev/null @@ -1,94 +0,0 @@ -/* Multiple versions of __memmove_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. 
- Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(__memmove_chk) - .type __memmove_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memmove_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep) -2: ret -END(__memmove_chk) - -# ifndef SHARED - .type __memmove_chk_sse2_unaligned, @function - .p2align 4; -__memmove_chk_sse2_unaligned: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_sse2_unaligned - cfi_endproc - .size __memmove_chk_sse2_unaligned, .-__memmove_chk_sse2_unaligned - - .type __memmove_chk_ssse3, @function - .p2align 4; -__memmove_chk_ssse3: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ssse3 - cfi_endproc - .size __memmove_chk_ssse3, .-__memmove_chk_ssse3 - - .type __memmove_chk_ssse3_rep, @function - .p2align 4; 
-__memmove_chk_ssse3_rep: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ssse3_rep - cfi_endproc - .size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep - - .type __memmove_chk_ia32, @function - .p2align 4; -__memmove_chk_ia32: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ia32 - cfi_endproc - .size __memmove_chk_ia32, .-__memmove_chk_ia32 -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S deleted file mode 100644 index a1cea50771..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_sse2_unaligned -#define MEMCPY_CHK __mempcpy_chk_sse2_unaligned -#include "memcpy-sse2-unaligned.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S deleted file mode 100644 index 5357b33e18..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_ssse3_rep -#define MEMCPY_CHK __mempcpy_chk_ssse3_rep -#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S deleted file mode 100644 index 822d98e954..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_ssse3 -#define MEMCPY_CHK __mempcpy_chk_ssse3 -#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S deleted file mode 100644 index 06e377fbc9..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy.S +++ /dev/null @@ -1,81 +0,0 @@ -/* Multiple versions of mempcpy - All versions must be listed in ifunc-impl-list.c. 
- Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need mempcpy before the initialization - happened. 
*/ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__mempcpy_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep) -2: ret -END(__mempcpy) - -# undef ENTRY -# define ENTRY(name) \ - .type __mempcpy_ia32, @function; \ - .p2align 4; \ - .globl __mempcpy_ia32; \ - .hidden __mempcpy_ia32; \ - __mempcpy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __mempcpy_chk_ia32, @function; \ - .globl __mempcpy_chk_ia32; \ - .p2align 4; \ - __mempcpy_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32 - -# undef libc_hidden_def -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_def(name) \ - .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32 -# define libc_hidden_builtin_def(name) \ - .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32 -#endif - -#include "../mempcpy.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S deleted file mode 100644 index e13e5248a5..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S +++ /dev/null @@ -1,50 +0,0 @@ -/* Multiple versions of __mempcpy_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. There are no multiarch mempcpy functions for static binaries. - */ -#if IS_IN (libc) -# ifdef SHARED - .text -ENTRY(__mempcpy_chk) - .type __mempcpy_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep) -2: ret -END(__mempcpy_chk) -# else -# include "../mempcpy_chk.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c deleted file mode 100644 index ef7bbbe792..0000000000 --- a/sysdeps/i386/i686/multiarch/memrchr-c.c +++ /dev/null @@ -1,7 +0,0 @@ -#if IS_IN (libc) -# define MEMRCHR __memrchr_ia32 -# include <string.h> -extern void *__memrchr_ia32 (const void *, int, size_t); -#endif - -#include "string/memrchr.c" diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S deleted file mode 100644 index dbbe94fd08..0000000000 --- 
a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S +++ /dev/null @@ -1,417 +0,0 @@ -/* Optimized memrchr with sse2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -# define MEMCHR __memrchr_sse2_bsf - - .text -ENTRY (MEMCHR) - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - mov LEN(%esp), %edx - - sub $16, %edx - jbe L(length_less16) - - punpcklbw %xmm1, %xmm1 - add %edx, %ecx - punpcklbw %xmm1, %xmm1 - - movdqu (%ecx), %xmm0 - pshufd $0, %xmm1, %xmm1 - pcmpeqb %xmm1, %xmm0 - -/* Check if there is a match. */ - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches0) - - sub $64, %ecx - mov %ecx, %eax - and $15, %eax - jz L(loop_prolog) - - add $16, %ecx - add $16, %edx - sub %eax, %ecx - sub %eax, %edx - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop_prolog): - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm4 - pcmpeqb %xmm1, %xmm4 - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches0) - - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches0) - - mov %ecx, %eax - and $63, %eax - test %eax, %eax - jz L(align64_loop) - - add $64, %ecx - add $64, %edx - sub %eax, %ecx - sub %eax, %edx - - .p2align 4 -L(align64_loop): - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa (%ecx), %xmm0 - movdqa 16(%ecx), %xmm2 - movdqa 32(%ecx), %xmm3 - movdqa 48(%ecx), %xmm4 - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm3, %xmm0 - pmaxub %xmm4, %xmm2 - pmaxub %xmm0, %xmm2 - pmovmskb %xmm2, %eax - - test %eax, %eax - jz L(align64_loop) - - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches48) - - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm2 - - pcmpeqb %xmm1, %xmm2 - pcmpeqb (%ecx), %xmm1 - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - pmovmskb %xmm1, %eax - bsr %eax, %eax - - add %ecx, %eax - ret - - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - 
pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb (%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches0_1) - xor %eax, %eax - ret - - .p2align 4 -L(exit_loop_32): - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 32(%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches32_1) - xor %eax, %eax - ret - - .p2align 4 -L(matches0): - bsr %eax, %eax - add %ecx, %eax - ret - - .p2align 4 -L(matches16): - bsr %eax, %eax - lea 16(%eax, %ecx), %eax - ret - - .p2align 4 -L(matches32): - bsr %eax, %eax - lea 32(%eax, %ecx), %eax - ret - - .p2align 4 -L(matches48): - bsr %eax, %eax - lea 48(%eax, %ecx), %eax - ret - - .p2align 4 -L(matches0_1): - bsr %eax, %eax - sub $64, %edx - add %eax, %edx - jl L(return_null) - add %ecx, %eax - ret - - .p2align 4 -L(matches16_1): - bsr %eax, %eax - sub $48, %edx - add %eax, %edx - jl L(return_null) - lea 16(%ecx, %eax), %eax - ret - - .p2align 4 -L(matches32_1): - bsr %eax, %eax - sub $32, %edx - add %eax, %edx - jl L(return_null) - lea 32(%ecx, %eax), %eax - ret - - .p2align 4 -L(matches48_1): - bsr %eax, %eax - sub $16, %edx - add %eax, %edx - jl L(return_null) - lea 48(%ecx, %eax), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - - .p2align 4 -L(length_less16_offset0): - mov %dl, %cl - pcmpeqb (%eax), %xmm1 - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - mov %edx, %ecx - - pmovmskb %xmm1, %edx - - and %ecx, %edx - test %edx, %edx - jz L(return_null) - - bsr %edx, %ecx - add %ecx, %eax - ret - - .p2align 4 -L(length_less16): - punpcklbw %xmm1, %xmm1 - mov %ecx, %eax - punpcklbw %xmm1, %xmm1 - add $16, %edx 
- jz L(return_null) - - pshufd $0, %xmm1, %xmm1 - and $15, %ecx - jz L(length_less16_offset0) - - PUSH (%edi) - mov %cl, %dh - add %dl, %dh - and $-16, %eax - - sub $16, %dh - ja L(length_less16_part2) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - sar %cl, %edi - add %ecx, %eax - mov %dl, %cl - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2): - movdqa 16(%eax), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %edi - - mov %cl, %ch - - mov %dh, %cl - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - - test %edi, %edi - jnz L(length_less16_part2_return) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - mov %ch, %cl - sar %cl, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - xor %ch, %ch - add %ecx, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2_return): - bsr %edi, %edi - lea 16(%eax, %edi), %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(ret_null): - xor %eax, %eax - POP (%edi) - ret - -END (MEMCHR) -#endif diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S deleted file mode 100644 index 5f7853f683..0000000000 --- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S +++ /dev/null @@ -1,724 +0,0 @@ -/* Optimized memrchr with sse2 without bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - - atom_text_section -ENTRY (__memrchr_sse2) - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - mov LEN(%esp), %edx - - sub $16, %edx - jbe L(length_less16) - - punpcklbw %xmm1, %xmm1 - add %edx, %ecx - punpcklbw %xmm1, %xmm1 - - movdqu (%ecx), %xmm0 - pshufd $0, %xmm1, %xmm1 - pcmpeqb %xmm1, %xmm0 - - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(exit_dispatch) - - sub $64, %ecx - mov %ecx, %eax - and $15, %eax - jz L(loop_prolog) - - lea 16(%ecx), %ecx - lea 16(%edx), %edx - sub %eax, %edx - and $-16, %ecx - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop_prolog): - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm4 - pcmpeqb %xmm1, %xmm4 - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(exit_dispatch) - - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(exit_dispatch) - - mov %ecx, %eax - and $63, %eax - test %eax, %eax - jz L(align64_loop) - - lea 64(%ecx), %ecx - lea 64(%edx), %edx - and $-64, %ecx - sub %eax, %edx - - .p2align 4 -L(align64_loop): - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa (%ecx), %xmm0 - movdqa 16(%ecx), %xmm2 - movdqa 32(%ecx), %xmm3 - movdqa 48(%ecx), %xmm4 - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm3, %xmm0 - pmaxub %xmm4, %xmm2 - pmaxub %xmm0, %xmm2 - pmovmskb %xmm2, %eax - - test %eax, %eax - jz L(align64_loop) - - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches48) - - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm2 - - pcmpeqb %xmm1, %xmm2 - pcmpeqb (%ecx), %xmm1 - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - pmovmskb %xmm1, %eax - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - 
jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb (%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches0_1) - xor %eax, %eax - ret - - .p2align 4 -L(exit_loop_32): - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 32(%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches32_1) - xor %eax, %eax - ret - - .p2align 4 -L(matches16): - lea 16(%ecx), %ecx - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(matches32): - lea 32(%ecx), %ecx - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(matches48): - lea 48(%ecx), %ecx - - .p2align 4 -L(exit_dispatch): - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(exit_dispatch_8): - test $0x80, %al - jnz L(exit_8) - test $0x40, %al - jnz L(exit_7) - test $0x20, %al - jnz L(exit_6) - lea 4(%ecx), %eax - ret - - .p2align 4 
-L(exit_dispatch_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(exit_dispatch_high_8) - test $0x08, %ah - jnz L(exit_12) - test $0x04, %ah - jnz L(exit_11) - test $0x02, %ah - jnz L(exit_10) - lea 8(%ecx), %eax - ret - - .p2align 4 -L(exit_dispatch_high_8): - test $0x80, %ah - jnz L(exit_16) - test $0x40, %ah - jnz L(exit_15) - test $0x20, %ah - jnz L(exit_14) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit_2): - lea 1(%ecx), %eax - ret - - .p2align 4 -L(exit_3): - lea 2(%ecx), %eax - ret - - .p2align 4 -L(exit_4): - lea 3(%ecx), %eax - ret - - .p2align 4 -L(exit_6): - lea 5(%ecx), %eax - ret - - .p2align 4 -L(exit_7): - lea 6(%ecx), %eax - ret - - .p2align 4 -L(exit_8): - lea 7(%ecx), %eax - ret - - .p2align 4 -L(exit_10): - lea 9(%ecx), %eax - ret - - .p2align 4 -L(exit_11): - lea 10(%ecx), %eax - ret - - .p2align 4 -L(exit_12): - lea 11(%ecx), %eax - ret - - .p2align 4 -L(exit_14): - lea 13(%ecx), %eax - ret - - .p2align 4 -L(exit_15): - lea 14(%ecx), %eax - ret - - .p2align 4 -L(exit_16): - lea 15(%ecx), %eax - ret - - .p2align 4 -L(matches0_1): - lea -64(%edx), %edx - - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(matches16_1): - lea -48(%edx), %edx - lea 16(%ecx), %ecx - - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(matches32_1): - lea -32(%edx), %edx - lea 32(%ecx), %ecx - - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - 
jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(matches48_1): - lea -16(%edx), %edx - lea 48(%ecx), %ecx - - .p2align 4 -L(exit_dispatch_1): - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(exit_dispatch_1_8): - test $0x80, %al - jnz L(exit_1_8) - test $0x40, %al - jnz L(exit_1_7) - test $0x20, %al - jnz L(exit_1_6) - add $4, %edx - jl L(return_null) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(exit_dispatch_1_high): - mov %ah, %al - and $15 << 4, %al - jnz L(exit_dispatch_1_high_8) - test $0x08, %ah - jnz L(exit_1_12) - test $0x04, %ah - jnz L(exit_1_11) - test $0x02, %ah - jnz L(exit_1_10) - add $8, %edx - jl L(return_null) - lea 8(%ecx), %eax - ret - - .p2align 4 -L(exit_dispatch_1_high_8): - test $0x80, %ah - jnz L(exit_1_16) - test $0x40, %ah - jnz L(exit_1_15) - test $0x20, %ah - jnz L(exit_1_14) - add $12, %edx - jl L(return_null) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit_1_2): - add $1, %edx - jl L(return_null) - lea 1(%ecx), %eax - ret - - .p2align 4 -L(exit_1_3): - add $2, %edx - jl L(return_null) - lea 2(%ecx), %eax - ret - - .p2align 4 -L(exit_1_4): - add $3, %edx - jl L(return_null) - lea 3(%ecx), %eax - ret - - .p2align 4 -L(exit_1_6): - add $5, %edx - jl L(return_null) - lea 5(%ecx), %eax - ret - - .p2align 4 -L(exit_1_7): - add $6, %edx - jl L(return_null) - lea 6(%ecx), %eax - ret - - .p2align 4 -L(exit_1_8): - add $7, %edx - jl L(return_null) - lea 7(%ecx), %eax - ret - - .p2align 4 -L(exit_1_10): - add $9, %edx - jl L(return_null) - lea 9(%ecx), %eax - ret - - .p2align 4 -L(exit_1_11): - add $10, %edx - jl L(return_null) - lea 10(%ecx), %eax - ret - - .p2align 4 -L(exit_1_12): - add $11, %edx - jl L(return_null) - lea 11(%ecx), %eax - ret - - .p2align 4 
-L(exit_1_14): - add $13, %edx - jl L(return_null) - lea 13(%ecx), %eax - ret - - .p2align 4 -L(exit_1_15): - add $14, %edx - jl L(return_null) - lea 14(%ecx), %eax - ret - - .p2align 4 -L(exit_1_16): - add $15, %edx - jl L(return_null) - lea 15(%ecx), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - - .p2align 4 -L(length_less16_offset0): - mov %dl, %cl - pcmpeqb (%eax), %xmm1 - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - mov %eax, %ecx - pmovmskb %xmm1, %eax - - and %edx, %eax - test %eax, %eax - jnz L(exit_dispatch) - - xor %eax, %eax - ret - - .p2align 4 -L(length_less16): - punpcklbw %xmm1, %xmm1 - add $16, %edx - je L(return_null) - punpcklbw %xmm1, %xmm1 - - mov %ecx, %eax - pshufd $0, %xmm1, %xmm1 - - and $15, %ecx - jz L(length_less16_offset0) - - PUSH (%edi) - - mov %cl, %dh - add %dl, %dh - and $-16, %eax - - sub $16, %dh - ja L(length_less16_part2) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - sar %cl, %edi - add %ecx, %eax - mov %dl, %cl - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2): - movdqa 16(%eax), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %edi - - mov %cl, %ch - - mov %dh, %cl - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - - test %edi, %edi - jnz L(length_less16_part2_return) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - mov %ch, %cl - sar %cl, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - xor %ch, %ch - add %ecx, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2_return): - bsr %edi, %edi - lea 16(%eax, %edi), %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(ret_null): - xor %eax, %eax - POP (%edi) - ret - -END (__memrchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/memrchr.S b/sysdeps/i386/i686/multiarch/memrchr.S deleted file mode 
100644 index d4253a553b..0000000000 --- a/sysdeps/i386/i686/multiarch/memrchr.S +++ /dev/null @@ -1,45 +0,0 @@ -/* Multiple versions of memrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__memrchr) - .type __memrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - HAS_CPU_FEATURE (SSE2) - jz 2f - HAS_ARCH_FEATURE (Slow_BSF) - jz 3f - - LOAD_FUNC_GOT_EAX (__memrchr_sse2) - ret - -2: LOAD_FUNC_GOT_EAX (__memrchr_ia32) - ret - -3: LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf) - ret -END(__memrchr) - -weak_alias(__memrchr, memrchr) -#endif diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S deleted file mode 100644 index 3221077e49..0000000000 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ /dev/null @@ -1,811 +0,0 @@ -/* memset with SSE2 and REP string. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif - -#ifdef SHARED -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define PARMS 8 /* Preserve EBX. */ -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - add $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - add (%ebx,%ecx,4), %ebx; \ - add %ecx, %edx; \ - /* We loaded the jump table and adjusted EDX. Go. 
*/ \ - jmp *%ebx -#else -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define PARMS 4 -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - add %ecx, %edx; \ - jmp *TABLE(,%ecx,4) -#endif - - .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY (__memset_chk_sse2_rep) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk_sse2_rep) -#endif -ENTRY (__memset_sse2_rep) - ENTRANCE - - movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else - movzbl CHR(%esp), %eax - movb %al, %ah - /* Fill the whole EAX with pattern. */ - movl %eax, %edx - shl $16, %eax - or %edx, %eax -#endif - movl DEST(%esp), %edx - cmp $32, %ecx - jae L(32bytesormore) - -L(write_less32bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) - - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_less_32bytes): - .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) - .int JMPTBL 
(L(write_17bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_19bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_20bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_30bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) - .popsection - - ALIGN (4) -L(write_28bytes): - movl %eax, -28(%edx) -L(write_24bytes): - movl %eax, -24(%edx) -L(write_20bytes): - movl %eax, -20(%edx) -L(write_16bytes): - movl %eax, -16(%edx) -L(write_12bytes): - movl %eax, -12(%edx) -L(write_8bytes): - movl %eax, -8(%edx) -L(write_4bytes): - movl %eax, -4(%edx) -L(write_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(write_29bytes): - movl %eax, -29(%edx) -L(write_25bytes): - movl %eax, -25(%edx) -L(write_21bytes): - movl %eax, -21(%edx) -L(write_17bytes): - movl %eax, -17(%edx) -L(write_13bytes): - movl %eax, -13(%edx) -L(write_9bytes): - movl %eax, -9(%edx) -L(write_5bytes): - movl %eax, -5(%edx) -L(write_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_30bytes): - movl %eax, -30(%edx) -L(write_26bytes): - movl %eax, -26(%edx) -L(write_22bytes): - movl %eax, -22(%edx) -L(write_18bytes): - movl %eax, -18(%edx) -L(write_14bytes): - movl %eax, -14(%edx) -L(write_10bytes): - movl %eax, -10(%edx) -L(write_6bytes): - movl %eax, -6(%edx) -L(write_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_31bytes): - movl %eax, -31(%edx) -L(write_27bytes): - movl 
%eax, -27(%edx) -L(write_23bytes): - movl %eax, -23(%edx) -L(write_19bytes): - movl %eax, -19(%edx) -L(write_15bytes): - movl %eax, -15(%edx) -L(write_11bytes): - movl %eax, -11(%edx) -L(write_7bytes): - movl %eax, -7(%edx) -L(write_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -/* ECX > 32 and EDX is 4 byte aligned. */ -L(32bytesormore): - /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else - movd %eax, %xmm0 - pshufd $0, %xmm0, %xmm0 -#endif - testl $0xf, %edx - jz L(aligned_16) -/* ECX > 32 and EDX is not 16 byte aligned. */ -L(not_aligned_16): - movdqu %xmm0, (%edx) - movl %edx, %eax - and $-16, %edx - add $16, %edx - sub %edx, %eax - add %eax, %ecx - movd %xmm0, %eax - - ALIGN (4) -L(aligned_16): - cmp $128, %ecx - jae L(128bytesormore) - -L(aligned_16_less128bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - ALIGN (4) -L(128bytesormore): - PUSH (%edi) -#ifdef DATA_CACHE_SIZE - PUSH (%ebx) - mov $DATA_CACHE_SIZE, %ebx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - mov __x86_data_cache_size@GOTOFF(%ebx), %ebx -# else - PUSH (%ebx) - mov __x86_data_cache_size, %ebx -# endif -#endif - mov %ebx, %edi - shr $4, %ebx - sub %ebx, %edi -#if defined DATA_CACHE_SIZE || !defined SHARED - POP (%ebx) -#endif -/* - * When data size approximate the end of L1 cache, - * fast string will prefetch and combine data efficiently. 
- */ - cmp %edi, %ecx - jae L(128bytesormore_endof_L1) - subl $128, %ecx -L(128bytesormore_normal): - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jb L(128bytesless_normal) - - - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jae L(128bytesormore_normal) - -L(128bytesless_normal): - POP (%edi) - add $128, %ecx - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - CFI_PUSH (%edi) - ALIGN (4) -L(128bytesormore_endof_L1): - mov %edx, %edi - mov %ecx, %edx - shr $2, %ecx - and $3, %edx - rep stosl - jz L(copy_page_by_rep_exit) - cmp $2, %edx - jb L(copy_page_by_rep_left_1) - movw %ax, (%edi) - add $2, %edi - sub $2, %edx - jz L(copy_page_by_rep_exit) -L(copy_page_by_rep_left_1): - movb %al, (%edi) -L(copy_page_by_rep_exit): - POP (%edi) - SETRTNVAL - RETURN - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_16_128bytes): - .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_12bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_45bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_78bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_111bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) - .popsection - - ALIGN (4) -L(aligned_16_112bytes): - movdqa %xmm0, -112(%edx) -L(aligned_16_96bytes): - movdqa %xmm0, -96(%edx) -L(aligned_16_80bytes): - movdqa %xmm0, -80(%edx) -L(aligned_16_64bytes): - movdqa %xmm0, -64(%edx) -L(aligned_16_48bytes): - movdqa %xmm0, -48(%edx) -L(aligned_16_32bytes): - movdqa %xmm0, -32(%edx) -L(aligned_16_16bytes): - movdqa %xmm0, -16(%edx) -L(aligned_16_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_113bytes): - movdqa %xmm0, -113(%edx) -L(aligned_16_97bytes): - movdqa %xmm0, -97(%edx) -L(aligned_16_81bytes): - movdqa %xmm0, -81(%edx) -L(aligned_16_65bytes): - movdqa %xmm0, -65(%edx) -L(aligned_16_49bytes): - movdqa %xmm0, -49(%edx) -L(aligned_16_33bytes): - movdqa %xmm0, -33(%edx) -L(aligned_16_17bytes): - movdqa %xmm0, -17(%edx) -L(aligned_16_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_114bytes): - movdqa %xmm0, -114(%edx) -L(aligned_16_98bytes): - movdqa %xmm0, -98(%edx) -L(aligned_16_82bytes): - 
movdqa %xmm0, -82(%edx) -L(aligned_16_66bytes): - movdqa %xmm0, -66(%edx) -L(aligned_16_50bytes): - movdqa %xmm0, -50(%edx) -L(aligned_16_34bytes): - movdqa %xmm0, -34(%edx) -L(aligned_16_18bytes): - movdqa %xmm0, -18(%edx) -L(aligned_16_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_115bytes): - movdqa %xmm0, -115(%edx) -L(aligned_16_99bytes): - movdqa %xmm0, -99(%edx) -L(aligned_16_83bytes): - movdqa %xmm0, -83(%edx) -L(aligned_16_67bytes): - movdqa %xmm0, -67(%edx) -L(aligned_16_51bytes): - movdqa %xmm0, -51(%edx) -L(aligned_16_35bytes): - movdqa %xmm0, -35(%edx) -L(aligned_16_19bytes): - movdqa %xmm0, -19(%edx) -L(aligned_16_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_116bytes): - movdqa %xmm0, -116(%edx) -L(aligned_16_100bytes): - movdqa %xmm0, -100(%edx) -L(aligned_16_84bytes): - movdqa %xmm0, -84(%edx) -L(aligned_16_68bytes): - movdqa %xmm0, -68(%edx) -L(aligned_16_52bytes): - movdqa %xmm0, -52(%edx) -L(aligned_16_36bytes): - movdqa %xmm0, -36(%edx) -L(aligned_16_20bytes): - movdqa %xmm0, -20(%edx) -L(aligned_16_4bytes): - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_117bytes): - movdqa %xmm0, -117(%edx) -L(aligned_16_101bytes): - movdqa %xmm0, -101(%edx) -L(aligned_16_85bytes): - movdqa %xmm0, -85(%edx) -L(aligned_16_69bytes): - movdqa %xmm0, -69(%edx) -L(aligned_16_53bytes): - movdqa %xmm0, -53(%edx) -L(aligned_16_37bytes): - movdqa %xmm0, -37(%edx) -L(aligned_16_21bytes): - movdqa %xmm0, -21(%edx) -L(aligned_16_5bytes): - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_118bytes): - movdqa %xmm0, -118(%edx) -L(aligned_16_102bytes): - movdqa %xmm0, -102(%edx) -L(aligned_16_86bytes): - movdqa %xmm0, -86(%edx) -L(aligned_16_70bytes): - movdqa %xmm0, -70(%edx) -L(aligned_16_54bytes): - movdqa %xmm0, -54(%edx) -L(aligned_16_38bytes): - movdqa %xmm0, -38(%edx) -L(aligned_16_22bytes): - movdqa %xmm0, -22(%edx) 
-L(aligned_16_6bytes): - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_119bytes): - movdqa %xmm0, -119(%edx) -L(aligned_16_103bytes): - movdqa %xmm0, -103(%edx) -L(aligned_16_87bytes): - movdqa %xmm0, -87(%edx) -L(aligned_16_71bytes): - movdqa %xmm0, -71(%edx) -L(aligned_16_55bytes): - movdqa %xmm0, -55(%edx) -L(aligned_16_39bytes): - movdqa %xmm0, -39(%edx) -L(aligned_16_23bytes): - movdqa %xmm0, -23(%edx) -L(aligned_16_7bytes): - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_120bytes): - movdqa %xmm0, -120(%edx) -L(aligned_16_104bytes): - movdqa %xmm0, -104(%edx) -L(aligned_16_88bytes): - movdqa %xmm0, -88(%edx) -L(aligned_16_72bytes): - movdqa %xmm0, -72(%edx) -L(aligned_16_56bytes): - movdqa %xmm0, -56(%edx) -L(aligned_16_40bytes): - movdqa %xmm0, -40(%edx) -L(aligned_16_24bytes): - movdqa %xmm0, -24(%edx) -L(aligned_16_8bytes): - movq %xmm0, -8(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_121bytes): - movdqa %xmm0, -121(%edx) -L(aligned_16_105bytes): - movdqa %xmm0, -105(%edx) -L(aligned_16_89bytes): - movdqa %xmm0, -89(%edx) -L(aligned_16_73bytes): - movdqa %xmm0, -73(%edx) -L(aligned_16_57bytes): - movdqa %xmm0, -57(%edx) -L(aligned_16_41bytes): - movdqa %xmm0, -41(%edx) -L(aligned_16_25bytes): - movdqa %xmm0, -25(%edx) -L(aligned_16_9bytes): - movq %xmm0, -9(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_122bytes): - movdqa %xmm0, -122(%edx) -L(aligned_16_106bytes): - movdqa %xmm0, -106(%edx) -L(aligned_16_90bytes): - movdqa %xmm0, -90(%edx) -L(aligned_16_74bytes): - movdqa %xmm0, -74(%edx) -L(aligned_16_58bytes): - movdqa %xmm0, -58(%edx) -L(aligned_16_42bytes): - movdqa %xmm0, -42(%edx) -L(aligned_16_26bytes): - movdqa %xmm0, -26(%edx) -L(aligned_16_10bytes): - movq %xmm0, -10(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_123bytes): - movdqa %xmm0, -123(%edx) 
-L(aligned_16_107bytes): - movdqa %xmm0, -107(%edx) -L(aligned_16_91bytes): - movdqa %xmm0, -91(%edx) -L(aligned_16_75bytes): - movdqa %xmm0, -75(%edx) -L(aligned_16_59bytes): - movdqa %xmm0, -59(%edx) -L(aligned_16_43bytes): - movdqa %xmm0, -43(%edx) -L(aligned_16_27bytes): - movdqa %xmm0, -27(%edx) -L(aligned_16_11bytes): - movq %xmm0, -11(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_124bytes): - movdqa %xmm0, -124(%edx) -L(aligned_16_108bytes): - movdqa %xmm0, -108(%edx) -L(aligned_16_92bytes): - movdqa %xmm0, -92(%edx) -L(aligned_16_76bytes): - movdqa %xmm0, -76(%edx) -L(aligned_16_60bytes): - movdqa %xmm0, -60(%edx) -L(aligned_16_44bytes): - movdqa %xmm0, -44(%edx) -L(aligned_16_28bytes): - movdqa %xmm0, -28(%edx) -L(aligned_16_12bytes): - movq %xmm0, -12(%edx) - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_125bytes): - movdqa %xmm0, -125(%edx) -L(aligned_16_109bytes): - movdqa %xmm0, -109(%edx) -L(aligned_16_93bytes): - movdqa %xmm0, -93(%edx) -L(aligned_16_77bytes): - movdqa %xmm0, -77(%edx) -L(aligned_16_61bytes): - movdqa %xmm0, -61(%edx) -L(aligned_16_45bytes): - movdqa %xmm0, -45(%edx) -L(aligned_16_29bytes): - movdqa %xmm0, -29(%edx) -L(aligned_16_13bytes): - movq %xmm0, -13(%edx) - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_126bytes): - movdqa %xmm0, -126(%edx) -L(aligned_16_110bytes): - movdqa %xmm0, -110(%edx) -L(aligned_16_94bytes): - movdqa %xmm0, -94(%edx) -L(aligned_16_78bytes): - movdqa %xmm0, -78(%edx) -L(aligned_16_62bytes): - movdqa %xmm0, -62(%edx) -L(aligned_16_46bytes): - movdqa %xmm0, -46(%edx) -L(aligned_16_30bytes): - movdqa %xmm0, -30(%edx) -L(aligned_16_14bytes): - movq %xmm0, -14(%edx) - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_127bytes): - movdqa %xmm0, -127(%edx) -L(aligned_16_111bytes): - movdqa %xmm0, -111(%edx) -L(aligned_16_95bytes): - movdqa %xmm0, -95(%edx) 
-L(aligned_16_79bytes): - movdqa %xmm0, -79(%edx) -L(aligned_16_63bytes): - movdqa %xmm0, -63(%edx) -L(aligned_16_47bytes): - movdqa %xmm0, -47(%edx) -L(aligned_16_31bytes): - movdqa %xmm0, -31(%edx) -L(aligned_16_15bytes): - movq %xmm0, -15(%edx) - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN_END - -END (__memset_sse2_rep) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S deleted file mode 100644 index d7b8be9114..0000000000 --- a/sysdeps/i386/i686/multiarch/memset-sse2.S +++ /dev/null @@ -1,860 +0,0 @@ -/* memset with SSE2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif - -#ifdef SHARED -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define PARMS 8 /* Preserve EBX. */ -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - add $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - add (%ebx,%ecx,4), %ebx; \ - add %ecx, %edx; \ - /* We loaded the jump table and adjusted EDX. Go. */ \ - jmp *%ebx -#else -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define PARMS 4 -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - add %ecx, %edx; \ - jmp *TABLE(,%ecx,4) -#endif - - .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY (__memset_chk_sse2) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk_sse2) -#endif -ENTRY (__memset_sse2) - ENTRANCE - - movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else - movzbl CHR(%esp), %eax - movb %al, %ah - /* Fill the whole EAX with pattern. 
*/ - movl %eax, %edx - shl $16, %eax - or %edx, %eax -#endif - movl DEST(%esp), %edx - cmp $32, %ecx - jae L(32bytesormore) - -L(write_less32bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) - - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_less_32bytes): - .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_17bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_19bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_20bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_30bytes), 
L(table_less_32bytes)) - .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) - .popsection - - ALIGN (4) -L(write_28bytes): - movl %eax, -28(%edx) -L(write_24bytes): - movl %eax, -24(%edx) -L(write_20bytes): - movl %eax, -20(%edx) -L(write_16bytes): - movl %eax, -16(%edx) -L(write_12bytes): - movl %eax, -12(%edx) -L(write_8bytes): - movl %eax, -8(%edx) -L(write_4bytes): - movl %eax, -4(%edx) -L(write_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(write_29bytes): - movl %eax, -29(%edx) -L(write_25bytes): - movl %eax, -25(%edx) -L(write_21bytes): - movl %eax, -21(%edx) -L(write_17bytes): - movl %eax, -17(%edx) -L(write_13bytes): - movl %eax, -13(%edx) -L(write_9bytes): - movl %eax, -9(%edx) -L(write_5bytes): - movl %eax, -5(%edx) -L(write_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_30bytes): - movl %eax, -30(%edx) -L(write_26bytes): - movl %eax, -26(%edx) -L(write_22bytes): - movl %eax, -22(%edx) -L(write_18bytes): - movl %eax, -18(%edx) -L(write_14bytes): - movl %eax, -14(%edx) -L(write_10bytes): - movl %eax, -10(%edx) -L(write_6bytes): - movl %eax, -6(%edx) -L(write_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_31bytes): - movl %eax, -31(%edx) -L(write_27bytes): - movl %eax, -27(%edx) -L(write_23bytes): - movl %eax, -23(%edx) -L(write_19bytes): - movl %eax, -19(%edx) -L(write_15bytes): - movl %eax, -15(%edx) -L(write_11bytes): - movl %eax, -11(%edx) -L(write_7bytes): - movl %eax, -7(%edx) -L(write_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -/* ECX > 32 and EDX is 4 byte aligned. */ -L(32bytesormore): - /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else - movd %eax, %xmm0 - pshufd $0, %xmm0, %xmm0 -#endif - testl $0xf, %edx - jz L(aligned_16) -/* ECX > 32 and EDX is not 16 byte aligned. 
*/ -L(not_aligned_16): - movdqu %xmm0, (%edx) - movl %edx, %eax - and $-16, %edx - add $16, %edx - sub %edx, %eax - add %eax, %ecx - movd %xmm0, %eax - - ALIGN (4) -L(aligned_16): - cmp $128, %ecx - jae L(128bytesormore) - -L(aligned_16_less128bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - ALIGN (4) -L(128bytesormore): -#ifdef SHARED_CACHE_SIZE - PUSH (%ebx) - mov $SHARED_CACHE_SIZE, %ebx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx -# else - PUSH (%ebx) - mov __x86_shared_cache_size, %ebx -# endif -#endif - cmp %ebx, %ecx - jae L(128bytesormore_nt_start) - - -#ifdef DATA_CACHE_SIZE - POP (%ebx) -# define RESTORE_EBX_STATE CFI_PUSH (%ebx) - cmp $DATA_CACHE_SIZE, %ecx -#else -# ifdef SHARED -# define RESTORE_EBX_STATE - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx -# else - POP (%ebx) -# define RESTORE_EBX_STATE CFI_PUSH (%ebx) - cmp __x86_data_cache_size, %ecx -# endif -#endif - - jae L(128bytes_L2_normal) - subl $128, %ecx -L(128bytesormore_normal): - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jb L(128bytesless_normal) - - - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jae L(128bytesormore_normal) - -L(128bytesless_normal): - add $128, %ecx - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - ALIGN (4) -L(128bytes_L2_normal): - prefetcht0 0x380(%edx) - prefetcht0 0x3c0(%edx) - sub $128, %ecx - movdqa %xmm0, (%edx) - movaps %xmm0, 0x10(%edx) - movaps %xmm0, 0x20(%edx) - movaps %xmm0, 0x30(%edx) - movaps %xmm0, 
0x40(%edx) - movaps %xmm0, 0x50(%edx) - movaps %xmm0, 0x60(%edx) - movaps %xmm0, 0x70(%edx) - add $128, %edx - cmp $128, %ecx - jae L(128bytes_L2_normal) - -L(128bytesless_L2_normal): - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - RESTORE_EBX_STATE -L(128bytesormore_nt_start): - sub %ebx, %ecx - ALIGN (4) -L(128bytesormore_shared_cache_loop): - prefetcht0 0x3c0(%edx) - prefetcht0 0x380(%edx) - sub $0x80, %ebx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - add $0x80, %edx - cmp $0x80, %ebx - jae L(128bytesormore_shared_cache_loop) - cmp $0x80, %ecx - jb L(shared_cache_loop_end) - ALIGN (4) -L(128bytesormore_nt): - sub $0x80, %ecx - movntdq %xmm0, (%edx) - movntdq %xmm0, 0x10(%edx) - movntdq %xmm0, 0x20(%edx) - movntdq %xmm0, 0x30(%edx) - movntdq %xmm0, 0x40(%edx) - movntdq %xmm0, 0x50(%edx) - movntdq %xmm0, 0x60(%edx) - movntdq %xmm0, 0x70(%edx) - add $0x80, %edx - cmp $0x80, %ecx - jae L(128bytesormore_nt) - sfence -L(shared_cache_loop_end): -#if defined DATA_CACHE_SIZE || !defined SHARED - POP (%ebx) -#endif - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_16_128bytes): - .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_11bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_44bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_77bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_110bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) - .popsection - - ALIGN (4) -L(aligned_16_112bytes): - movdqa %xmm0, -112(%edx) -L(aligned_16_96bytes): - movdqa %xmm0, -96(%edx) -L(aligned_16_80bytes): - movdqa %xmm0, -80(%edx) -L(aligned_16_64bytes): - movdqa %xmm0, -64(%edx) -L(aligned_16_48bytes): - movdqa %xmm0, -48(%edx) -L(aligned_16_32bytes): - movdqa %xmm0, -32(%edx) -L(aligned_16_16bytes): - movdqa %xmm0, -16(%edx) -L(aligned_16_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_113bytes): - movdqa %xmm0, -113(%edx) -L(aligned_16_97bytes): - movdqa %xmm0, -97(%edx) -L(aligned_16_81bytes): - movdqa %xmm0, -81(%edx) -L(aligned_16_65bytes): - movdqa %xmm0, -65(%edx) -L(aligned_16_49bytes): - movdqa %xmm0, -49(%edx) -L(aligned_16_33bytes): - movdqa %xmm0, -33(%edx) -L(aligned_16_17bytes): - movdqa %xmm0, -17(%edx) -L(aligned_16_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_114bytes): - movdqa %xmm0, 
-114(%edx) -L(aligned_16_98bytes): - movdqa %xmm0, -98(%edx) -L(aligned_16_82bytes): - movdqa %xmm0, -82(%edx) -L(aligned_16_66bytes): - movdqa %xmm0, -66(%edx) -L(aligned_16_50bytes): - movdqa %xmm0, -50(%edx) -L(aligned_16_34bytes): - movdqa %xmm0, -34(%edx) -L(aligned_16_18bytes): - movdqa %xmm0, -18(%edx) -L(aligned_16_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_115bytes): - movdqa %xmm0, -115(%edx) -L(aligned_16_99bytes): - movdqa %xmm0, -99(%edx) -L(aligned_16_83bytes): - movdqa %xmm0, -83(%edx) -L(aligned_16_67bytes): - movdqa %xmm0, -67(%edx) -L(aligned_16_51bytes): - movdqa %xmm0, -51(%edx) -L(aligned_16_35bytes): - movdqa %xmm0, -35(%edx) -L(aligned_16_19bytes): - movdqa %xmm0, -19(%edx) -L(aligned_16_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_116bytes): - movdqa %xmm0, -116(%edx) -L(aligned_16_100bytes): - movdqa %xmm0, -100(%edx) -L(aligned_16_84bytes): - movdqa %xmm0, -84(%edx) -L(aligned_16_68bytes): - movdqa %xmm0, -68(%edx) -L(aligned_16_52bytes): - movdqa %xmm0, -52(%edx) -L(aligned_16_36bytes): - movdqa %xmm0, -36(%edx) -L(aligned_16_20bytes): - movdqa %xmm0, -20(%edx) -L(aligned_16_4bytes): - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_117bytes): - movdqa %xmm0, -117(%edx) -L(aligned_16_101bytes): - movdqa %xmm0, -101(%edx) -L(aligned_16_85bytes): - movdqa %xmm0, -85(%edx) -L(aligned_16_69bytes): - movdqa %xmm0, -69(%edx) -L(aligned_16_53bytes): - movdqa %xmm0, -53(%edx) -L(aligned_16_37bytes): - movdqa %xmm0, -37(%edx) -L(aligned_16_21bytes): - movdqa %xmm0, -21(%edx) -L(aligned_16_5bytes): - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_118bytes): - movdqa %xmm0, -118(%edx) -L(aligned_16_102bytes): - movdqa %xmm0, -102(%edx) -L(aligned_16_86bytes): - movdqa %xmm0, -86(%edx) -L(aligned_16_70bytes): - movdqa %xmm0, -70(%edx) -L(aligned_16_54bytes): - movdqa %xmm0, -54(%edx) 
-L(aligned_16_38bytes): - movdqa %xmm0, -38(%edx) -L(aligned_16_22bytes): - movdqa %xmm0, -22(%edx) -L(aligned_16_6bytes): - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_119bytes): - movdqa %xmm0, -119(%edx) -L(aligned_16_103bytes): - movdqa %xmm0, -103(%edx) -L(aligned_16_87bytes): - movdqa %xmm0, -87(%edx) -L(aligned_16_71bytes): - movdqa %xmm0, -71(%edx) -L(aligned_16_55bytes): - movdqa %xmm0, -55(%edx) -L(aligned_16_39bytes): - movdqa %xmm0, -39(%edx) -L(aligned_16_23bytes): - movdqa %xmm0, -23(%edx) -L(aligned_16_7bytes): - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_120bytes): - movdqa %xmm0, -120(%edx) -L(aligned_16_104bytes): - movdqa %xmm0, -104(%edx) -L(aligned_16_88bytes): - movdqa %xmm0, -88(%edx) -L(aligned_16_72bytes): - movdqa %xmm0, -72(%edx) -L(aligned_16_56bytes): - movdqa %xmm0, -56(%edx) -L(aligned_16_40bytes): - movdqa %xmm0, -40(%edx) -L(aligned_16_24bytes): - movdqa %xmm0, -24(%edx) -L(aligned_16_8bytes): - movq %xmm0, -8(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_121bytes): - movdqa %xmm0, -121(%edx) -L(aligned_16_105bytes): - movdqa %xmm0, -105(%edx) -L(aligned_16_89bytes): - movdqa %xmm0, -89(%edx) -L(aligned_16_73bytes): - movdqa %xmm0, -73(%edx) -L(aligned_16_57bytes): - movdqa %xmm0, -57(%edx) -L(aligned_16_41bytes): - movdqa %xmm0, -41(%edx) -L(aligned_16_25bytes): - movdqa %xmm0, -25(%edx) -L(aligned_16_9bytes): - movq %xmm0, -9(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_122bytes): - movdqa %xmm0, -122(%edx) -L(aligned_16_106bytes): - movdqa %xmm0, -106(%edx) -L(aligned_16_90bytes): - movdqa %xmm0, -90(%edx) -L(aligned_16_74bytes): - movdqa %xmm0, -74(%edx) -L(aligned_16_58bytes): - movdqa %xmm0, -58(%edx) -L(aligned_16_42bytes): - movdqa %xmm0, -42(%edx) -L(aligned_16_26bytes): - movdqa %xmm0, -26(%edx) -L(aligned_16_10bytes): - movq %xmm0, -10(%edx) - movw %ax, -2(%edx) - 
SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_123bytes): - movdqa %xmm0, -123(%edx) -L(aligned_16_107bytes): - movdqa %xmm0, -107(%edx) -L(aligned_16_91bytes): - movdqa %xmm0, -91(%edx) -L(aligned_16_75bytes): - movdqa %xmm0, -75(%edx) -L(aligned_16_59bytes): - movdqa %xmm0, -59(%edx) -L(aligned_16_43bytes): - movdqa %xmm0, -43(%edx) -L(aligned_16_27bytes): - movdqa %xmm0, -27(%edx) -L(aligned_16_11bytes): - movq %xmm0, -11(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_124bytes): - movdqa %xmm0, -124(%edx) -L(aligned_16_108bytes): - movdqa %xmm0, -108(%edx) -L(aligned_16_92bytes): - movdqa %xmm0, -92(%edx) -L(aligned_16_76bytes): - movdqa %xmm0, -76(%edx) -L(aligned_16_60bytes): - movdqa %xmm0, -60(%edx) -L(aligned_16_44bytes): - movdqa %xmm0, -44(%edx) -L(aligned_16_28bytes): - movdqa %xmm0, -28(%edx) -L(aligned_16_12bytes): - movq %xmm0, -12(%edx) - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_125bytes): - movdqa %xmm0, -125(%edx) -L(aligned_16_109bytes): - movdqa %xmm0, -109(%edx) -L(aligned_16_93bytes): - movdqa %xmm0, -93(%edx) -L(aligned_16_77bytes): - movdqa %xmm0, -77(%edx) -L(aligned_16_61bytes): - movdqa %xmm0, -61(%edx) -L(aligned_16_45bytes): - movdqa %xmm0, -45(%edx) -L(aligned_16_29bytes): - movdqa %xmm0, -29(%edx) -L(aligned_16_13bytes): - movq %xmm0, -13(%edx) - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_126bytes): - movdqa %xmm0, -126(%edx) -L(aligned_16_110bytes): - movdqa %xmm0, -110(%edx) -L(aligned_16_94bytes): - movdqa %xmm0, -94(%edx) -L(aligned_16_78bytes): - movdqa %xmm0, -78(%edx) -L(aligned_16_62bytes): - movdqa %xmm0, -62(%edx) -L(aligned_16_46bytes): - movdqa %xmm0, -46(%edx) -L(aligned_16_30bytes): - movdqa %xmm0, -30(%edx) -L(aligned_16_14bytes): - movq %xmm0, -14(%edx) - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_127bytes): - movdqa %xmm0, -127(%edx) 
-L(aligned_16_111bytes): - movdqa %xmm0, -111(%edx) -L(aligned_16_95bytes): - movdqa %xmm0, -95(%edx) -L(aligned_16_79bytes): - movdqa %xmm0, -79(%edx) -L(aligned_16_63bytes): - movdqa %xmm0, -63(%edx) -L(aligned_16_47bytes): - movdqa %xmm0, -47(%edx) -L(aligned_16_31bytes): - movdqa %xmm0, -31(%edx) -L(aligned_16_15bytes): - movq %xmm0, -15(%edx) - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN_END - -END (__memset_sse2) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S deleted file mode 100644 index f601663a9f..0000000000 --- a/sysdeps/i386/i686/multiarch/memset.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Multiple versions of memset - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#if IS_IN (libc) - .text -ENTRY(memset) - .type memset, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memset_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_sse2) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_sse2_rep) -2: ret -END(memset) - -# undef ENTRY -# define ENTRY(name) \ - .type __memset_ia32, @function; \ - .globl __memset_ia32; \ - .p2align 4; \ - __memset_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memset_ia32, .-__memset_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memset_chk_ia32, @function; \ - .globl __memset_chk_ia32; \ - .p2align 4; \ - __memset_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memset_chk_ia32, .-__memset_chk_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memset; __GI_memset = __memset_ia32 -# endif - -# undef strong_alias -# define strong_alias(original, alias) -#endif - -#include "../memset.S" diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S deleted file mode 100644 index 573cf4208a..0000000000 --- a/sysdeps/i386/i686/multiarch/memset_chk.S +++ /dev/null @@ -1,82 +0,0 @@ -/* Multiple versions of __memset_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(__memset_chk) - .type __memset_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memset_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_chk_sse2) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep) -2: ret -END(__memset_chk) - -# ifdef SHARED -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -# else - .text - .type __memset_chk_sse2, @function - .p2align 4; -__memset_chk_sse2: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memset_sse2 - cfi_endproc - .size __memset_chk_sse2, .-__memset_chk_sse2 - - .type __memset_chk_sse2_rep, @function - .p2align 4; -__memset_chk_sse2_rep: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memset_sse2_rep - cfi_endproc - .size __memset_chk_sse2_rep, .-__memset_chk_sse2_rep - - .type __memset_chk_ia32, @function - .p2align 4; -__memset_chk_ia32: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memset_ia32 - cfi_endproc - .size __memset_chk_ia32, .-__memset_chk_ia32 -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S 
b/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S deleted file mode 100644 index 88c0e5776c..0000000000 --- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_RAWMEMCHR -#define MEMCHR __rawmemchr_sse2_bsf -#include "memchr-sse2-bsf.S" diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S b/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S deleted file mode 100644 index 038c74896b..0000000000 --- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_RAWMEMCHR -#define MEMCHR __rawmemchr_sse2 -#include "memchr-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/rawmemchr.S b/sysdeps/i386/i686/multiarch/rawmemchr.S deleted file mode 100644 index 0a41d63ee8..0000000000 --- a/sysdeps/i386/i686/multiarch/rawmemchr.S +++ /dev/null @@ -1,65 +0,0 @@ -/* Multiple versions of rawmemchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__rawmemchr) - .type __rawmemchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - HAS_CPU_FEATURE (SSE2) - jz 2f - HAS_ARCH_FEATURE (Slow_BSF) - jz 3f - - LOAD_FUNC_GOT_EAX (__rawmemchr_sse2) - ret - -2: LOAD_FUNC_GOT_EAX (__rawmemchr_ia32) - ret - -3: LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf) - ret -END(__rawmemchr) - -weak_alias(__rawmemchr, rawmemchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __rawmemchr_ia32, @function; \ - .globl __rawmemchr_ia32; \ - .p2align 4; \ - __rawmemchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __rawmemchr_ia32, .-__rawmemchr_ia32 - -# undef libc_hidden_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_def(name) \ - .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_ia32 - -#endif -#include "../../rawmemchr.S" diff --git a/sysdeps/i386/i686/multiarch/rtld-strnlen.c b/sysdeps/i386/i686/multiarch/rtld-strnlen.c deleted file mode 100644 index 1aa5440644..0000000000 --- a/sysdeps/i386/i686/multiarch/rtld-strnlen.c +++ /dev/null @@ -1 +0,0 @@ -#include <string/strnlen.c> diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c deleted file mode 100644 index 2e9619f97c..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fma-fma.c +++ /dev/null @@ -1,27 +0,0 @@ -/* FMA version of fma. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -double -__fma_fma (double x, double y, double z) -{ - asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); - return x; -} diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c deleted file mode 100644 index 411ebb2ba9..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fma.c +++ /dev/null @@ -1,34 +0,0 @@ -/* Multiple versions of fma. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -#include <math.h> -#include <init-arch.h> - -extern double __fma_ia32 (double x, double y, double z) attribute_hidden; -extern double __fma_fma (double x, double y, double z) attribute_hidden; - -libm_ifunc (__fma, - HAS_ARCH_FEATURE (FMA_Usable) ? 
__fma_fma : __fma_ia32); -weak_alias (__fma, fma) - -#define __fma __fma_ia32 - -#include <sysdeps/ieee754/ldbl-96/s_fma.c> diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c deleted file mode 100644 index ee57abfda2..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c +++ /dev/null @@ -1,27 +0,0 @@ -/* FMA version of fmaf. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -float -__fmaf_fma (float x, float y, float z) -{ - asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); - return x; -} diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c deleted file mode 100644 index 00b0fbcfc5..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fmaf.c +++ /dev/null @@ -1,34 +0,0 @@ -/* Multiple versions of fmaf. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -#include <math.h> -#include <init-arch.h> - -extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden; -extern float __fmaf_fma (float x, float y, float z) attribute_hidden; - -libm_ifunc (__fmaf, - HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32); -weak_alias (__fmaf, fmaf) - -#define __fmaf __fmaf_ia32 - -#include <sysdeps/ieee754/dbl-64/s_fmaf.c> diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c deleted file mode 100644 index 7db31b02f8..0000000000 --- a/sysdeps/i386/i686/multiarch/sched_cpucount.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/sched_cpucount.c> diff --git a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S b/sysdeps/i386/i686/multiarch/stpcpy-sse2.S deleted file mode 100644 index 46ca1b3074..0000000000 --- a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STPCPY -#define STRCPY __stpcpy_sse2 -#include "strcpy-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S deleted file mode 100644 index d971c2da38..0000000000 --- a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STPCPY -#define STRCPY __stpcpy_ssse3 -#include "strcpy-ssse3.S" diff 
--git a/sysdeps/i386/i686/multiarch/stpcpy.S b/sysdeps/i386/i686/multiarch/stpcpy.S deleted file mode 100644 index ee81ab6ae3..0000000000 --- a/sysdeps/i386/i686/multiarch/stpcpy.S +++ /dev/null @@ -1,9 +0,0 @@ -/* Multiple versions of stpcpy - All versions must be listed in ifunc-impl-list.c. */ -#define USE_AS_STPCPY -#define STRCPY __stpcpy -#include "strcpy.S" - -weak_alias (__stpcpy, stpcpy) -libc_hidden_def (__stpcpy) -libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S b/sysdeps/i386/i686/multiarch/stpncpy-sse2.S deleted file mode 100644 index 37a703cb76..0000000000 --- a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_STPCPY -#define USE_AS_STRNCPY -#define STRCPY __stpncpy_sse2 -#include "strcpy-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S deleted file mode 100644 index 14ed16f6b5..0000000000 --- a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_STPCPY -#define USE_AS_STRNCPY -#define STRCPY __stpncpy_ssse3 -#include "strcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/stpncpy.S b/sysdeps/i386/i686/multiarch/stpncpy.S deleted file mode 100644 index 2698ca6a8c..0000000000 --- a/sysdeps/i386/i686/multiarch/stpncpy.S +++ /dev/null @@ -1,8 +0,0 @@ -/* Multiple versions of stpncpy - All versions must be listed in ifunc-impl-list.c. 
*/ -#define STRCPY __stpncpy -#define USE_AS_STPCPY -#define USE_AS_STRNCPY -#include "strcpy.S" - -weak_alias (__stpncpy, stpncpy) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/sysdeps/i386/i686/multiarch/strcasecmp-c.c deleted file mode 100644 index 753c6ec84a..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp-c.c +++ /dev/null @@ -1,12 +0,0 @@ -#include <string.h> - -extern __typeof (strcasecmp) __strcasecmp_nonascii; - -#define __strcasecmp __strcasecmp_nonascii -#include <string/strcasecmp.c> - -strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. */ -strong_alias (__strcasecmp_nonascii, __GI___strcasecmp) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S deleted file mode 100644 index ec59276408..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Entry point for multi-version x86 strcasecmp. - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY(__strcasecmp) - .type __strcasecmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strcasecmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - HAS_ARCH_FEATURE (Slow_SSE4_2) - jnz 2f - LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2) -2: ret -END(__strcasecmp) - -weak_alias (__strcasecmp, strcasecmp) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c deleted file mode 100644 index d4fcd2b4a1..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <string.h> - -extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii; - -#define __strcasecmp_l __strcasecmp_l_nonascii -#define USE_IN_EXTENDED_LOCALE_MODEL 1 -#include <string/strcasecmp.c> - -strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. 
*/ -strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S deleted file mode 100644 index 411d4153f2..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRCASECMP_L 1 -#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S deleted file mode 100644 index a22b93c518..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRCASECMP_L 1 -#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l.S b/sysdeps/i386/i686/multiarch/strcasecmp_l.S deleted file mode 100644 index 711c09b0dc..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l.S +++ /dev/null @@ -1,7 +0,0 @@ -/* Multiple versions of strcasecmp_l - All versions must be listed in ifunc-impl-list.c. */ -#define STRCMP __strcasecmp_l -#define USE_AS_STRCASECMP_L -#include "strcmp.S" - -weak_alias (__strcasecmp_l, strcasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S deleted file mode 100644 index 6359c7330c..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat-sse2.S +++ /dev/null @@ -1,1245 +0,0 @@ -/* strcat with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# include <sysdep.h> - - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into ECX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into ECX. */ \ - SETUP_PIC_REG(cx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ecx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ecx,INDEX,SCALE), %ecx; \ - /* We loaded the jump table and adjusted ECX. Go. */ \ - jmp *%ecx -# else -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - -# ifndef STRCAT -# define STRCAT __strcat_sse2 -# endif - -# define PARMS 4 -# define STR1 PARMS+4 -# define STR2 STR1+4 - -# ifdef USE_AS_STRNCAT -# define LEN STR2+8 -# define STR3 STR1+4 -# else -# define STR3 STR1 -# endif - -# define USE_AS_STRCAT -# ifdef USE_AS_STRNCAT -# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); -# else -# define RETURN POP(%esi); ret; CFI_PUSH(%esi); -# endif - -.text -ENTRY (STRCAT) - PUSH (%esi) - mov STR1(%esp), %eax - mov STR2(%esp), %esi -# ifdef USE_AS_STRNCAT - PUSH (%ebx) - movl LEN(%esp), %ebx - test %ebx, %ebx - jz L(ExitZero) -# endif - cmpb $0, (%esi) - mov %esi, %ecx - mov %eax, %edx - jz L(ExitZero) - - and $63, %ecx - and $63, %edx - cmp $32, %ecx - ja L(StrlenCore7_1) - cmp $48, %edx - ja L(alignment_prolog) - - pxor %xmm0, %xmm0 - pxor %xmm4, %xmm4 - pxor %xmm7, %xmm7 - movdqu (%eax), %xmm1 - movdqu (%esi), %xmm5 - pcmpeqb %xmm1, %xmm0 - movdqu 16(%esi), %xmm6 - pmovmskb %xmm0, %ecx - pcmpeqb %xmm5, %xmm4 - pcmpeqb %xmm6, %xmm7 - test %ecx, %ecx - jnz L(exit_less16_) - mov %eax, %ecx - and $-16, %eax - jmp L(loop_prolog) - -L(alignment_prolog): - pxor %xmm0, %xmm0 - pxor %xmm4, %xmm4 - mov %edx, %ecx - pxor %xmm7, %xmm7 - and $15, %ecx - and $-16, %eax - pcmpeqb (%eax), %xmm0 - movdqu (%esi), %xmm5 - movdqu 16(%esi), %xmm6 - pmovmskb %xmm0, %edx - pcmpeqb %xmm5, %xmm4 - shr %cl, %edx - pcmpeqb %xmm6, %xmm7 - test %edx, %edx - jnz L(exit_less16) - add %eax, %ecx - - pxor %xmm0, %xmm0 -L(loop_prolog): - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - .p2align 4 -L(align16_loop): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 
64(%eax), %eax - test %edx, %edx - jz L(align16_loop) - bsf %edx, %edx - add %edx, %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit16): - bsf %edx, %edx - lea 16(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit32): - bsf %edx, %edx - lea 32(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit48): - bsf %edx, %edx - lea 48(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit_less16): - bsf %edx, %edx - add %ecx, %eax - add %edx, %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit_less16_): - bsf %ecx, %ecx - add %ecx, %eax - - .p2align 4 -L(StartStrcpyPart): - pmovmskb %xmm4, %edx -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1) - - movdqu %xmm5, (%eax) - pmovmskb %xmm7, %edx -# ifdef USE_AS_STRNCAT - cmp $32, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes1) - - mov %esi, %ecx - and $-16, %esi - and $15, %ecx - pxor %xmm0, %xmm0 -# ifdef USE_AS_STRNCAT - add %ecx, %ebx - sbb %edx, %edx - or %edx, %ebx -# endif - sub %ecx, %eax - jmp L(Unalign16Both) - -L(StrlenCore7_1): - mov %eax, %ecx - pxor %xmm0, %xmm0 - and $15, %ecx - and $-16, %eax - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - shr %cl, %edx - test %edx, %edx - jnz L(exit_less16_1) - add %eax, %ecx - - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - - .p2align 4 -L(align16_loop_1): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16_1) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32_1) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48_1) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%eax), %eax - test %edx, %edx - jz L(align16_loop_1) - bsf %edx, %edx - add %edx, %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit16_1): - bsf %edx, %edx - lea 16(%eax, %edx), %eax - 
jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit32_1): - bsf %edx, %edx - lea 32(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit48_1): - bsf %edx, %edx - lea 48(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit_less16_1): - bsf %edx, %edx - add %ecx, %eax - add %edx, %eax - - .p2align 4 -L(StartStrcpyPart_1): - mov %esi, %ecx - and $15, %ecx - and $-16, %esi - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - -# ifdef USE_AS_STRNCAT - cmp $48, %ebx - ja L(BigN) -# endif - pcmpeqb (%esi), %xmm1 -# ifdef USE_AS_STRNCAT - add %ecx, %ebx -# endif - pmovmskb %xmm1, %edx - shr %cl, %edx -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STRNCAT - cmp $32, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%eax) - sub %ecx, %eax - - .p2align 4 -L(Unalign16Both): - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $48, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) -L(Unalign16BothBigN): - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%eax, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm4 - movdqu %xmm3, (%eax, %ecx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm1 - movdqu %xmm4, (%eax, %ecx) - pcmpeqb %xmm1, %xmm0 - 
pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%eax, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm3, (%eax, %ecx) - mov %esi, %edx - lea 16(%esi, %ecx), %esi - and $-0x40, %esi - sub %esi, %edx - sub %edx, %eax -# ifdef USE_AS_STRNCAT - lea 128(%ebx, %edx), %ebx -# endif - movaps (%esi), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%esi), %xmm5 - movaps 32(%esi), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%esi), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx -# ifdef USE_AS_STRNCAT - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) -# endif - test %edx, %edx - jnz L(Unaligned64Leave) - - .p2align 4 -L(Unaligned64Loop_start): - add $64, %eax - add $64, %esi - movdqu %xmm4, -64(%eax) - movaps (%esi), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%eax) - movaps 16(%esi), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%esi), %xmm3 - movdqu %xmm6, -32(%eax) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%eax) - movaps 48(%esi), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx -# ifdef USE_AS_STRNCAT - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) -# endif - test %edx, %edx - jz L(Unaligned64Loop_start) - -L(Unaligned64Leave): - pxor %xmm1, %xmm1 - - pcmpeqb %xmm4, %xmm0 - pcmpeqb %xmm5, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz 
L(CopyFrom1To16BytesUnaligned_0) - test %ecx, %ecx - jnz L(CopyFrom1To16BytesUnaligned_16) - - pcmpeqb %xmm6, %xmm0 - pcmpeqb %xmm7, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_32) - - bsf %ecx, %edx - movdqu %xmm4, (%eax) - movdqu %xmm5, 16(%eax) - movdqu %xmm6, 32(%eax) - add $48, %esi - add $48, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -# ifdef USE_AS_STRNCAT - .p2align 4 -L(BigN): - pcmpeqb (%esi), %xmm1 - pmovmskb %xmm1, %edx - shr %cl, %edx - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%eax) - sub %ecx, %eax - sub $48, %ebx - add %ecx, %ebx - - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - jmp L(Unalign16BothBigN) -# endif - -/*------------end of main part-------------------------------*/ - -/* Case1 */ - .p2align 4 -L(CopyFrom1To16Bytes): - add %ecx, %eax - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTail): - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1): - add $16, %esi - add $16, %eax -L(CopyFrom1To16BytesTail1): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes): - bsf %edx, %edx - add %ecx, %esi - add $16, %edx - sub %ecx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_0): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_16): - bsf %ecx, %edx - movdqu %xmm4, (%eax) - add $16, %esi - add $16, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), 
%edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_32): - bsf %edx, %edx - movdqu %xmm4, (%eax) - movdqu %xmm5, 16(%eax) - add $32, %esi - add $32, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -# ifdef USE_AS_STRNCAT - - .p2align 4 -L(CopyFrom1To16BytesExit): - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -/* Case2 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %ecx, %eax - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - add $16, %edx - sub %ecx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -L(CopyFrom1To16BytesTailCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -L(CopyFrom1To16BytesTail1Case2): - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -/* Case2 or Case3, Case3 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesCase2) -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %ecx, %eax - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To32BytesCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTailCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTailCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1Case2OrCase3): - add $16, %eax - add $16, %esi - sub $16, %ebx -L(CopyFrom1To16BytesTail1Case2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1Case2) - 
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -# endif - -# ifdef USE_AS_STRNCAT - .p2align 4 -L(StrncatExit0): - movb %bh, (%eax) - mov STR3(%esp), %eax - RETURN -# endif - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit1): - movb %bh, 1(%eax) -# endif -L(Exit1): -# ifdef USE_AS_STRNCAT - movb (%esi), %dh -# endif - movb %dh, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit2): - movb %bh, 2(%eax) -# endif -L(Exit2): - movw (%esi), %dx - movw %dx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit3): - movb %bh, 3(%eax) -# endif -L(Exit3): - movw (%esi), %cx - movw %cx, (%eax) -# ifdef USE_AS_STRNCAT - movb 2(%esi), %dh -# endif - movb %dh, 2(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit4): - movb %bh, 4(%eax) -# endif -L(Exit4): - movl (%esi), %edx - movl %edx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit5): - movb %bh, 5(%eax) -# endif -L(Exit5): - movl (%esi), %ecx -# ifdef USE_AS_STRNCAT - movb 4(%esi), %dh -# endif - movb %dh, 4(%eax) - movl %ecx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit6): - movb %bh, 6(%eax) -# endif -L(Exit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%eax) - movw %dx, 4(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit7): - movb %bh, 7(%eax) -# endif -L(Exit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%eax) - movl %edx, 3(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit8): - movb %bh, 8(%eax) -# endif -L(Exit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit9): - movb %bh, 9(%eax) -# endif -L(Exit9): - movlpd (%esi), %xmm0 -# ifdef USE_AS_STRNCAT - movb 8(%esi), %dh -# endif - movb %dh, 8(%eax) - movlpd 
%xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit10): - movb %bh, 10(%eax) -# endif -L(Exit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%eax) - movw %dx, 8(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit11): - movb %bh, 11(%eax) -# endif -L(Exit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%eax) - movl %edx, 7(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit12): - movb %bh, 12(%eax) -# endif -L(Exit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%eax) - movl %edx, 8(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit13): - movb %bh, 13(%eax) -# endif -L(Exit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 5(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit14): - movb %bh, 14(%eax) -# endif -L(Exit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 6(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit15): - movb %bh, 15(%eax) -# endif -L(Exit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 7(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit16): - movb %bh, 16(%eax) -# endif -L(Exit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit17): - movb %bh, 17(%eax) -# endif -L(Exit17): - movdqu (%esi), %xmm0 -# ifdef USE_AS_STRNCAT - movb 16(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movb %dh, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit18): - movb %bh, 18(%eax) -# endif -L(Exit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%eax) - movw %cx, 
16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit19): - movb %bh, 19(%eax) -# endif -L(Exit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%eax) - movl %ecx, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit20): - movb %bh, 20(%eax) -# endif -L(Exit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%eax) - movl %ecx, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit21): - movb %bh, 21(%eax) -# endif -L(Exit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx -# ifdef USE_AS_STRNCAT - movb 20(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movl %ecx, 16(%eax) - movb %dh, 20(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit22): - movb %bh, 22(%eax) -# endif -L(Exit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%eax) - movlpd %xmm3, 14(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit23): - movb %bh, 23(%eax) -# endif -L(Exit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%eax) - movlpd %xmm3, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit24): - movb %bh, 24(%eax) -# endif -L(Exit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit25): - movb %bh, 25(%eax) -# endif -L(Exit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 -# ifdef USE_AS_STRNCAT - movb 24(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movb %dh, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit26): - movb %bh, 26(%eax) -# endif -L(Exit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movw %cx, 
24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit27): - movb %bh, 27(%eax) -# endif -L(Exit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movl %ecx, 23(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit28): - movb %bh, 28(%eax) -# endif -L(Exit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movl %ecx, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit29): - movb %bh, 29(%eax) -# endif -L(Exit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 13(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit30): - movb %bh, 30(%eax) -# endif -L(Exit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 14(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit31): - movb %bh, 31(%eax) -# endif -L(Exit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit32): - movb %bh, 32(%eax) -# endif -L(Exit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 16(%eax) - mov STR3(%esp), %eax - RETURN - -# ifdef USE_AS_STRNCAT - - .p2align 4 -L(UnalignedLeaveCase2OrCase3): - test %edx, %edx - jnz L(Unaligned64LeaveCase2) -L(Unaligned64LeaveCase3): - lea 64(%ebx), %ecx - and $-16, %ecx - add $48, %ebx - jl L(CopyFrom1To16BytesCase3) - movdqu %xmm4, (%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm5, 16(%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm6, 32(%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm7, 48(%eax) - xor %bh, %bh - movb 
%bh, 64(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -L(Unaligned64LeaveCase2): - xor %ecx, %ecx - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm4, (%eax) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm5, 16(%eax) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm6, 32(%eax) - lea 16(%eax, %ecx), %eax - lea 16(%esi, %ecx), %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) -# endif - .p2align 4 -L(ExitZero): - RETURN - -END (STRCAT) - - .p2align 4 - .section .rodata -L(ExitTable): - .int JMPTBL(L(Exit1), L(ExitTable)) - .int JMPTBL(L(Exit2), L(ExitTable)) - .int JMPTBL(L(Exit3), L(ExitTable)) - .int JMPTBL(L(Exit4), L(ExitTable)) - .int JMPTBL(L(Exit5), L(ExitTable)) - .int JMPTBL(L(Exit6), L(ExitTable)) - .int JMPTBL(L(Exit7), L(ExitTable)) - .int JMPTBL(L(Exit8), L(ExitTable)) - .int JMPTBL(L(Exit9), L(ExitTable)) - .int JMPTBL(L(Exit10), L(ExitTable)) - .int JMPTBL(L(Exit11), L(ExitTable)) - .int JMPTBL(L(Exit12), L(ExitTable)) - .int JMPTBL(L(Exit13), L(ExitTable)) - .int JMPTBL(L(Exit14), L(ExitTable)) - .int JMPTBL(L(Exit15), L(ExitTable)) - .int JMPTBL(L(Exit16), L(ExitTable)) - .int JMPTBL(L(Exit17), L(ExitTable)) - .int JMPTBL(L(Exit18), L(ExitTable)) - .int JMPTBL(L(Exit19), L(ExitTable)) - .int JMPTBL(L(Exit20), L(ExitTable)) - .int JMPTBL(L(Exit21), L(ExitTable)) - .int JMPTBL(L(Exit22), L(ExitTable)) - .int JMPTBL(L(Exit23), L(ExitTable)) - .int JMPTBL(L(Exit24), L(ExitTable)) - .int JMPTBL(L(Exit25), L(ExitTable)) - .int JMPTBL(L(Exit26), L(ExitTable)) - 
.int JMPTBL(L(Exit27), L(ExitTable)) - .int JMPTBL(L(Exit28), L(ExitTable)) - .int JMPTBL(L(Exit29), L(ExitTable)) - .int JMPTBL(L(Exit30), L(ExitTable)) - .int JMPTBL(L(Exit31), L(ExitTable)) - .int JMPTBL(L(Exit32), L(ExitTable)) -# ifdef USE_AS_STRNCAT -L(ExitStrncatTable): - .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit32), 
L(ExitStrncatTable)) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S deleted file mode 100644 index 59ffbc60a5..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S +++ /dev/null @@ -1,572 +0,0 @@ -/* strcat with SSSE3 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCAT -# define STRCAT __strcat_ssse3 -# endif - -# define PARMS 4 -# define STR1 PARMS+4 -# define STR2 STR1+4 - -# ifdef USE_AS_STRNCAT -# define LEN STR2+8 -# endif - -# define USE_AS_STRCAT - -.text -ENTRY (STRCAT) - PUSH (%edi) - mov STR1(%esp), %edi - mov %edi, %edx - -# define RETURN jmp L(StartStrcpyPart) -# include "strlen-sse2.S" - -L(StartStrcpyPart): - mov STR2(%esp), %ecx - lea (%edi, %eax), %edx -# ifdef USE_AS_STRNCAT - PUSH (%ebx) - mov LEN(%esp), %ebx - test %ebx, %ebx - jz L(StrncatExit0) - cmp $8, %ebx - jbe L(StrncatExit8Bytes) -# endif - cmpb $0, (%ecx) - jz L(Exit1) - cmpb $0, 1(%ecx) - jz L(Exit2) - cmpb $0, 2(%ecx) - jz L(Exit3) - cmpb $0, 3(%ecx) - jz L(Exit4) - cmpb $0, 4(%ecx) - jz L(Exit5) - cmpb $0, 5(%ecx) - jz L(Exit6) - cmpb $0, 6(%ecx) - jz L(Exit7) - cmpb $0, 7(%ecx) - jz L(Exit8) - cmpb $0, 8(%ecx) - jz L(Exit9) -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jb L(StrncatExit15Bytes) -# endif - cmpb $0, 9(%ecx) - jz L(Exit10) - cmpb $0, 10(%ecx) - jz L(Exit11) - cmpb $0, 11(%ecx) - jz L(Exit12) - cmpb $0, 12(%ecx) - jz L(Exit13) - cmpb $0, 13(%ecx) - jz L(Exit14) - cmpb $0, 14(%ecx) - jz L(Exit15) - cmpb $0, 15(%ecx) - jz L(Exit16) -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - je L(StrncatExit16) - -# define RETURN1 \ - POP (%ebx); \ - POP (%edi); \ - ret; \ - CFI_PUSH (%ebx); \ - CFI_PUSH (%edi) -# define USE_AS_STRNCPY -# else -# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) -# endif -# include "strcpy-ssse3.S" - .p2align 4 -L(CopyFrom1To16Bytes): - add %esi, %edx - add %esi, %ecx - - POP (%esi) - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) 
- test $0x08, %al - jnz L(Exit4) - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(ExitHigh): - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - test $0x08, %ah - jnz L(Exit12) - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - movlpd (%ecx), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit1): - movb %bh, 1(%edx) -L(Exit1): - movb (%ecx), %al - movb %al, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit2): - movb %bh, 2(%edx) -L(Exit2): - movw (%ecx), %ax - movw %ax, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit3): - movb %bh, 3(%edx) -L(Exit3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit4): - movb %bh, 4(%edx) -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit5): - movb %bh, 5(%edx) -L(Exit5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit6): - movb %bh, 6(%edx) -L(Exit6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit7): - movb %bh, 7(%edx) -L(Exit7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit8): - movb %bh, 8(%edx) -L(Exit8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit9): - movb %bh, 9(%edx) -L(Exit9): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movb 8(%ecx), %al - movb %al, 8(%edx) - movl %edi, %eax - RETURN1 - - 
.p2align 4 -L(StrncatExit10): - movb %bh, 10(%edx) -L(Exit10): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movw 8(%ecx), %ax - movw %ax, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit11): - movb %bh, 11(%edx) -L(Exit11): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 7(%ecx), %eax - movl %eax, 7(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit12): - movb %bh, 12(%edx) -L(Exit12): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit13): - movb %bh, 13(%edx) -L(Exit13): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 5(%ecx), %xmm0 - movlpd %xmm0, 5(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit14): - movb %bh, 14(%edx) -L(Exit14): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 6(%ecx), %xmm0 - movlpd %xmm0, 6(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit15): - movb %bh, 15(%edx) -L(Exit15): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit16): - movb %bh, 16(%edx) -L(Exit16): - movlpd (%ecx), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) - movl %edi, %eax - RETURN1 - -# ifdef USE_AS_STRNCPY - - CFI_PUSH(%esi) - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %esi, %ecx - lea (%esi, %edx), %esi - lea -9(%ebx), %edx - and $1<<7, %dh - or %al, %dh - test %dh, %dh - lea (%esi), %edx - POP (%esi) - jz L(ExitHighCase2) - - test $0x01, %al - jnz L(Exit1) - cmp $1, %ebx - je L(StrncatExit1) - test $0x02, %al - jnz L(Exit2) - cmp $2, %ebx - je L(StrncatExit2) - test $0x04, %al - jnz L(Exit3) - cmp $3, %ebx - je L(StrncatExit3) - test $0x08, %al - jnz L(Exit4) - cmp $4, %ebx - je L(StrncatExit4) - test $0x10, %al - jnz L(Exit5) - cmp $5, %ebx - je L(StrncatExit5) - test $0x20, %al - jnz L(Exit6) - cmp $6, %ebx - je L(StrncatExit6) - test $0x40, 
%al - jnz L(Exit7) - cmp $7, %ebx - je L(StrncatExit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - lea 7(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax - xor %cl, %cl - movb %cl, (%eax) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(ExitHighCase2): - test $0x01, %ah - jnz L(Exit9) - cmp $9, %ebx - je L(StrncatExit9) - test $0x02, %ah - jnz L(Exit10) - cmp $10, %ebx - je L(StrncatExit10) - test $0x04, %ah - jnz L(Exit11) - cmp $11, %ebx - je L(StrncatExit11) - test $0x8, %ah - jnz L(Exit12) - cmp $12, %ebx - je L(StrncatExit12) - test $0x10, %ah - jnz L(Exit13) - cmp $13, %ebx - je L(StrncatExit13) - test $0x20, %ah - jnz L(Exit14) - cmp $14, %ebx - je L(StrncatExit14) - test $0x40, %ah - jnz L(Exit15) - cmp $15, %ebx - je L(StrncatExit15) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm1 - movlpd %xmm1, 8(%edx) - movl %edi, %eax - RETURN1 - - CFI_PUSH(%esi) - -L(CopyFrom1To16BytesCase2OrCase3): - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - - .p2align 4 -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %esi, %edx - add %esi, %ecx - - POP (%esi) - - cmp $8, %ebx - ja L(ExitHighCase3) - cmp $1, %ebx - je L(StrncatExit1) - cmp $2, %ebx - je L(StrncatExit2) - cmp $3, %ebx - je L(StrncatExit3) - cmp $4, %ebx - je L(StrncatExit4) - cmp $5, %ebx - je L(StrncatExit5) - cmp $6, %ebx - je L(StrncatExit6) - cmp $7, %ebx - je L(StrncatExit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movb %bh, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(ExitHighCase3): - cmp $9, %ebx - je L(StrncatExit9) - cmp $10, %ebx - je L(StrncatExit10) - cmp $11, %ebx - je L(StrncatExit11) - cmp $12, %ebx - je L(StrncatExit12) - cmp $13, %ebx - je L(StrncatExit13) - cmp $14, %ebx - je L(StrncatExit14) - cmp $15, %ebx - je L(StrncatExit15) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm1 - movlpd %xmm1, 8(%edx) - movb %bh, 16(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit0): - movl %edi, %eax - RETURN1 - - .p2align 4 
-L(StrncatExit15Bytes): - cmp $9, %ebx - je L(StrncatExit9) - cmpb $0, 9(%ecx) - jz L(Exit10) - cmp $10, %ebx - je L(StrncatExit10) - cmpb $0, 10(%ecx) - jz L(Exit11) - cmp $11, %ebx - je L(StrncatExit11) - cmpb $0, 11(%ecx) - jz L(Exit12) - cmp $12, %ebx - je L(StrncatExit12) - cmpb $0, 12(%ecx) - jz L(Exit13) - cmp $13, %ebx - je L(StrncatExit13) - cmpb $0, 13(%ecx) - jz L(Exit14) - cmp $14, %ebx - je L(StrncatExit14) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) - lea 14(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax - movb %bh, (%eax) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit8Bytes): - cmpb $0, (%ecx) - jz L(Exit1) - cmp $1, %ebx - je L(StrncatExit1) - cmpb $0, 1(%ecx) - jz L(Exit2) - cmp $2, %ebx - je L(StrncatExit2) - cmpb $0, 2(%ecx) - jz L(Exit3) - cmp $3, %ebx - je L(StrncatExit3) - cmpb $0, 3(%ecx) - jz L(Exit4) - cmp $4, %ebx - je L(StrncatExit4) - cmpb $0, 4(%ecx) - jz L(Exit5) - cmp $5, %ebx - je L(StrncatExit5) - cmpb $0, 5(%ecx) - jz L(Exit6) - cmp $6, %ebx - je L(StrncatExit6) - cmpb $0, 6(%ecx) - jz L(Exit7) - cmp $7, %ebx - je L(StrncatExit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - lea 7(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax - movb %bh, (%eax) - movl %edi, %eax - RETURN1 - -# endif -END (STRCAT) -#endif diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S deleted file mode 100644 index 8412cb6f23..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat.S +++ /dev/null @@ -1,92 +0,0 @@ -/* Multiple versions of strcat - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#ifndef USE_AS_STRNCAT -# ifndef STRCAT -# define STRCAT strcat -# endif -#endif - -#ifdef USE_AS_STRNCAT -# define STRCAT_SSSE3 __strncat_ssse3 -# define STRCAT_SSE2 __strncat_sse2 -# define STRCAT_IA32 __strncat_ia32 -# define __GI_STRCAT __GI_strncat -#else -# define STRCAT_SSSE3 __strcat_ssse3 -# define STRCAT_SSE2 __strcat_sse2 -# define STRCAT_IA32 __strcat_ia32 -# define __GI_STRCAT __GI_strcat -#endif - - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strncat in static library since we - need strncat before the initialization happened. 
*/ -#if IS_IN (libc) - - .text -ENTRY(STRCAT) - .type STRCAT, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (STRCAT_IA32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (STRCAT_SSE2) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (STRCAT_SSSE3) -2: ret -END(STRCAT) - -# undef ENTRY -# define ENTRY(name) \ - .type STRCAT_IA32, @function; \ - .align 16; \ - .globl STRCAT_IA32; \ - .hidden STRCAT_IA32; \ - STRCAT_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal strcat calls through a PLT. - The speedup we get from using SSSE3 instruction is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32 -# undef libc_hidden_def -# define libc_hidden_def(name) \ - .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32 - -# endif -#endif - -#ifndef USE_AS_STRNCAT -# include "../../strcat.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S deleted file mode 100644 index 95fd7c084e..0000000000 --- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S +++ /dev/null @@ -1,158 +0,0 @@ -/* strchr with SSE2 with bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi) -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - .text -ENTRY (__strchr_sse2_bsf) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $15, %ecx - pshufd $0, %xmm1, %xmm1 - je L(loop) - -/* Handle unaligned string. */ - and $-16, %edi - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - je L(unaligned_no_match) - /* Check which byte is a match. */ - bsf %eax, %eax - /* Is there a NULL? */ - test %edx, %edx - je L(unaligned_match) - bsf %edx, %edx - cmpl %edx, %eax - /* Return NULL if NULL comes first. */ - ja L(return_null) -L(unaligned_match): - add %edi, %eax - add %ecx, %eax - RETURN - - .p2align 4 -L(unaligned_no_match): - test %edx, %edx - jne L(return_null) - pxor %xmm2, %xmm2 - - add $16, %edi - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - jmp L(loop) - -L(matches): - pmovmskb %xmm2, %edx - test %eax, %eax - jz L(return_null) - bsf %eax, %eax - /* There is a match. First find where NULL is. */ - test %edx, %edx - je L(match) - bsf %edx, %ecx - /* Check if NULL comes first. */ - cmpl %ecx, %eax - ja L(return_null) -L(match): - sub $16, %edi - add %edi, %eax - RETURN - -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - -END (__strchr_sse2_bsf) -#endif diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2.S b/sysdeps/i386/i686/multiarch/strchr-sse2.S deleted file mode 100644 index 1f9e875b04..0000000000 --- a/sysdeps/i386/i686/multiarch/strchr-sse2.S +++ /dev/null @@ -1,348 +0,0 @@ -/* strchr SSE2 without bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi) -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__strchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $15, %ecx - pshufd $0, %xmm1, %xmm1 - je L(loop) - -/* Handle unaligned string. */ - and $-16, %edi - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - jz L(unaligned_no_match) - /* Check which byte is a match. */ - /* Is there a NULL? */ - add %ecx, %edi - test %edx, %edx - jz L(match_case1) - jmp L(match_case2) - - .p2align 4 -L(unaligned_no_match): - test %edx, %edx - jne L(return_null) - - pxor %xmm2, %xmm2 - add $16, %edi - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - jmp L(loop) - -L(matches): - /* There is a match. First find where NULL is. */ - test %edx, %edx - jz L(match_case1) - - .p2align 4 -L(match_case2): - test %al, %al - jz L(match_higth_case2) - - mov %al, %cl - and $15, %cl - jnz L(match_case2_4) - - mov %dl, %ch - and $15, %ch - jnz L(return_null) - - test $0x10, %al - jnz L(Exit5) - test $0x10, %dl - jnz L(return_null) - test $0x20, %al - jnz L(Exit6) - test $0x20, %dl - jnz L(return_null) - test $0x40, %al - jnz L(Exit7) - test $0x40, %dl - jnz L(return_null) - lea 7(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_4): - test $0x01, %al - jnz L(Exit1) - test $0x01, %dl - jnz L(return_null) - test $0x02, %al - jnz L(Exit2) - test $0x02, %dl - jnz L(return_null) - test $0x04, %al - jnz L(Exit3) - test $0x04, %dl - jnz L(return_null) - lea 3(%edi), %eax - RETURN - - .p2align 4 -L(match_higth_case2): - test %dl, %dl - jnz L(return_null) - - mov %ah, %cl - and $15, %cl - jnz L(match_case2_12) - - mov %dh, %ch - and $15, %ch - jnz L(return_null) - - test $0x10, %ah - jnz L(Exit13) - test $0x10, %dh - jnz L(return_null) - test $0x20, %ah - jnz L(Exit14) - test $0x20, %dh - jnz L(return_null) - 
test $0x40, %ah - jnz L(Exit15) - test $0x40, %dh - jnz L(return_null) - lea 15(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_12): - test $0x01, %ah - jnz L(Exit9) - test $0x01, %dh - jnz L(return_null) - test $0x02, %ah - jnz L(Exit10) - test $0x02, %dh - jnz L(return_null) - test $0x04, %ah - jnz L(Exit11) - test $0x04, %dh - jnz L(return_null) - lea 11(%edi), %eax - RETURN - - .p2align 4 -L(match_case1): - test %al, %al - jz L(match_higth_case1) - - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) - test $0x08, %al - jnz L(Exit4) - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - lea 7(%edi), %eax - RETURN - - .p2align 4 -L(match_higth_case1): - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - test $0x08, %ah - jnz L(Exit12) - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - lea 15(%edi), %eax - RETURN - - .p2align 4 -L(Exit1): - lea (%edi), %eax - RETURN - - .p2align 4 -L(Exit2): - lea 1(%edi), %eax - RETURN - - .p2align 4 -L(Exit3): - lea 2(%edi), %eax - RETURN - - .p2align 4 -L(Exit4): - lea 3(%edi), %eax - RETURN - - .p2align 4 -L(Exit5): - lea 4(%edi), %eax - RETURN - - .p2align 4 -L(Exit6): - lea 5(%edi), %eax - RETURN - - .p2align 4 -L(Exit7): - lea 6(%edi), %eax - RETURN - - .p2align 4 -L(Exit9): - lea 8(%edi), %eax - RETURN - - .p2align 4 -L(Exit10): - lea 9(%edi), %eax - RETURN - - .p2align 4 -L(Exit11): - lea 10(%edi), %eax - RETURN - - .p2align 4 -L(Exit12): - lea 11(%edi), %eax - RETURN - - .p2align 4 -L(Exit13): - lea 12(%edi), %eax - RETURN - - .p2align 4 -L(Exit14): - lea 13(%edi), %eax - RETURN - - .p2align 4 -L(Exit15): - lea 14(%edi), %eax - RETURN - -/* Return NULL. 
*/ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - -END (__strchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S deleted file mode 100644 index 5b97b1c767..0000000000 --- a/sysdeps/i386/i686/multiarch/strchr.S +++ /dev/null @@ -1,57 +0,0 @@ -/* Multiple versions of strchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(strchr) - .type strchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strchr_sse2) -2: ret -END(strchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __strchr_ia32, @function; \ - .globl __strchr_ia32; \ - .p2align 4; \ - __strchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strchr_ia32, .-__strchr_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strchr; __GI_strchr = __strchr_ia32 -#endif - -#include "../../i586/strchr.S" diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S deleted file mode 100644 index cd26058671..0000000000 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ /dev/null @@ -1,804 +0,0 @@ -/* strcmp with SSE4.2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_STRNCMP -# ifndef STRCMP -# define STRCMP __strncmp_sse4_2 -# endif -# define STR1 8 -# define STR2 STR1+4 -# define CNT STR2+4 -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# define REM %ebp -#elif defined USE_AS_STRCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strcasecmp_l_sse4_2 -# endif -# ifdef PIC -# define STR1 12 -# else -# define STR1 8 -# endif -# define STR2 STR1+4 -# define LOCALE 12 /* Loaded before the adjustment. */ -# ifdef PIC -# define RETURN POP (%edi); POP (%ebx); ret; \ - .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) -# else -# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) -# endif -# define NONASCII __strcasecmp_nonascii -#elif defined USE_AS_STRNCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strncasecmp_l_sse4_2 -# endif -# ifdef PIC -# define STR1 16 -# else -# define STR1 12 -# endif -# define STR2 STR1+4 -# define CNT STR2+4 -# define LOCALE 16 /* Loaded before the adjustment. 
*/ -# ifdef PIC -# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ - .p2align 4; \ - CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) -# else -# define RETURN POP (%edi); POP (REM); ret; \ - .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) -# endif -# define REM %ebp -# define NONASCII __strncasecmp_nonascii -#else -# ifndef STRCMP -# define STRCMP __strcmp_sse4_2 -# endif -# define STR1 4 -# define STR2 STR1+4 -# define RETURN ret; .p2align 4 -#endif - - .section .text.sse4.2,"ax",@progbits - -#ifdef USE_AS_STRCASECMP_L -ENTRY (__strcasecmp_sse4_2) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strcasecmp_nonascii -# else - jne __strcasecmp_nonascii - jmp L(ascii) -# endif -END (__strcasecmp_sse4_2) -#endif - -#ifdef USE_AS_STRNCASECMP_L -ENTRY (__strncasecmp_sse4_2) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp 
__strncasecmp_nonascii -# else - jne __strncasecmp_nonascii - jmp L(ascii) -# endif -END (__strncasecmp_sse4_2) -#endif - - ENTRY (STRCMP) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movl LOCALE(%esp), %eax -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) - jne NONASCII - -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) -# endif -L(ascii): - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: - .quad 0x2020202020202020 - .quad 0x2020202020202020 - .previous - -# ifdef PIC -# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) -# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) -# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) -# else -# define UCLOW_reg .Lbelowupper -# define UCHIGH_reg .Ltopupper -# define LCQWORD_reg .Ltouppermask -# endif -#endif - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - PUSH (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - PUSH (%edi) -#endif - mov STR1(%esp), %edx - mov STR2(%esp), %eax -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - movl CNT(%esp), REM - test REM, REM - je L(eq) -#endif - mov %dx, %cx - and $0xfff, %cx - cmp $0xff0, %cx - ja L(first4bytes) - movdqu (%edx), %xmm2 - mov %eax, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(first4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm3; \ - movdqa UCHIGH_reg, %xmm4; \ - movdqa reg2, %xmm5; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb UCLOW_reg, %xmm3; \ - pcmpgtb reg1, %xmm4; \ - pcmpgtb UCLOW_reg, %xmm5; \ - pcmpgtb reg2, %xmm6; \ - pand %xmm4, %xmm3; \ - pand %xmm6, %xmm5; \ - pand LCQWORD_reg, %xmm3; \ - pand LCQWORD_reg, %xmm5; \ - 
por %xmm3, reg1; \ - por %xmm5, reg2 - - movdqu (%eax), %xmm1 - TOLOWER (%xmm2, %xmm1) - movd %xmm2, %ecx - movd %xmm1, %edi - movdqa %xmm2, %xmm3 - movdqa %xmm1, %xmm4 - cmpl %edi, %ecx -#else -# define TOLOWER(reg1, reg) - - movd %xmm2, %ecx - cmp (%eax), %ecx -#endif - jne L(less4bytes) -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - movdqu (%eax), %xmm1 -#endif - pxor %xmm2, %xmm1 - pxor %xmm0, %xmm0 - ptest %xmm1, %xmm0 - jnc L(less16bytes) - pcmpeqb %xmm0, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $16, REM - jbe L(eq) -#endif - add $16, %edx - add $16, %eax -L(first4bytes): - movzbl (%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl (%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, (%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - je L(eq) -#endif - - movzbl 1(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 1(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 1(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - je L(eq) -#endif - movzbl 2(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 2(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 2(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - je L(eq) -#endif - movzbl 3(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 3(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 3(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - je L(eq) -#endif - movzbl 4(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 4(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 4(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - je L(eq) -#endif - movzbl 5(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 5(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 5(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp 
$6, REM - je L(eq) -#endif - movzbl 6(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 6(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 6(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - je L(eq) -#endif - movzbl 7(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 7(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 7(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $8, REM - je L(eq) -#endif - add $8, %eax - add $8, %edx - -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - PUSH (%edi) -#endif - PUSH (%esi) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_remember_state -#endif - mov %edx, %edi - mov %eax, %esi - xorl %eax, %eax -L(check_offset): - movl %edi, %edx - movl %esi, %ecx - andl $0xfff, %edx - andl $0xfff, %ecx - cmpl %edx, %ecx - cmovl %edx, %ecx - lea -0xff0(%ecx), %edx - sub %edx, %edi - sub %edx, %esi - testl %edx, %edx - jg L(crosspage) -L(loop): - movdqu (%esi,%edx), %xmm2 - movdqu (%edi,%edx), %xmm1 - TOLOWER (%xmm2, %xmm1) - pcmpistri $0x1a, %xmm2, %xmm1 - jbe L(end) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $16, REM - jbe L(more16byteseq) -#endif - - add $16, %edx - jle L(loop) -L(crosspage): - movzbl (%edi,%edx), %eax - movzbl (%esi,%edx), %ecx -#if defined 
USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx -# endif -#endif - subl %ecx, %eax - jne L(ret) - testl %ecx, %ecx - je L(ret) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $1, REM - jbe L(more16byteseq) -#endif - inc %edx - cmp $15, %edx - jle L(crosspage) - add %edx, %edi - add %edx, %esi - jmp L(check_offset) - - .p2align 4 -L(end): - jnc L(ret) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub %ecx, REM - jbe L(more16byteseq) -#endif - lea (%ecx,%edx), %ecx - movzbl (%edi,%ecx), %eax - movzbl (%esi,%ecx), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx -# endif -#endif - subl %ecx, %eax -L(ret): - POP (%esi) - POP (%edi) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - ret - - .p2align 4 -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_restore_state -L(more16byteseq): - POP (%esi) -# ifdef USE_AS_STRNCMP - POP (%edi) -# endif -#endif -L(eq): - xorl %eax, %eax - RETURN - -L(neq): - mov $1, %eax - ja L(neq_bigger) - neg %eax -L(neq_bigger): - RETURN - -L(less16bytes): - add $0xfefefeff, %ecx - jnc L(less4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movd %xmm3, %edi - xor %edi, %ecx -#else - xor (%edx), %ecx -#endif - or $0xfefefeff, %ecx - add $1, %ecx - jnz L(less4bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - 
cmp $4, REM - jbe L(eq) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - psrldq $4, %xmm3 - psrldq $4, %xmm4 - movd %xmm3, %ecx - movd %xmm4, %edi - cmp %edi, %ecx - mov %ecx, %edi -#else - mov 4(%edx), %ecx - cmp 4(%eax), %ecx -#endif - jne L(more4bytes) - add $0xfefefeff, %ecx - jnc L(more4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - xor %edi, %ecx -#else - xor 4(%edx), %ecx -#endif - or $0xfefefeff, %ecx - add $1, %ecx - jnz L(more4bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $8, REM - jbe L(eq) -#endif - - add $8, %edx - add $8, %eax -L(less4bytes): - - movzbl (%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl (%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, (%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - je L(eq) -#endif - movzbl 1(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 1(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 1(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - je L(eq) -#endif - - movzbl 2(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 2(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), 
%edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 2(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - je L(eq) -#endif - movzbl 3(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 3(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 3(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -L(more4bytes): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - je L(eq) -#endif - movzbl 4(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 4(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 4(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - je L(eq) -#endif - movzbl 5(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 5(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 5(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - je L(eq) -#endif - 
movzbl 6(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 6(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 6(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - je L(eq) -#endif - movzbl 7(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 7(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 7(%edx) -#endif - jne L(neq) - jmp L(eq) - -END (STRCMP) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S deleted file mode 100644 index b25cc3e068..0000000000 --- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ /dev/null @@ -1,2810 +0,0 @@ -/* strcmp with SSSE3 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_STRNCMP -# ifndef STRCMP -# define STRCMP __strncmp_ssse3 -# endif -# define STR1 8 -# define STR2 STR1+4 -# define CNT STR2+4 -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# define UPDATE_STRNCMP_COUNTER \ - /* calculate left number to compare */ \ - mov $16, %esi; \ - sub %ecx, %esi; \ - cmp %esi, REM; \ - jbe L(more8byteseq); \ - sub %esi, REM -# define FLAGS %ebx -# define REM %ebp -#elif defined USE_AS_STRCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strcasecmp_l_ssse3 -# endif -# ifdef PIC -# define STR1 8 -# else -# define STR1 4 -# endif -# define STR2 STR1+4 -# define LOCALE 12 /* Loaded before the adjustment. */ -# ifdef PIC -# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx) -# else -# define RETURN ret; .p2align 4 -# endif -# define UPDATE_STRNCMP_COUNTER -# define FLAGS (%esp) -# define NONASCII __strcasecmp_nonascii -#elif defined USE_AS_STRNCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strncasecmp_l_ssse3 -# endif -# ifdef PIC -# define STR1 12 -# else -# define STR1 8 -# endif -# define STR2 STR1+4 -# define CNT STR2+4 -# define LOCALE 16 /* Loaded before the adjustment. 
*/ -# ifdef PIC -# define RETURN POP (REM); POP (%ebx); ret; \ - .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM) -# else -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# endif -# define UPDATE_STRNCMP_COUNTER \ - /* calculate left number to compare */ \ - mov $16, %esi; \ - sub %ecx, %esi; \ - cmp %esi, REM; \ - jbe L(more8byteseq); \ - sub %esi, REM -# define FLAGS (%esp) -# define REM %ebp -# define NONASCII __strncasecmp_nonascii -#else -# ifndef STRCMP -# define STRCMP __strcmp_ssse3 -# endif -# define STR1 4 -# define STR2 STR1+4 -# define RETURN ret; .p2align 4 -# define UPDATE_STRNCMP_COUNTER -# define FLAGS %ebx -#endif - - .section .text.ssse3,"ax",@progbits - -#ifdef USE_AS_STRCASECMP_L -ENTRY (__strcasecmp_ssse3) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strcasecmp_nonascii -# else - jne __strcasecmp_nonascii - jmp L(ascii) -# endif -END (__strcasecmp_ssse3) -#endif - -#ifdef USE_AS_STRNCASECMP_L -ENTRY (__strncasecmp_ssse3) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl 
LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strncasecmp_nonascii -# else - jne __strncasecmp_nonascii - jmp L(ascii) -# endif -END (__strncasecmp_ssse3) -#endif - -ENTRY (STRCMP) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movl LOCALE(%esp), %eax -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) - jne NONASCII - -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) -# endif -L(ascii): - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: - .quad 0x2020202020202020 - .quad 0x2020202020202020 - .previous - -# ifdef PIC -# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) -# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) -# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) -# else -# define UCLOW_reg .Lbelowupper -# define UCHIGH_reg .Ltopupper -# define LCQWORD_reg .Ltouppermask -# endif -#endif - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - PUSH (REM) -#endif - - movl STR1(%esp), %edx - movl STR2(%esp), %eax -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - movl CNT(%esp), REM - cmp $16, REM - jb L(less16bytes_sncmp) -#elif !defined USE_AS_STRCASECMP_L - movzbl (%eax), %ecx - cmpb %cl, (%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 1(%eax), %ecx - cmpb %cl, 1(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 2(%eax), %ecx - cmpb %cl, 2(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 3(%eax), %ecx - cmpb %cl, 3(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 4(%eax), %ecx - cmpb %cl, 4(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 
5(%eax), %ecx - cmpb %cl, 5(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 6(%eax), %ecx - cmpb %cl, 6(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 7(%eax), %ecx - cmpb %cl, 7(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - add $8, %edx - add $8, %eax -#endif - movl %edx, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(crosspage) - mov %eax, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(crosspage) - pxor %xmm0, %xmm0 - movlpd (%eax), %xmm1 - movlpd (%edx), %xmm2 - movhpd 8(%eax), %xmm1 - movhpd 8(%edx), %xmm2 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm5; \ - movdqa reg2, %xmm7; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb UCLOW_reg, %xmm5; \ - pcmpgtb UCLOW_reg, %xmm7; \ - pcmpgtb reg1, %xmm6; \ - pand %xmm6, %xmm5; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb reg2, %xmm6; \ - pand %xmm6, %xmm7; \ - pand LCQWORD_reg, %xmm5; \ - por %xmm5, reg1; \ - pand LCQWORD_reg, %xmm7; \ - por %xmm7, reg2 - TOLOWER (%xmm1, %xmm2) -#else -# define TOLOWER(reg1, reg2) -#endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %ecx - sub $0xffff, %ecx - jnz L(less16bytes) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(eq) -#endif - add $16, %eax - add $16, %edx - -L(crosspage): - -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - PUSH (FLAGS) -#endif - PUSH (%edi) - PUSH (%esi) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - pushl $0 - cfi_adjust_cfa_offset (4) -#endif -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_remember_state -#endif - - movl %edx, %edi - movl %eax, %ecx - and $0xf, %ecx - and $0xf, %edi - xor %ecx, %eax - xor %edi, %edx -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - xor FLAGS, FLAGS -#endif - cmp %edi, %ecx - je L(ashr_0) - ja L(bigger) - orl $0x20, FLAGS - xchg %edx, %eax - xchg %ecx, %edi 
-L(bigger): - lea 15(%edi), %edi - sub %ecx, %edi - cmp $8, %edi - jle L(ashr_less_8) - cmp $14, %edi - je L(ashr_15) - cmp $13, %edi - je L(ashr_14) - cmp $12, %edi - je L(ashr_13) - cmp $11, %edi - je L(ashr_12) - cmp $10, %edi - je L(ashr_11) - cmp $9, %edi - je L(ashr_10) -L(ashr_less_8): - je L(ashr_9) - cmp $7, %edi - je L(ashr_8) - cmp $6, %edi - je L(ashr_7) - cmp $5, %edi - je L(ashr_6) - cmp $4, %edi - je L(ashr_5) - cmp $3, %edi - je L(ashr_4) - cmp $2, %edi - je L(ashr_3) - cmp $1, %edi - je L(ashr_2) - cmp $0, %edi - je L(ashr_1) - -/* - * The following cases will be handled by ashr_0 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(0~15) n(0~15) 15(15+ n-n) ashr_0 - */ - .p2align 4 -L(ashr_0): - mov $0xffff, %esi - movdqa (%eax), %xmm1 - pxor %xmm0, %xmm0 - pcmpeqb %xmm1, %xmm0 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movdqa (%edx), %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm2, %xmm1 -#else - pcmpeqb (%edx), %xmm1 -#endif - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - mov %ecx, %edi - jne L(less32bytes) - UPDATE_STRNCMP_COUNTER - movl $0x10, FLAGS - mov $0x10, %ecx - pxor %xmm0, %xmm0 - .p2align 4 -L(loop_ashr_0): - movdqa (%eax, %ecx), %xmm1 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movdqa (%edx, %ecx), %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 -#else - pcmpeqb %xmm1, %xmm0 - pcmpeqb (%edx, %ecx), %xmm1 -#endif - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - jmp L(loop_ashr_0) - -/* - * The following cases will be handled by ashr_1 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(15) n -15 0(15 +(n-15) - n) ashr_1 - */ - .p2align 4 -L(ashr_1): - mov $0xffff, %esi - pxor %xmm0, 
%xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $15, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -15(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $1, FLAGS - lea 1(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_1): - add $16, %edi - jg L(nibble_ashr_1) - -L(gobble_ashr_1): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $1, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_1) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $1, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_1) - - .p2align 4 -L(nibble_ashr_1): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfffe, %esi - jnz L(ashr_1_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $15, REM - jbe L(ashr_1_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_1) - - .p2align 4 -L(ashr_1_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $1, %xmm0 - psrldq $1, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_2 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case 
- * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 - */ - .p2align 4 -L(ashr_2): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $14, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -14(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $2, FLAGS - lea 2(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_2): - add $16, %edi - jg L(nibble_ashr_2) - -L(gobble_ashr_2): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $2, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_2) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $2, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_2) - - .p2align 4 -L(nibble_ashr_2): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfffc, %esi - jnz L(ashr_2_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $14, REM - jbe L(ashr_2_exittail) -#endif - - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_2) - - .p2align 4 -L(ashr_2_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $2, %xmm0 - psrldq $2, %xmm3 - jmp L(aftertail) - -/* - * The 
following cases will be handled by ashr_3 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 - */ - .p2align 4 -L(ashr_3): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $13, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -13(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $3, FLAGS - lea 3(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_3): - add $16, %edi - jg L(nibble_ashr_3) - -L(gobble_ashr_3): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $3, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_3) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $3, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_3) - - .p2align 4 -L(nibble_ashr_3): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfff8, %esi - jnz L(ashr_3_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $13, REM - jbe L(ashr_3_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_3) - - .p2align 4 
-L(ashr_3_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $3, %xmm0 - psrldq $3, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_4 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 - */ - .p2align 4 -L(ashr_4): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $12, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -12(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $4, FLAGS - lea 4(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_4): - add $16, %edi - jg L(nibble_ashr_4) - -L(gobble_ashr_4): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $4, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_4) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $4, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_4) - - .p2align 4 -L(nibble_ashr_4): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfff0, %esi - jnz L(ashr_4_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $12, 
REM - jbe L(ashr_4_exittail) -#endif - - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_4) - - .p2align 4 -L(ashr_4_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $4, %xmm0 - psrldq $4, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_5 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(11~15) n -11 4(15 +(n-11) - n) ashr_5 - */ - .p2align 4 -L(ashr_5): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $11, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -11(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $5, FLAGS - lea 5(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_5): - add $16, %edi - jg L(nibble_ashr_5) - -L(gobble_ashr_5): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $5, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_5) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $5, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_5) - - .p2align 4 -L(nibble_ashr_5): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test 
$0xffe0, %esi - jnz L(ashr_5_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $11, REM - jbe L(ashr_5_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_5) - - .p2align 4 -L(ashr_5_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $5, %xmm0 - psrldq $5, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_6 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(10~15) n -10 5(15 +(n-10) - n) ashr_6 - */ - - .p2align 4 -L(ashr_6): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $10, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -10(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $6, FLAGS - lea 6(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_6): - add $16, %edi - jg L(nibble_ashr_6) - -L(gobble_ashr_6): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $6, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_6) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $6, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, 
%xmm3 - jmp L(loop_ashr_6) - - .p2align 4 -L(nibble_ashr_6): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xffc0, %esi - jnz L(ashr_6_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $10, REM - jbe L(ashr_6_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_6) - - .p2align 4 -L(ashr_6_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $6, %xmm0 - psrldq $6, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_7 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7 - */ - - .p2align 4 -L(ashr_7): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $9, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -9(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $7, FLAGS - lea 8(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_7): - add $16, %edi - jg L(nibble_ashr_7) - -L(gobble_ashr_7): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $7, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_7) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $7, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_7) - - .p2align 4 -L(nibble_ashr_7): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xff80, %esi - jnz L(ashr_7_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $9, REM - jbe L(ashr_7_exittail) -#endif - pxor %xmm0, %xmm0 - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_7) - - .p2align 4 -L(ashr_7_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $7, %xmm0 - psrldq $7, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_8 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8 - */ - .p2align 4 -L(ashr_8): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $8, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -8(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $8, FLAGS - lea 8(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_8): - add $16, %edi - jg L(nibble_ashr_8) - -L(gobble_ashr_8): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $8, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_8) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $8, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, 
%xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_8) - - .p2align 4 -L(nibble_ashr_8): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xff00, %esi - jnz L(ashr_8_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $8, REM - jbe L(ashr_8_exittail) -#endif - pxor %xmm0, %xmm0 - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_8) - - .p2align 4 -L(ashr_8_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $8, %xmm0 - psrldq $8, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_9 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9 - */ - .p2align 4 -L(ashr_9): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $7, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -7(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $9, FLAGS - lea 9(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_9): - add $16, %edi - jg L(nibble_ashr_9) - -L(gobble_ashr_9): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $9, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_9) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), 
%xmm2 - movdqa %xmm2, %xmm4 - - palignr $9, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_9) - - .p2align 4 -L(nibble_ashr_9): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfe00, %esi - jnz L(ashr_9_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(ashr_9_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_9) - - .p2align 4 -L(ashr_9_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $9, %xmm0 - psrldq $9, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_10 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10 - */ - .p2align 4 -L(ashr_10): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $6, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -6(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $10, FLAGS - lea 10(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_10): - add $16, %edi - jg L(nibble_ashr_10) - -L(gobble_ashr_10): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $10, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - 
movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_10) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $10, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_10) - - .p2align 4 -L(nibble_ashr_10): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfc00, %esi - jnz L(ashr_10_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - jbe L(ashr_10_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_10) - - .p2align 4 -L(ashr_10_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $10, %xmm0 - psrldq $10, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_11 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11 - */ - .p2align 4 -L(ashr_11): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $5, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -5(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $11, FLAGS - lea 11(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_11): - add $16, %edi - jg L(nibble_ashr_11) - -L(gobble_ashr_11): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $11, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_11) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $11, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_11) - - .p2align 4 -L(nibble_ashr_11): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xf800, %esi - jnz L(ashr_11_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - jbe L(ashr_11_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_11) - - .p2align 4 -L(ashr_11_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $11, %xmm0 - psrldq $11, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_12 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12 - */ - .p2align 4 -L(ashr_12): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $4, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -4(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $12, FLAGS - lea 12(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_12): - add $16, %edi - jg L(nibble_ashr_12) - -L(gobble_ashr_12): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $12, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb 
%xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_12) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $12, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_12) - - .p2align 4 -L(nibble_ashr_12): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xf000, %esi - jnz L(ashr_12_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - jbe L(ashr_12_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_12) - - .p2align 4 -L(ashr_12_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $12, %xmm0 - psrldq $12, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_13 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13 - */ - .p2align 4 -L(ashr_13): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $3, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -3(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $13, FLAGS - lea 13(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_13): - add $16, %edi - jg L(nibble_ashr_13) - -L(gobble_ashr_13): - movdqa (%eax, 
%ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $13, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_13) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $13, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_13) - - .p2align 4 -L(nibble_ashr_13): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xe000, %esi - jnz L(ashr_13_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - jbe L(ashr_13_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_13) - - .p2align 4 -L(ashr_13_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $13, %xmm0 - psrldq $13, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14 - */ - .p2align 4 -L(ashr_14): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $2, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -2(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $14, FLAGS - lea 14(%edx), %edi - and $0xfff, %edi - sub 
$0x1000, %edi - - .p2align 4 -L(loop_ashr_14): - add $16, %edi - jg L(nibble_ashr_14) - -L(gobble_ashr_14): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $14, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_14) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $14, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_14) - - .p2align 4 -L(nibble_ashr_14): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xc000, %esi - jnz L(ashr_14_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - jbe L(ashr_14_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_14) - - .p2align 4 -L(ashr_14_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $14, %xmm0 - psrldq $14, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15 - */ - - .p2align 4 -L(ashr_15): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $1, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -1(%ecx), %edi - jnz L(less32bytes) - - 
UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $15, FLAGS - lea 15(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_15): - add $16, %edi - jg L(nibble_ashr_15) - -L(gobble_ashr_15): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $15, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_15) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $15, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_15) - - .p2align 4 -L(nibble_ashr_15): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0x8000, %esi - jnz L(ashr_15_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - jbe L(ashr_15_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_15) - - .p2align 4 -L(ashr_15_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $15, %xmm0 - psrldq $15, %xmm3 - jmp L(aftertail) - - .p2align 4 -L(aftertail): - TOLOWER (%xmm1, %xmm3) - pcmpeqb %xmm3, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - not %esi -L(exit): - mov FLAGS, %edi - and $0x1f, %edi - lea -16(%edi, %ecx), %edi -L(less32bytes): - add %edi, %edx - add %ecx, %eax - testl $0x20, FLAGS - jz L(ret2) - xchg %eax, %edx - - .p2align 4 -L(ret2): - mov %esi, %ecx -#if defined USE_AS_STRCASECMP_L || defined 
USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -#endif - POP (%esi) - POP (%edi) -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - POP (FLAGS) -#endif -L(less16bytes): - test %cl, %cl - jz L(2next_8_bytes) - - test $0x01, %cl - jnz L(Byte0) - - test $0x02, %cl - jnz L(Byte1) - - test $0x04, %cl - jnz L(Byte2) - - test $0x08, %cl - jnz L(Byte3) - - test $0x10, %cl - jnz L(Byte4) - - test $0x20, %cl - jnz L(Byte5) - - test $0x40, %cl - jnz L(Byte6) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(eq) -#endif - - movzx 7(%eax), %ecx - movzx 7(%edx), %eax -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte0): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $0, REM - jbe L(eq) -#endif - movzx (%eax), %ecx - movzx (%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte1): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - jbe L(eq) -#endif - movzx 1(%eax), %ecx - movzx 1(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - 
RETURN - -L(Byte2): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - jbe L(eq) -#endif - movzx 2(%eax), %ecx - movzx 2(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte3): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - jbe L(eq) -#endif - movzx 3(%eax), %ecx - movzx 3(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte4): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - jbe L(eq) -#endif - movzx 4(%eax), %ecx - movzx 4(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte5): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - jbe L(eq) -#endif - movzx 5(%eax), %ecx - movzx 5(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), 
%eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte6): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - jbe L(eq) -#endif - movzx 6(%eax), %ecx - movzx 6(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(2next_8_bytes): - add $8, %eax - add $8, %edx -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $8, REM - lea -8(REM), REM - jbe L(eq) -#endif - - test $0x01, %ch - jnz L(Byte0) - - test $0x02, %ch - jnz L(Byte1) - - test $0x04, %ch - jnz L(Byte2) - - test $0x08, %ch - jnz L(Byte3) - - test $0x10, %ch - jnz L(Byte4) - - test $0x20, %ch - jnz L(Byte5) - - test $0x40, %ch - jnz L(Byte6) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(eq) -#endif - movzx 7(%eax), %ecx - movzx 7(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -#ifdef USE_AS_STRNCMP -L(neq_sncmp): -#endif -L(neq): - mov $1, %eax - ja L(neq_bigger) - neg %eax -L(neq_bigger): -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -#endif -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - ret - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - .p2align 4 - cfi_restore_state 
-L(more8byteseq): - -# ifdef USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -# endif - POP (%esi) - POP (%edi) -# ifdef USE_AS_STRNCMP - POP (FLAGS) -# endif -#endif - -#ifdef USE_AS_STRNCMP -L(eq_sncmp): -#endif -L(eq): - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - xorl %eax, %eax - ret - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - .p2align 4 -# if defined USE_AS_STRNCASECMP_L && defined PIC - CFI_PUSH (%ebx) -# endif - CFI_PUSH (REM) -L(less16bytes_sncmp): -# ifdef USE_AS_STRNCASECMP_L - PUSH (%esi) -# endif - test REM, REM - jz L(eq_sncmp) - - movzbl (%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl (%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, (%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $1, REM - je L(eq_sncmp) - - movzbl 1(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 1(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 1(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $2, REM - je L(eq_sncmp) - - movzbl 2(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 2(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), 
%esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 2(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $3, REM - je L(eq_sncmp) - - movzbl 3(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 3(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 3(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $4, REM - je L(eq_sncmp) - - movzbl 4(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 4(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 4(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $5, REM - je L(eq_sncmp) - - movzbl 5(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 5(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 5(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $6, REM - je L(eq_sncmp) - - movzbl 6(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 6(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 6(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl 
- je L(eq_sncmp) - - cmp $7, REM - je L(eq_sncmp) - - movzbl 7(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 7(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 7(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - - cmp $8, REM - je L(eq_sncmp) - - movzbl 8(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 8(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 8(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $9, REM - je L(eq_sncmp) - - movzbl 9(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 9(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 9(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $10, REM - je L(eq_sncmp) - - movzbl 10(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 10(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 10(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $11, REM - je L(eq_sncmp) - - movzbl 11(%eax), %ecx -# ifdef 
USE_AS_STRNCASECMP_L - movzbl 11(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 11(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - - cmp $12, REM - je L(eq_sncmp) - - movzbl 12(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 12(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 12(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $13, REM - je L(eq_sncmp) - - movzbl 13(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 13(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 13(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $14, REM - je L(eq_sncmp) - - movzbl 14(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 14(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 14(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $15, REM - je L(eq_sncmp) - - movzbl 15(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 15(%edx), %esi -# ifdef PIC - movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 15(%edx) -# endif - jne L(neq_sncmp) - -# ifdef USE_AS_STRNCASECMP_L -L(eq_sncmp): - POP (%esi) -# endif - POP (REM) -# if defined USE_AS_STRNCASECMP_L && defined PIC - POP (%ebx) -# endif - xor %eax, %eax - ret - -# ifdef USE_AS_STRNCASECMP_L - .p2align 4 -# ifdef PIC - CFI_PUSH (%ebx) -# endif - CFI_PUSH (REM) - CFI_PUSH (%esi) -L(neq_sncmp): - mov $1, %eax - mov $-1, %edx - cmovna %edx, %eax - POP (%esi) - POP (REM) -# ifdef PIC - POP (%ebx) -# endif - ret -# endif -#endif - -END (STRCMP) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S deleted file mode 100644 index 56de25a4b7..0000000000 --- a/sysdeps/i386/i686/multiarch/strcmp.S +++ /dev/null @@ -1,95 +0,0 @@ -/* Multiple versions of strcmp - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#ifdef USE_AS_STRNCMP -# define STRCMP strncmp -# define __GI_STRCMP __GI_strncmp -# define __STRCMP_IA32 __strncmp_ia32 -# define __STRCMP_SSSE3 __strncmp_ssse3 -# define __STRCMP_SSE4_2 __strncmp_sse4_2 -#elif defined USE_AS_STRCASECMP_L -# define STRCMP __strcasecmp_l -# define __GI_STRCMP __GI_strcasecmp_l -# define __STRCMP_IA32 __strcasecmp_l_ia32 -# define __STRCMP_SSSE3 __strcasecmp_l_ssse3 -# define __STRCMP_SSE4_2 __strcasecmp_l_sse4_2 -#elif defined USE_AS_STRNCASECMP_L -# define STRCMP __strncasecmp_l -# define __GI_STRCMP __GI_strncasecmp_l -# define __STRCMP_IA32 __strncasecmp_l_ia32 -# define __STRCMP_SSSE3 __strncasecmp_l_ssse3 -# define __STRCMP_SSE4_2 __strncasecmp_l_sse4_2 -#else -# define STRCMP strcmp -# define __GI_STRCMP __GI_strcmp -# define __STRCMP_IA32 __strcmp_ia32 -# define __STRCMP_SSSE3 __strcmp_ssse3 -# define __STRCMP_SSE4_2 __strcmp_sse4_2 -#endif - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strncmp in static library since we - need strncmp before the initialization happened. 
*/ -#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc) - .text -ENTRY(STRCMP) - .type STRCMP, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__STRCMP_IA32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - HAS_ARCH_FEATURE (Slow_SSE4_2) - jnz 2f - LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2) -2: ret -END(STRCMP) - -# undef ENTRY -# define ENTRY(name) \ - .type __STRCMP_IA32, @function; \ - .p2align 4; \ - .globl __STRCMP_IA32; \ - .hidden __STRCMP_IA32; \ - __STRCMP_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32 -# endif -#endif - -#if !defined USE_AS_STRNCMP && !defined USE_AS_STRCASECMP_L \ - && !defined USE_AS_STRNCASECMP_L -# include "../strcmp.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S deleted file mode 100644 index ed627a5f62..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S +++ /dev/null @@ -1,2250 +0,0 @@ -/* strcpy with SSE2 and unaligned load - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# include <sysdep.h> - - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCPY -# define STRCPY __strcpy_sse2 -# endif - -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -# ifdef USE_AS_STRNCPY -# define PARMS 16 -# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi) -# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \ - CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi); - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into ECX and branch to it. TABLE is a - jump table with relative offsets. - INDEX is a register contains the index into the jump table. - SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into ECX. */ \ - SETUP_PIC_REG(cx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ecx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ecx,INDEX,SCALE), %ecx; \ - /* We loaded the jump table and adjusted ECX. Go. */ \ - jmp *%ecx -# else -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - -.text -ENTRY (STRCPY) - ENTRANCE - mov STR1(%esp), %edi - mov STR2(%esp), %esi - movl LEN(%esp), %ebx - test %ebx, %ebx - jz L(ExitZero) - - mov %esi, %ecx -# ifndef USE_AS_STPCPY - mov %edi, %eax /* save result */ -# endif - and $15, %ecx - jz L(SourceStringAlignmentZero) - - and $-16, %esi - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - - pcmpeqb (%esi), %xmm1 - add %ecx, %ebx - pmovmskb %xmm1, %edx - shr %cl, %edx -# ifdef USE_AS_STPCPY - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# else - cmp $17, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $32, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# else - cmp $33, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%edi) - - sub %ecx, %edi - -/* If source address alignment != destination address alignment */ - .p2align 4 -L(Unalign16Both): - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%edi, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $48, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm2) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%edi, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm3) - - movaps 16(%esi, %ecx), %xmm4 - movdqu %xmm3, (%edi, %ecx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm4) - - movaps 16(%esi, %ecx), %xmm1 - movdqu %xmm4, (%edi, %ecx) - 
pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm1) - - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%edi, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm2) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%edi, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm3) - - movdqu %xmm3, (%edi, %ecx) - mov %esi, %edx - lea 16(%esi, %ecx), %esi - and $-0x40, %esi - sub %esi, %edx - sub %edx, %edi - lea 128(%ebx, %edx), %ebx - -L(Unaligned64Loop): - movaps (%esi), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%esi), %xmm5 - movaps 32(%esi), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%esi), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) - test %edx, %edx - jnz L(Unaligned64Leave) -L(Unaligned64Loop_start): - add $64, %edi - add $64, %esi - movdqu %xmm4, -64(%edi) - movaps (%esi), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%edi) - movaps 16(%esi), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%esi), %xmm3 - movdqu %xmm6, -32(%edi) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%edi) - movaps 48(%esi), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) - test %edx, %edx - jz L(Unaligned64Loop_start) -L(Unaligned64Leave): - pxor %xmm1, %xmm1 - - pcmpeqb %xmm4, %xmm0 - pcmpeqb %xmm5, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_0) - test %ecx, %ecx - jnz L(CopyFrom1To16BytesUnaligned_16) - - pcmpeqb %xmm6, %xmm0 - pcmpeqb %xmm7, %xmm1 - 
pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_32) - - bsf %ecx, %edx - movdqu %xmm4, (%edi) - movdqu %xmm5, 16(%edi) - movdqu %xmm6, 32(%edi) -# ifdef USE_AS_STPCPY - lea 48(%edi, %edx), %eax -# endif - movdqu %xmm7, 48(%edi) - add $15, %ebx - sub %edx, %ebx - lea 49(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - -/* If source address alignment == destination address alignment */ - -L(SourceStringAlignmentZero): - pxor %xmm0, %xmm0 - movdqa (%esi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# else - cmp $17, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1) - - pcmpeqb 16(%esi), %xmm0 - movdqu %xmm1, (%edi) - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $32, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# else - cmp $33, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes1) - - jmp L(Unalign16Both) - -/*-----------------End of main part---------------------------*/ - -/* Case1 */ - .p2align 4 -L(CopyFrom1To16BytesTail): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1): - add $16, %esi - add $16, %edi - sub $16, %ebx -L(CopyFrom1To16BytesTail1): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes): - sub %ecx, %ebx - bsf %edx, %edx - add %ecx, %esi - add $16, %edx - sub %ecx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_0): - bsf %edx, %edx -# ifdef USE_AS_STPCPY - lea (%edi, %edx), %eax -# endif - movdqu %xmm4, (%edi) - add $63, %ebx - sub %edx, %ebx - lea 1(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_16): - bsf %ecx, %edx - movdqu %xmm4, (%edi) -# ifdef 
USE_AS_STPCPY - lea 16(%edi, %edx), %eax -# endif - movdqu %xmm5, 16(%edi) - add $47, %ebx - sub %edx, %ebx - lea 17(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_32): - bsf %edx, %edx - movdqu %xmm4, (%edi) - movdqu %xmm5, 16(%edi) -# ifdef USE_AS_STPCPY - lea 32(%edi, %edx), %eax -# endif - movdqu %xmm6, 32(%edi) - add $31, %ebx - sub %edx, %ebx - lea 33(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm6): - movdqu %xmm6, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm5): - movdqu %xmm5, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm4): - movdqu %xmm4, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm3): - movdqu %xmm3, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm1): - movdqu %xmm1, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesExit): - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -/* Case2 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %ecx, %edi - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - add $16, %edx - sub %ecx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -L(CopyFrom1To16BytesTailCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -L(CopyFrom1To16BytesTail1Case2): - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -/* Case2 or Case3, Case3 */ - - .p2align 4 
-L(CopyFrom1To16BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesCase2) -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %ecx, %edi - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To32BytesCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTailCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTailCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1Case2OrCase3): - add $16, %edi - add $16, %esi - sub $16, %ebx -L(CopyFrom1To16BytesTail1Case2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1Case2) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(Exit0): -# ifdef USE_AS_STPCPY - mov %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit1): - movb %dh, (%edi) -# ifdef USE_AS_STPCPY - lea (%edi), %eax -# endif - sub $1, %ebx - lea 1(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit2): - movw (%esi), %dx - movw %dx, (%edi) -# ifdef USE_AS_STPCPY - lea 1(%edi), %eax -# endif - sub $2, %ebx - lea 2(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit3): - movw (%esi), %cx - movw %cx, (%edi) - movb %dh, 2(%edi) -# ifdef USE_AS_STPCPY - lea 2(%edi), %eax -# endif - sub $3, %ebx - lea 3(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit4): - movl (%esi), %edx - movl %edx, (%edi) -# ifdef USE_AS_STPCPY - lea 3(%edi), %eax -# endif - sub $4, %ebx - lea 4(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit5): - movl (%esi), %ecx - movb %dh, 4(%edi) - movl %ecx, (%edi) -# ifdef USE_AS_STPCPY - lea 4(%edi), %eax -# endif - sub $5, %ebx - lea 5(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit6): - movl (%esi), %ecx - movw 
4(%esi), %dx - movl %ecx, (%edi) - movw %dx, 4(%edi) -# ifdef USE_AS_STPCPY - lea 5(%edi), %eax -# endif - sub $6, %ebx - lea 6(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%edi) - movl %edx, 3(%edi) -# ifdef USE_AS_STPCPY - lea 6(%edi), %eax -# endif - sub $7, %ebx - lea 7(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 7(%edi), %eax -# endif - sub $8, %ebx - lea 8(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit9): - movlpd (%esi), %xmm0 - movb %dh, 8(%edi) - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 8(%edi), %eax -# endif - sub $9, %ebx - lea 9(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 9(%edi), %eax -# endif - sub $10, %ebx - lea 10(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) -# ifdef USE_AS_STPCPY - lea 10(%edi), %eax -# endif - sub $11, %ebx - lea 11(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 11(%edi), %eax -# endif - sub $12, %ebx - lea 12(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 5(%edi) -# ifdef USE_AS_STPCPY - lea 12(%edi), %eax -# endif - sub $13, %ebx - lea 13(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 6(%edi) -# ifdef USE_AS_STPCPY - lea 13(%edi), %eax -# 
endif - sub $14, %ebx - lea 14(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 7(%edi) -# ifdef USE_AS_STPCPY - lea 14(%edi), %eax -# endif - sub $15, %ebx - lea 15(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 15(%edi), %eax -# endif - sub $16, %ebx - lea 16(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit17): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) - movb %dh, 16(%edi) -# ifdef USE_AS_STPCPY - lea 16(%edi), %eax -# endif - sub $17, %ebx - lea 17(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%edi) - movw %cx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 17(%edi), %eax -# endif - sub $18, %ebx - lea 18(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 15(%edi) -# ifdef USE_AS_STPCPY - lea 18(%edi), %eax -# endif - sub $19, %ebx - lea 19(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 19(%edi), %eax -# endif - sub $20, %ebx - lea 20(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) - movb %dh, 20(%edi) -# ifdef USE_AS_STPCPY - lea 20(%edi), %eax -# endif - sub $21, %ebx - lea 21(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 14(%edi) -# ifdef USE_AS_STPCPY - lea 21(%edi), %eax -# endif - sub $22, %ebx - lea 22(%edi), %edi - 
jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 15(%edi) -# ifdef USE_AS_STPCPY - lea 22(%edi), %eax -# endif - sub $23, %ebx - lea 23(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 23(%edi), %eax -# endif - sub $24, %ebx - lea 24(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movb %dh, 24(%edi) -# ifdef USE_AS_STPCPY - lea 24(%edi), %eax -# endif - sub $25, %ebx - lea 25(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movw %cx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 25(%edi), %eax -# endif - sub $26, %ebx - lea 26(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 23(%edi) -# ifdef USE_AS_STPCPY - lea 26(%edi), %eax -# endif - sub $27, %ebx - lea 27(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 27(%edi), %eax -# endif - sub $28, %ebx - lea 28(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 13(%edi) -# ifdef USE_AS_STPCPY - lea 28(%edi), %eax -# endif - sub $29, %ebx - lea 29(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 
-L(Exit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 14(%edi) -# ifdef USE_AS_STPCPY - lea 29(%edi), %eax -# endif - sub $30, %ebx - lea 30(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - - .p2align 4 -L(Exit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 15(%edi) -# ifdef USE_AS_STPCPY - lea 30(%edi), %eax -# endif - sub $31, %ebx - lea 31(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 31(%edi), %eax -# endif - sub $32, %ebx - lea 32(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(StrncpyExit1): - movb (%esi), %dl - movb %dl, (%edi) -# ifdef USE_AS_STPCPY - lea 1(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit2): - movw (%esi), %dx - movw %dx, (%edi) -# ifdef USE_AS_STPCPY - lea 2(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit3): - movw (%esi), %cx - movb 2(%esi), %dl - movw %cx, (%edi) - movb %dl, 2(%edi) -# ifdef USE_AS_STPCPY - lea 3(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit4): - movl (%esi), %edx - movl %edx, (%edi) -# ifdef USE_AS_STPCPY - lea 4(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit5): - movl (%esi), %ecx - movb 4(%esi), %dl - movl %ecx, (%edi) - movb %dl, 4(%edi) -# ifdef USE_AS_STPCPY - lea 5(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%edi) - movw %dx, 4(%edi) -# ifdef USE_AS_STPCPY - lea 6(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%edi) - movl %edx, 3(%edi) -# ifdef USE_AS_STPCPY - lea 7(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 8(%edi), %eax -# endif - RETURN - - .p2align 
4 -L(StrncpyExit9): - movlpd (%esi), %xmm0 - movb 8(%esi), %dl - movlpd %xmm0, (%edi) - movb %dl, 8(%edi) -# ifdef USE_AS_STPCPY - lea 9(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 10(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) -# ifdef USE_AS_STPCPY - lea 11(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 12(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 5(%edi) -# ifdef USE_AS_STPCPY - lea 13(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 6(%edi) -# ifdef USE_AS_STPCPY - lea 14(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 7(%edi) -# ifdef USE_AS_STPCPY - lea 15(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 16(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit17): - movdqu (%esi), %xmm0 - movb 16(%esi), %cl - movdqu %xmm0, (%edi) - movb %cl, 16(%edi) -# ifdef USE_AS_STPCPY - lea 17(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%edi) - movw %cx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 18(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 15(%edi) -# ifdef USE_AS_STPCPY - lea 19(%edi), %eax -# endif - RETURN - - .p2align 4 
-L(StrncpyExit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 20(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movb 20(%esi), %dl - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) - movb %dl, 20(%edi) -# ifdef USE_AS_STPCPY - lea 21(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 14(%edi) -# ifdef USE_AS_STPCPY - lea 22(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 15(%edi) -# ifdef USE_AS_STPCPY - lea 23(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 24(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movb 24(%esi), %cl - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movb %cl, 24(%edi) -# ifdef USE_AS_STPCPY - lea 25(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movw %cx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 26(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 23(%edi) -# ifdef USE_AS_STPCPY - lea 27(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 28(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - 
movdqu %xmm0, (%edi) - movdqu %xmm2, 13(%edi) -# ifdef USE_AS_STPCPY - lea 29(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 14(%edi) -# ifdef USE_AS_STPCPY - lea 30(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 15(%edi) -# ifdef USE_AS_STPCPY - lea 31(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 32(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit33): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movb 32(%esi), %cl - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) - movb %cl, 32(%edi) - RETURN - - .p2align 4 -L(Fill0): - RETURN - - .p2align 4 -L(Fill1): - movb %dl, (%edi) - RETURN - - .p2align 4 -L(Fill2): - movw %dx, (%edi) - RETURN - - .p2align 4 -L(Fill3): - movl %edx, -1(%edi) - RETURN - - .p2align 4 -L(Fill4): - movl %edx, (%edi) - RETURN - - .p2align 4 -L(Fill5): - movl %edx, (%edi) - movb %dl, 4(%edi) - RETURN - - .p2align 4 -L(Fill6): - movl %edx, (%edi) - movw %dx, 4(%edi) - RETURN - - .p2align 4 -L(Fill7): - movlpd %xmm0, -1(%edi) - RETURN - - .p2align 4 -L(Fill8): - movlpd %xmm0, (%edi) - RETURN - - .p2align 4 -L(Fill9): - movlpd %xmm0, (%edi) - movb %dl, 8(%edi) - RETURN - - .p2align 4 -L(Fill10): - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) - RETURN - - .p2align 4 -L(Fill11): - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) - RETURN - - .p2align 4 -L(Fill12): - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) - RETURN - - .p2align 4 -L(Fill13): - movlpd %xmm0, (%edi) - movlpd %xmm0, 5(%edi) - RETURN - - .p2align 4 -L(Fill14): - movlpd %xmm0, (%edi) - movlpd %xmm0, 6(%edi) - RETURN - - .p2align 4 -L(Fill15): - movdqu %xmm0, -1(%edi) - RETURN - - .p2align 4 -L(Fill16): - movdqu %xmm0, (%edi) - RETURN - - 
.p2align 4 -L(CopyFrom1To16BytesUnalignedXmm2): - movdqu %xmm2, (%edi, %ecx) - - .p2align 4 -L(CopyFrom1To16BytesXmmExit): - bsf %edx, %edx - add $15, %ebx - add %ecx, %edi -# ifdef USE_AS_STPCPY - lea (%edi, %edx), %eax -# endif - sub %edx, %ebx - lea 1(%edi, %edx), %edi - - .p2align 4 -L(StrncpyFillTailWithZero): - pxor %xmm0, %xmm0 - xor %edx, %edx - sub $16, %ebx - jbe L(StrncpyFillExit) - - movdqu %xmm0, (%edi) - add $16, %edi - - mov %edi, %esi - and $0xf, %esi - sub %esi, %edi - add %esi, %ebx - sub $64, %ebx - jb L(StrncpyFillLess64) - -L(StrncpyFillLoopMovdqa): - movdqa %xmm0, (%edi) - movdqa %xmm0, 16(%edi) - movdqa %xmm0, 32(%edi) - movdqa %xmm0, 48(%edi) - add $64, %edi - sub $64, %ebx - jae L(StrncpyFillLoopMovdqa) - -L(StrncpyFillLess64): - add $32, %ebx - jl L(StrncpyFillLess32) - movdqa %xmm0, (%edi) - movdqa %xmm0, 16(%edi) - add $32, %edi - sub $16, %ebx - jl L(StrncpyFillExit) - movdqa %xmm0, (%edi) - add $16, %edi - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - -L(StrncpyFillLess32): - add $16, %ebx - jl L(StrncpyFillExit) - movdqa %xmm0, (%edi) - add $16, %edi - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - -L(StrncpyFillExit): - add $16, %ebx - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - - .p2align 4 -L(UnalignedLeaveCase2OrCase3): - test %edx, %edx - jnz L(Unaligned64LeaveCase2) -L(Unaligned64LeaveCase3): - lea 64(%ebx), %ecx - and $-16, %ecx - add $48, %ebx - jl L(CopyFrom1To16BytesCase3) - movdqu %xmm4, (%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm5, 16(%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm6, 32(%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm7, 48(%edi) -# ifdef USE_AS_STPCPY - lea 64(%edi), %eax -# endif - RETURN - - .p2align 4 -L(Unaligned64LeaveCase2): - xor %ecx, %ecx - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm4) - - pcmpeqb %xmm5, %xmm0 - 
pmovmskb %xmm0, %edx - movdqu %xmm4, (%edi) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm5) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm5, 16(%edi) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm6) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm6, 32(%edi) - lea 16(%edi, %ecx), %edi - lea 16(%esi, %ecx), %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(ExitZero): - movl %edi, %eax - RETURN - -END (STRCPY) - - .p2align 4 - .section .rodata -L(ExitTable): - .int JMPTBL(L(Exit1), L(ExitTable)) - .int JMPTBL(L(Exit2), L(ExitTable)) - .int JMPTBL(L(Exit3), L(ExitTable)) - .int JMPTBL(L(Exit4), L(ExitTable)) - .int JMPTBL(L(Exit5), L(ExitTable)) - .int JMPTBL(L(Exit6), L(ExitTable)) - .int JMPTBL(L(Exit7), L(ExitTable)) - .int JMPTBL(L(Exit8), L(ExitTable)) - .int JMPTBL(L(Exit9), L(ExitTable)) - .int JMPTBL(L(Exit10), L(ExitTable)) - .int JMPTBL(L(Exit11), L(ExitTable)) - .int JMPTBL(L(Exit12), L(ExitTable)) - .int JMPTBL(L(Exit13), L(ExitTable)) - .int JMPTBL(L(Exit14), L(ExitTable)) - .int JMPTBL(L(Exit15), L(ExitTable)) - .int JMPTBL(L(Exit16), L(ExitTable)) - .int JMPTBL(L(Exit17), L(ExitTable)) - .int JMPTBL(L(Exit18), L(ExitTable)) - .int JMPTBL(L(Exit19), L(ExitTable)) - .int JMPTBL(L(Exit20), L(ExitTable)) - .int JMPTBL(L(Exit21), L(ExitTable)) - .int JMPTBL(L(Exit22), L(ExitTable)) - .int JMPTBL(L(Exit23), L(ExitTable)) - .int JMPTBL(L(Exit24), L(ExitTable)) - .int JMPTBL(L(Exit25), L(ExitTable)) - .int JMPTBL(L(Exit26), L(ExitTable)) - .int JMPTBL(L(Exit27), L(ExitTable)) - .int JMPTBL(L(Exit28), L(ExitTable)) - .int JMPTBL(L(Exit29), L(ExitTable)) - .int JMPTBL(L(Exit30), L(ExitTable)) - .int JMPTBL(L(Exit31), L(ExitTable)) - .int JMPTBL(L(Exit32), L(ExitTable)) - 
-L(ExitStrncpyTable): - .int JMPTBL(L(Exit0), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable)) - - .p2align 4 -L(FillTable): - .int JMPTBL(L(Fill0), L(FillTable)) - .int JMPTBL(L(Fill1), L(FillTable)) - .int JMPTBL(L(Fill2), L(FillTable)) - .int JMPTBL(L(Fill3), L(FillTable)) - .int 
JMPTBL(L(Fill4), L(FillTable)) - .int JMPTBL(L(Fill5), L(FillTable)) - .int JMPTBL(L(Fill6), L(FillTable)) - .int JMPTBL(L(Fill7), L(FillTable)) - .int JMPTBL(L(Fill8), L(FillTable)) - .int JMPTBL(L(Fill9), L(FillTable)) - .int JMPTBL(L(Fill10), L(FillTable)) - .int JMPTBL(L(Fill11), L(FillTable)) - .int JMPTBL(L(Fill12), L(FillTable)) - .int JMPTBL(L(Fill13), L(FillTable)) - .int JMPTBL(L(Fill14), L(FillTable)) - .int JMPTBL(L(Fill15), L(FillTable)) - .int JMPTBL(L(Fill16), L(FillTable)) -# else -# define PARMS 4 -# define ENTRANCE -# define RETURN POP (%edi); ret; CFI_PUSH (%edi) -# define RETURN1 ret - - .text -ENTRY (STRCPY) - ENTRANCE - mov STR1(%esp), %edx - mov STR2(%esp), %ecx - - cmpb $0, (%ecx) - jz L(ExitTail1) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - cmpb $0, 3(%ecx) - jz L(ExitTail4) - cmpb $0, 4(%ecx) - jz L(ExitTail5) - cmpb $0, 5(%ecx) - jz L(ExitTail6) - cmpb $0, 6(%ecx) - jz L(ExitTail7) - cmpb $0, 7(%ecx) - jz L(ExitTail8) - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - cmpb $0, 11(%ecx) - jz L(ExitTail12) - cmpb $0, 12(%ecx) - jz L(ExitTail13) - cmpb $0, 13(%ecx) - jz L(ExitTail14) - cmpb $0, 14(%ecx) - jz L(ExitTail15) - cmpb $0, 15(%ecx) - jz L(ExitTail16) - - PUSH (%edi) - PUSH (%ebx) - - mov %edx, %edi - lea 16(%ecx), %ebx - and $-16, %ebx - pxor %xmm0, %xmm0 - movdqu (%ecx), %xmm1 - movdqu %xmm1, (%edx) - pcmpeqb (%ebx), %xmm0 - pmovmskb %xmm0, %eax - sub %ecx, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - mov %ecx, %eax - lea 16(%ecx), %ecx - and $-16, %ecx - sub %ecx, %eax - sub %eax, %edx - xor %ebx, %ebx - - .p2align 4 - movdqa (%ecx), %xmm1 - movaps 16(%ecx), %xmm2 - movdqu %xmm1, (%edx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm3 - movdqu %xmm2, (%edx, %ebx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test 
%eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm4 - movdqu %xmm3, (%edx, %ebx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm1 - movdqu %xmm4, (%edx, %ebx) - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm2 - movdqu %xmm1, (%edx, %ebx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm3 - movdqu %xmm2, (%edx, %ebx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm3, (%edx, %ebx) - mov %ecx, %eax - lea 16(%ecx, %ebx), %ecx - and $-0x40, %ecx - sub %ecx, %eax - sub %eax, %edx - -L(Aligned64Loop): - movaps (%ecx), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%ecx), %xmm5 - movaps 32(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%ecx), %xmm7 - pminub %xmm5, %xmm2 - add $64, %ecx - pminub %xmm7, %xmm3 - add $64, %edx - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(Aligned64Leave) -L(Aligned64Loop_start): - movdqu %xmm4, -64(%edx) - movaps (%ecx), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%edx) - movaps 16(%ecx), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%ecx), %xmm3 - movdqu %xmm6, -32(%edx) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%edx) - movaps 48(%ecx), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - add $64, %edx - add $64, %ecx - test %eax, %eax - jz L(Aligned64Loop_start) -L(Aligned64Leave): - sub $0xa0, %ebx - pxor %xmm0, %xmm0 - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %eax - movdqu %xmm4, -64(%edx) - test %eax, %eax - lea 16(%ebx), %ebx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %eax - 
movdqu %xmm5, -48(%edx) - test %eax, %eax - lea 16(%ebx), %ebx - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm6, -32(%edx) - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%ebx), %ebx - -/*-----------------End of main part---------------------------*/ - - .p2align 4 -L(CopyFrom1To16Bytes): - add %ebx, %edx - add %ebx, %ecx - - POP (%ebx) - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) - test $0x08, %al - jnz L(Exit4) - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - /* Exit 8 */ - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 7(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(ExitHigh): - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - test $0x08, %ah - jnz L(Exit12) - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - /* Exit 16 */ - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm0 - movlpd %xmm0, 8(%edx) -# ifdef USE_AS_STPCPY - lea 15(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit1): - movb (%ecx), %al - movb %al, (%edx) -# ifdef USE_AS_STPCPY - lea (%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit2): - movw (%ecx), %ax - movw %ax, (%edx) -# ifdef USE_AS_STPCPY - lea 1(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) -# ifdef USE_AS_STPCPY - lea 2(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) -# ifdef USE_AS_STPCPY - lea 3(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 
4(%edx) -# ifdef USE_AS_STPCPY - lea 4(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 5(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) -# ifdef USE_AS_STPCPY - lea 6(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit9): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movb 8(%ecx), %al - movb %al, 8(%edx) -# ifdef USE_AS_STPCPY - lea 8(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit10): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movw 8(%ecx), %ax - movw %ax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 9(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit11): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 7(%ecx), %eax - movl %eax, 7(%edx) -# ifdef USE_AS_STPCPY - lea 10(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit12): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 11(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit13): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 5(%ecx), %xmm0 - movlpd %xmm0, 5(%edx) -# ifdef USE_AS_STPCPY - lea 12(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit14): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 6(%ecx), %xmm0 - movlpd %xmm0, 6(%edx) -# ifdef USE_AS_STPCPY - lea 13(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit15): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) -# ifdef USE_AS_STPCPY - lea 
14(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - -CFI_POP (%edi) - - .p2align 4 -L(ExitTail1): - movb (%ecx), %al - movb %al, (%edx) - movl %edx, %eax - RETURN1 - - .p2align 4 -L(ExitTail2): - movw (%ecx), %ax - movw %ax, (%edx) -# ifdef USE_AS_STPCPY - lea 1(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) -# ifdef USE_AS_STPCPY - lea 2(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail4): - movl (%ecx), %eax - movl %eax, (%edx) -# ifdef USE_AS_STPCPY - lea 3(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) -# ifdef USE_AS_STPCPY - lea 4(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 5(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) -# ifdef USE_AS_STPCPY - lea 6(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail8): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 7(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail9): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movb 8(%ecx), %al - movb %al, 8(%edx) -# ifdef USE_AS_STPCPY - lea 8(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail10): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movw 8(%ecx), %ax - movw %ax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 9(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail11): - movl 
(%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 7(%ecx), %eax - movl %eax, 7(%edx) -# ifdef USE_AS_STPCPY - lea 10(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail12): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 11(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail13): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 5(%ecx), %xmm0 - movlpd %xmm0, 5(%edx) -# ifdef USE_AS_STPCPY - lea 12(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail14): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 6(%ecx), %xmm0 - movlpd %xmm0, 6(%edx) -# ifdef USE_AS_STPCPY - lea 13(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail15): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) -# ifdef USE_AS_STPCPY - lea 14(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail16): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm0 - movlpd %xmm0, 8(%edx) -# ifdef USE_AS_STPCPY - lea 15(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - -END (STRCPY) -# endif - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S deleted file mode 100644 index effd85da94..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S +++ /dev/null @@ -1,3901 +0,0 @@ -/* strcpy with SSSE3 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# ifndef USE_AS_STRCAT -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCPY -# define STRCPY __strcpy_ssse3 -# endif - -# ifdef USE_AS_STRNCPY -# define PARMS 8 -# define ENTRANCE PUSH (%ebx) -# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx); -# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi) -# else -# define PARMS 4 -# define ENTRANCE -# define RETURN ret -# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) -# endif - -# ifdef USE_AS_STPCPY -# define SAVE_RESULT(n) lea n(%edx), %eax -# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax -# else -# define SAVE_RESULT(n) movl %edi, %eax -# define SAVE_RESULT_TAIL(n) movl %edx, %eax -# endif - -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -/* In this code following instructions are used for copying: - movb - 1 byte - movw - 2 byte - movl - 4 byte - movlpd - 8 byte - movaps - 16 byte - requires 16 byte alignment - of sourse and destination adresses. 
-*/ - -.text -ENTRY (STRCPY) - ENTRANCE - mov STR1(%esp), %edx - mov STR2(%esp), %ecx -# ifdef USE_AS_STRNCPY - movl LEN(%esp), %ebx - cmp $8, %ebx - jbe L(StrncpyExit8Bytes) -# endif - cmpb $0, (%ecx) - jz L(ExitTail1) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - cmpb $0, 3(%ecx) - jz L(ExitTail4) - cmpb $0, 4(%ecx) - jz L(ExitTail5) - cmpb $0, 5(%ecx) - jz L(ExitTail6) - cmpb $0, 6(%ecx) - jz L(ExitTail7) - cmpb $0, 7(%ecx) - jz L(ExitTail8) -# ifdef USE_AS_STRNCPY - cmp $16, %ebx - jb L(StrncpyExit15Bytes) -# endif - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - cmpb $0, 11(%ecx) - jz L(ExitTail12) - cmpb $0, 12(%ecx) - jz L(ExitTail13) - cmpb $0, 13(%ecx) - jz L(ExitTail14) - cmpb $0, 14(%ecx) - jz L(ExitTail15) -# ifdef USE_AS_STRNCPY - cmp $16, %ebx - je L(ExitTail16) -# endif - cmpb $0, 15(%ecx) - jz L(ExitTail16) - - PUSH (%edi) - mov %edx, %edi -# endif - PUSH (%esi) -# ifdef USE_AS_STRNCPY - mov %ecx, %esi - sub $16, %ebx - and $0xf, %esi - -/* add 16 bytes ecx_offset to ebx */ - - add %esi, %ebx -# endif - lea 16(%ecx), %esi - and $-16, %esi - pxor %xmm0, %xmm0 - movlpd (%ecx), %xmm1 - movlpd %xmm1, (%edx) - - pcmpeqb (%esi), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm1, 8(%edx) - - pmovmskb %xmm0, %eax - sub %ecx, %esi - -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - mov %edx, %eax - lea 16(%edx), %edx - and $-16, %edx - sub %edx, %eax - -# ifdef USE_AS_STRNCPY - add %eax, %esi - lea -1(%esi), %esi - and $1<<31, %esi - test %esi, %esi - jnz L(ContinueCopy) - lea 16(%ebx), %ebx - -L(ContinueCopy): -# endif - sub %eax, %ecx - mov %ecx, %eax - and $0xf, %eax - mov $0, %esi - -/* case: ecx_offset == edx_offset */ - - jz L(Align16Both) - - cmp $8, %eax - jae L(ShlHigh8) - cmp $1, %eax - je L(Shl1) - cmp $2, %eax - je L(Shl2) - cmp $3, %eax - je L(Shl3) - cmp $4, %eax - 
je L(Shl4) - cmp $5, %eax - je L(Shl5) - cmp $6, %eax - je L(Shl6) - jmp L(Shl7) - -L(ShlHigh8): - je L(Shl8) - cmp $9, %eax - je L(Shl9) - cmp $10, %eax - je L(Shl10) - cmp $11, %eax - je L(Shl11) - cmp $12, %eax - je L(Shl12) - cmp $13, %eax - je L(Shl13) - cmp $14, %eax - je L(Shl14) - jmp L(Shl15) - -L(Align16Both): - movaps (%ecx), %xmm1 - movaps 16(%ecx), %xmm2 - movaps %xmm1, (%edx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm4 - movaps %xmm3, (%edx, %esi) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm1 - movaps %xmm4, (%edx, %esi) - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm2 - movaps %xmm1, (%edx, %esi) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps %xmm3, (%edx, %esi) - mov %ecx, %eax - lea 16(%ecx, %esi), %ecx - and 
$-0x40, %ecx - sub %ecx, %eax - sub %eax, %edx -# ifdef USE_AS_STRNCPY - lea 112(%ebx, %eax), %ebx -# endif - mov $-0x40, %esi - -L(Aligned64Loop): - movaps (%ecx), %xmm2 - movaps 32(%ecx), %xmm3 - movaps %xmm2, %xmm4 - movaps 16(%ecx), %xmm5 - movaps %xmm3, %xmm6 - movaps 48(%ecx), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - lea 64(%edx), %edx - pcmpeqb %xmm0, %xmm3 - lea 64(%ecx), %ecx - pmovmskb %xmm3, %eax -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeaveCase2OrCase3) -# endif - test %eax, %eax - jnz L(Aligned64Leave) - movaps %xmm4, -64(%edx) - movaps %xmm5, -48(%edx) - movaps %xmm6, -32(%edx) - movaps %xmm7, -16(%edx) - jmp L(Aligned64Loop) - -L(Aligned64Leave): -# ifdef USE_AS_STRNCPY - lea 48(%ebx), %ebx -# endif - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 -# ifdef USE_AS_STRNCPY - lea -16(%ebx), %ebx -# endif - pmovmskb %xmm0, %eax - movaps %xmm4, -64(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 -# ifdef USE_AS_STRNCPY - lea -16(%ebx), %ebx -# endif - pmovmskb %xmm0, %eax - movaps %xmm5, -48(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - movaps %xmm6, -32(%edx) - pcmpeqb %xmm7, %xmm0 -# ifdef USE_AS_STRNCPY - lea -16(%ebx), %ebx -# endif - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl1): - movaps -1(%ecx), %xmm1 - movaps 15(%ecx), %xmm2 -L(Shl1Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) 
-# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 31(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -15(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -1(%ecx), %xmm1 - -L(Shl1LoopStart): - movaps 15(%ecx), %xmm2 - movaps 31(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 47(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 63(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $1, %xmm4, %xmm5 - test %eax, %eax - palignr $1, %xmm3, %xmm4 - jnz L(Shl1Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave1) -# endif - palignr $1, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $1, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl1LoopStart) - -L(Shl1LoopExit): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) - mov $15, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl2): - movaps -2(%ecx), %xmm1 - movaps 14(%ecx), %xmm2 -L(Shl2Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe 
L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 30(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -14(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -2(%ecx), %xmm1 - -L(Shl2LoopStart): - movaps 14(%ecx), %xmm2 - movaps 30(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 46(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 62(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $2, %xmm4, %xmm5 - test %eax, %eax - palignr $2, %xmm3, %xmm4 - jnz L(Shl2Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave2) -# endif - palignr $2, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $2, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl2LoopStart) 
- -L(Shl2LoopExit): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 6(%edx) - mov $14, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl3): - movaps -3(%ecx), %xmm1 - movaps 13(%ecx), %xmm2 -L(Shl3Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 29(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -13(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -3(%ecx), %xmm1 - -L(Shl3LoopStart): - movaps 13(%ecx), %xmm2 - movaps 29(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 45(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 61(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $3, %xmm4, %xmm5 - test %eax, 
%eax - palignr $3, %xmm3, %xmm4 - jnz L(Shl3Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave3) -# endif - palignr $3, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $3, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl3LoopStart) - -L(Shl3LoopExit): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - mov $13, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl4): - movaps -4(%ecx), %xmm1 - movaps 12(%ecx), %xmm2 -L(Shl4Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 28(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -12(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - 
movaps -4(%ecx), %xmm1 - -L(Shl4LoopStart): - movaps 12(%ecx), %xmm2 - movaps 28(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 44(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 60(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $4, %xmm4, %xmm5 - test %eax, %eax - palignr $4, %xmm3, %xmm4 - jnz L(Shl4Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave4) -# endif - palignr $4, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $4, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl4LoopStart) - -L(Shl4LoopExit): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 8(%edx) - mov $12, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl5): - movaps -5(%ecx), %xmm1 - movaps 11(%ecx), %xmm2 -L(Shl5Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# 
ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 27(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -11(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -5(%ecx), %xmm1 - -L(Shl5LoopStart): - movaps 11(%ecx), %xmm2 - movaps 27(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 43(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 59(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $5, %xmm4, %xmm5 - test %eax, %eax - palignr $5, %xmm3, %xmm4 - jnz L(Shl5Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave5) -# endif - palignr $5, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $5, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl5LoopStart) - -L(Shl5LoopExit): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 7(%edx) - mov $11, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl6): - movaps -6(%ecx), %xmm1 - movaps 10(%ecx), %xmm2 -L(Shl6Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - 
pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 26(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -10(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -6(%ecx), %xmm1 - -L(Shl6LoopStart): - movaps 10(%ecx), %xmm2 - movaps 26(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 42(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 58(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $6, %xmm4, %xmm5 - test %eax, %eax - palignr $6, %xmm3, %xmm4 - jnz L(Shl6Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave6) -# endif - palignr $6, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $6, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl6LoopStart) - -L(Shl6LoopExit): - movlpd (%ecx), %xmm0 - movl 6(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 6(%edx) - mov $10, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl7): - movaps -7(%ecx), %xmm1 - movaps 9(%ecx), %xmm2 -L(Shl7Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - 
lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 25(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -9(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -7(%ecx), %xmm1 - -L(Shl7LoopStart): - movaps 9(%ecx), %xmm2 - movaps 25(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 41(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 57(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $7, %xmm4, %xmm5 - test %eax, %eax - palignr $7, %xmm3, %xmm4 - jnz L(Shl7Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave7) -# endif - palignr $7, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $7, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl7LoopStart) - -L(Shl7LoopExit): - movlpd (%ecx), %xmm0 - movl 5(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 5(%edx) - mov $9, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl8): - movaps -8(%ecx), %xmm1 - 
movaps 8(%ecx), %xmm2 -L(Shl8Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 24(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -8(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -8(%ecx), %xmm1 - -L(Shl8LoopStart): - movaps 8(%ecx), %xmm2 - movaps 24(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 40(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 56(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $8, %xmm4, %xmm5 - test %eax, %eax - palignr $8, %xmm3, %xmm4 - jnz L(Shl8Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave8) -# endif - palignr $8, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $8, %xmm1, %xmm2 - movaps %xmm7, 
%xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl8LoopStart) - -L(Shl8LoopExit): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - mov $8, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl9): - movaps -9(%ecx), %xmm1 - movaps 7(%ecx), %xmm2 -L(Shl9Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 23(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -7(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -9(%ecx), %xmm1 - -L(Shl9LoopStart): - movaps 7(%ecx), %xmm2 - movaps 23(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 39(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 55(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb 
%xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $9, %xmm4, %xmm5 - test %eax, %eax - palignr $9, %xmm3, %xmm4 - jnz L(Shl9Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave9) -# endif - palignr $9, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $9, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl9LoopStart) - -L(Shl9LoopExit): - movlpd -1(%ecx), %xmm0 - movlpd %xmm0, -1(%edx) - mov $7, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl10): - movaps -10(%ecx), %xmm1 - movaps 6(%ecx), %xmm2 -L(Shl10Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 22(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -6(%ecx), %ecx - 
sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -10(%ecx), %xmm1 - -L(Shl10LoopStart): - movaps 6(%ecx), %xmm2 - movaps 22(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 38(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 54(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $10, %xmm4, %xmm5 - test %eax, %eax - palignr $10, %xmm3, %xmm4 - jnz L(Shl10Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave10) -# endif - palignr $10, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $10, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl10LoopStart) - -L(Shl10LoopExit): - movlpd -2(%ecx), %xmm0 - movlpd %xmm0, -2(%edx) - mov $6, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl11): - movaps -11(%ecx), %xmm1 - movaps 5(%ecx), %xmm2 -L(Shl11Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - 
pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 21(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -5(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -11(%ecx), %xmm1 - -L(Shl11LoopStart): - movaps 5(%ecx), %xmm2 - movaps 21(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 37(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 53(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $11, %xmm4, %xmm5 - test %eax, %eax - palignr $11, %xmm3, %xmm4 - jnz L(Shl11Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave11) -# endif - palignr $11, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $11, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl11LoopStart) - -L(Shl11LoopExit): - movlpd -3(%ecx), %xmm0 - movlpd %xmm0, -3(%edx) - mov $5, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl12): - movaps -12(%ecx), %xmm1 - movaps 4(%ecx), %xmm2 -L(Shl12Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 
- lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 20(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -4(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -12(%ecx), %xmm1 - -L(Shl12LoopStart): - movaps 4(%ecx), %xmm2 - movaps 20(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 36(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 52(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $12, %xmm4, %xmm5 - test %eax, %eax - palignr $12, %xmm3, %xmm4 - jnz L(Shl12Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave12) -# endif - palignr $12, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $12, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl12LoopStart) - -L(Shl12LoopExit): - movl (%ecx), %esi - movl %esi, (%edx) - mov $4, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl13): - movaps -13(%ecx), %xmm1 - movaps 3(%ecx), %xmm2 -L(Shl13Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 
16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 19(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -3(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -13(%ecx), %xmm1 - -L(Shl13LoopStart): - movaps 3(%ecx), %xmm2 - movaps 19(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 35(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 51(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $13, %xmm4, %xmm5 - test %eax, %eax - palignr $13, %xmm3, %xmm4 - jnz L(Shl13Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave13) -# endif - palignr $13, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $13, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl13LoopStart) - -L(Shl13LoopExit): - movl -1(%ecx), %esi - movl %esi, -1(%edx) - mov $3, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl14): - movaps -14(%ecx), %xmm1 - movaps 2(%ecx), %xmm2 
-L(Shl14Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 18(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -2(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -14(%ecx), %xmm1 - -L(Shl14LoopStart): - movaps 2(%ecx), %xmm2 - movaps 18(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 34(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 50(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $14, %xmm4, %xmm5 - test %eax, %eax - palignr $14, %xmm3, %xmm4 - jnz L(Shl14Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave14) -# endif - palignr $14, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $14, %xmm1, %xmm2 - movaps %xmm7, 
%xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl14LoopStart) - -L(Shl14LoopExit): - movl -2(%ecx), %esi - movl %esi, -2(%edx) - mov $2, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl15): - movaps -15(%ecx), %xmm1 - movaps 1(%ecx), %xmm2 -L(Shl15Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 17(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -1(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -15(%ecx), %xmm1 - -L(Shl15LoopStart): - movaps 1(%ecx), %xmm2 - movaps 17(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 33(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 49(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, 
%xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $15, %xmm4, %xmm5 - test %eax, %eax - palignr $15, %xmm3, %xmm4 - jnz L(Shl15Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave15) -# endif - palignr $15, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $15, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl15LoopStart) - -L(Shl15LoopExit): - movl -3(%ecx), %esi - movl %esi, -3(%edx) - mov $1, %esi -# ifdef USE_AS_STRCAT - jmp L(CopyFrom1To16Bytes) -# endif - - -# ifndef USE_AS_STRCAT - - .p2align 4 -L(CopyFrom1To16Bytes): -# ifdef USE_AS_STRNCPY - add $16, %ebx -# endif - add %esi, %edx - add %esi, %ecx - - POP (%esi) - test %al, %al - jz L(ExitHigh8) - -L(CopyFrom1To16BytesLess8): - mov %al, %ah - and $15, %ah - jz L(ExitHigh4) - - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) - - .p2align 4 -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT (3) -# ifdef USE_AS_STRNCPY - sub $4, %ebx - lea 4(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(ExitHigh4): - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - - .p2align 4 -L(Exit8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - SAVE_RESULT (7) -# ifdef USE_AS_STRNCPY - sub $8, %ebx - lea 8(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(ExitHigh8): - mov %ah, %al - and $15, %al - jz L(ExitHigh12) - - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - - .p2align 4 -L(Exit12): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT (11) -# ifdef 
USE_AS_STRNCPY - sub $12, %ebx - lea 12(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(ExitHigh12): - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - - .p2align 4 -L(Exit16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - SAVE_RESULT (15) -# ifdef USE_AS_STRNCPY - sub $16, %ebx - lea 16(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - -# ifdef USE_AS_STRNCPY - - CFI_PUSH(%esi) - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %esi, %ecx - add %esi, %edx - - POP (%esi) - - test %al, %al - jz L(ExitHighCase2) - - cmp $8, %ebx - ja L(CopyFrom1To16BytesLess8) - - test $0x01, %al - jnz L(Exit1) - cmp $1, %ebx - je L(Exit1) - test $0x02, %al - jnz L(Exit2) - cmp $2, %ebx - je L(Exit2) - test $0x04, %al - jnz L(Exit3) - cmp $3, %ebx - je L(Exit3) - test $0x08, %al - jnz L(Exit4) - cmp $4, %ebx - je L(Exit4) - test $0x10, %al - jnz L(Exit5) - cmp $5, %ebx - je L(Exit5) - test $0x20, %al - jnz L(Exit6) - cmp $6, %ebx - je L(Exit6) - test $0x40, %al - jnz L(Exit7) - cmp $7, %ebx - je L(Exit7) - jmp L(Exit8) - - .p2align 4 -L(ExitHighCase2): - cmp $8, %ebx - jbe L(CopyFrom1To16BytesLess8Case3) - - test $0x01, %ah - jnz L(Exit9) - cmp $9, %ebx - je L(Exit9) - test $0x02, %ah - jnz L(Exit10) - cmp $10, %ebx - je L(Exit10) - test $0x04, %ah - jnz L(Exit11) - cmp $11, %ebx - je L(Exit11) - test $0x8, %ah - jnz L(Exit12) - cmp $12, %ebx - je L(Exit12) - test $0x10, %ah - jnz L(Exit13) - cmp $13, %ebx - je L(Exit13) - test $0x20, %ah - jnz L(Exit14) - cmp $14, %ebx - je L(Exit14) - test $0x40, %ah - jnz L(Exit15) - cmp $15, %ebx - je L(Exit15) - jmp L(Exit16) - - CFI_PUSH(%esi) - - .p2align 4 -L(CopyFrom1To16BytesCase2OrCase3): - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - - .p2align 4 -L(CopyFrom1To16BytesCase3): - add 
$16, %ebx - add %esi, %edx - add %esi, %ecx - - POP (%esi) - - cmp $8, %ebx - ja L(ExitHigh8Case3) - -L(CopyFrom1To16BytesLess8Case3): - cmp $4, %ebx - ja L(ExitHigh4Case3) - - cmp $1, %ebx - je L(Exit1) - cmp $2, %ebx - je L(Exit2) - cmp $3, %ebx - je L(Exit3) - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT (4) - RETURN1 - - .p2align 4 -L(ExitHigh4Case3): - cmp $5, %ebx - je L(Exit5) - cmp $6, %ebx - je L(Exit6) - cmp $7, %ebx - je L(Exit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - SAVE_RESULT (8) - RETURN1 - - .p2align 4 -L(ExitHigh8Case3): - cmp $12, %ebx - ja L(ExitHigh12Case3) - - cmp $9, %ebx - je L(Exit9) - cmp $10, %ebx - je L(Exit10) - cmp $11, %ebx - je L(Exit11) - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT (12) - RETURN1 - - .p2align 4 -L(ExitHigh12Case3): - cmp $13, %ebx - je L(Exit13) - cmp $14, %ebx - je L(Exit14) - cmp $15, %ebx - je L(Exit15) - movlpd (%ecx), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) - SAVE_RESULT (16) - RETURN1 - -# endif - - .p2align 4 -L(Exit1): - movb (%ecx), %al - movb %al, (%edx) - SAVE_RESULT (0) -# ifdef USE_AS_STRNCPY - sub $1, %ebx - lea 1(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit2): - movw (%ecx), %ax - movw %ax, (%edx) - SAVE_RESULT (1) -# ifdef USE_AS_STRNCPY - sub $2, %ebx - lea 2(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) - SAVE_RESULT (2) -# ifdef USE_AS_STRNCPY - sub $3, %ebx - lea 3(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) 
- SAVE_RESULT (4) -# ifdef USE_AS_STRNCPY - sub $5, %ebx - lea 5(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) - SAVE_RESULT (5) -# ifdef USE_AS_STRNCPY - sub $6, %ebx - lea 6(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) - SAVE_RESULT (6) -# ifdef USE_AS_STRNCPY - sub $7, %ebx - lea 7(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit9): - movlpd (%ecx), %xmm0 - movb 8(%ecx), %al - movlpd %xmm0, (%edx) - movb %al, 8(%edx) - SAVE_RESULT (8) -# ifdef USE_AS_STRNCPY - sub $9, %ebx - lea 9(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit10): - movlpd (%ecx), %xmm0 - movw 8(%ecx), %ax - movlpd %xmm0, (%edx) - movw %ax, 8(%edx) - SAVE_RESULT (9) -# ifdef USE_AS_STRNCPY - sub $10, %ebx - lea 10(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit11): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 7(%edx) - SAVE_RESULT (10) -# ifdef USE_AS_STRNCPY - sub $11, %ebx - lea 11(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit13): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - SAVE_RESULT (12) -# ifdef USE_AS_STRNCPY - sub $13, %ebx - lea 13(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb 
$1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit14): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 6(%edx) - SAVE_RESULT (13) -# ifdef USE_AS_STRNCPY - sub $14, %ebx - lea 14(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit15): - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) - SAVE_RESULT (14) -# ifdef USE_AS_STRNCPY - sub $15, %ebx - lea 15(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - -CFI_POP (%edi) - -# ifdef USE_AS_STRNCPY - .p2align 4 -L(Fill0): - RETURN - - .p2align 4 -L(Fill1): - movb %dl, (%ecx) - RETURN - - .p2align 4 -L(Fill2): - movw %dx, (%ecx) - RETURN - - .p2align 4 -L(Fill3): - movw %dx, (%ecx) - movb %dl, 2(%ecx) - RETURN - - .p2align 4 -L(Fill4): - movl %edx, (%ecx) - RETURN - - .p2align 4 -L(Fill5): - movl %edx, (%ecx) - movb %dl, 4(%ecx) - RETURN - - .p2align 4 -L(Fill6): - movl %edx, (%ecx) - movw %dx, 4(%ecx) - RETURN - - .p2align 4 -L(Fill7): - movl %edx, (%ecx) - movl %edx, 3(%ecx) - RETURN - - .p2align 4 -L(Fill8): - movlpd %xmm0, (%ecx) - RETURN - - .p2align 4 -L(Fill9): - movlpd %xmm0, (%ecx) - movb %dl, 8(%ecx) - RETURN - - .p2align 4 -L(Fill10): - movlpd %xmm0, (%ecx) - movw %dx, 8(%ecx) - RETURN - - .p2align 4 -L(Fill11): - movlpd %xmm0, (%ecx) - movl %edx, 7(%ecx) - RETURN - - .p2align 4 -L(Fill12): - movlpd %xmm0, (%ecx) - movl %edx, 8(%ecx) - RETURN - - .p2align 4 -L(Fill13): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 5(%ecx) - RETURN - - .p2align 4 -L(Fill14): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 6(%ecx) - RETURN - - .p2align 4 -L(Fill15): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 7(%ecx) - RETURN - - .p2align 4 -L(Fill16): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 8(%ecx) - RETURN - - .p2align 4 -L(StrncpyFillExit1): - lea 16(%ebx), %ebx 
-L(FillFrom1To16Bytes): - test %ebx, %ebx - jz L(Fill0) - cmp $16, %ebx - je L(Fill16) - cmp $8, %ebx - je L(Fill8) - jg L(FillMore8) - cmp $4, %ebx - je L(Fill4) - jg L(FillMore4) - cmp $2, %ebx - jl L(Fill1) - je L(Fill2) - jg L(Fill3) -L(FillMore8): /* but less than 16 */ - cmp $12, %ebx - je L(Fill12) - jl L(FillLess12) - cmp $14, %ebx - jl L(Fill13) - je L(Fill14) - jg L(Fill15) -L(FillMore4): /* but less than 8 */ - cmp $6, %ebx - jl L(Fill5) - je L(Fill6) - jg L(Fill7) -L(FillLess12): /* but more than 8 */ - cmp $10, %ebx - jl L(Fill9) - je L(Fill10) - jmp L(Fill11) - - CFI_PUSH(%edi) - - .p2align 4 -L(StrncpyFillTailWithZero1): - POP (%edi) -L(StrncpyFillTailWithZero): - pxor %xmm0, %xmm0 - xor %edx, %edx - sub $16, %ebx - jbe L(StrncpyFillExit1) - - movlpd %xmm0, (%ecx) - movlpd %xmm0, 8(%ecx) - - lea 16(%ecx), %ecx - - mov %ecx, %edx - and $0xf, %edx - sub %edx, %ecx - add %edx, %ebx - xor %edx, %edx - sub $64, %ebx - jb L(StrncpyFillLess64) - -L(StrncpyFillLoopMovdqa): - movdqa %xmm0, (%ecx) - movdqa %xmm0, 16(%ecx) - movdqa %xmm0, 32(%ecx) - movdqa %xmm0, 48(%ecx) - lea 64(%ecx), %ecx - sub $64, %ebx - jae L(StrncpyFillLoopMovdqa) - -L(StrncpyFillLess64): - add $32, %ebx - jl L(StrncpyFillLess32) - movdqa %xmm0, (%ecx) - movdqa %xmm0, 16(%ecx) - lea 32(%ecx), %ecx - sub $16, %ebx - jl L(StrncpyFillExit1) - movdqa %xmm0, (%ecx) - lea 16(%ecx), %ecx - jmp L(FillFrom1To16Bytes) - -L(StrncpyFillLess32): - add $16, %ebx - jl L(StrncpyFillExit1) - movdqa %xmm0, (%ecx) - lea 16(%ecx), %ecx - jmp L(FillFrom1To16Bytes) -# endif - - .p2align 4 -L(ExitTail1): - movb (%ecx), %al - movb %al, (%edx) - SAVE_RESULT_TAIL (0) -# ifdef USE_AS_STRNCPY - sub $1, %ebx - lea 1(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail2): - movw (%ecx), %ax - movw %ax, (%edx) - SAVE_RESULT_TAIL (1) -# ifdef USE_AS_STRNCPY - sub $2, %ebx - lea 2(%edx), %ecx - jnz 
L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) - SAVE_RESULT_TAIL (2) -# ifdef USE_AS_STRNCPY - sub $3, %ebx - lea 3(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail4): - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT_TAIL (3) -# ifdef USE_AS_STRNCPY - sub $4, %ebx - lea 4(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) - SAVE_RESULT_TAIL (4) -# ifdef USE_AS_STRNCPY - sub $5, %ebx - lea 5(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) - SAVE_RESULT_TAIL (5) -# ifdef USE_AS_STRNCPY - sub $6, %ebx - lea 6(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) - SAVE_RESULT_TAIL (6) -# ifdef USE_AS_STRNCPY - sub $7, %ebx - lea 7(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - SAVE_RESULT_TAIL (7) -# ifdef USE_AS_STRNCPY - sub $8, %ebx - lea 8(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# endif - RETURN - - .p2align 4 -L(ExitTail9): - movlpd (%ecx), %xmm0 - movb 8(%ecx), %al - movlpd %xmm0, (%edx) - movb %al, 8(%edx) - SAVE_RESULT_TAIL (8) -# ifdef USE_AS_STRNCPY - sub $9, 
%ebx - lea 9(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail10): - movlpd (%ecx), %xmm0 - movw 8(%ecx), %ax - movlpd %xmm0, (%edx) - movw %ax, 8(%edx) - SAVE_RESULT_TAIL (9) -# ifdef USE_AS_STRNCPY - sub $10, %ebx - lea 10(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail11): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 7(%edx) - SAVE_RESULT_TAIL (10) -# ifdef USE_AS_STRNCPY - sub $11, %ebx - lea 11(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail12): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT_TAIL (11) -# ifdef USE_AS_STRNCPY - sub $12, %ebx - lea 12(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail13): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - SAVE_RESULT_TAIL (12) -# ifdef USE_AS_STRNCPY - sub $13, %ebx - lea 13(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail14): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 6(%edx) - SAVE_RESULT_TAIL (13) -# ifdef USE_AS_STRNCPY - sub $14, %ebx - lea 14(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail15): - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) - SAVE_RESULT_TAIL (14) -# ifdef USE_AS_STRNCPY - sub $15, %ebx - lea 15(%edx), %ecx - jnz L(StrncpyFillTailWithZero) 
-# endif - RETURN - - .p2align 4 -L(ExitTail16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - SAVE_RESULT_TAIL (15) -# ifdef USE_AS_STRNCPY - sub $16, %ebx - lea 16(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN -# endif - -# ifdef USE_AS_STRNCPY -# ifndef USE_AS_STRCAT - CFI_PUSH (%esi) - CFI_PUSH (%edi) -# endif - .p2align 4 -L(StrncpyLeaveCase2OrCase3): - test %eax, %eax - jnz L(Aligned64LeaveCase2) - -L(Aligned64LeaveCase3): - add $48, %ebx - jle L(CopyFrom1To16BytesCase3) - movaps %xmm4, -64(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase3) - movaps %xmm5, -48(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase3) - movaps %xmm6, -32(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx - jmp L(CopyFrom1To16BytesCase3) - -L(Aligned64LeaveCase2): - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm4, -64(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm5, -48(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm6, -32(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx - jmp L(CopyFrom1To16BytesCase2) - -/*--------------------------------------------------*/ - .p2align 4 -L(StrncpyExit1Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) - mov $15, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit2Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd 
%xmm1, 6(%edx) - mov $14, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit3Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - mov $13, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit4Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 8(%edx) - mov $12, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit5Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 7(%edx) - mov $11, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit6Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 6(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 6(%edx) - mov $10, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit7Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 5(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 5(%edx) - mov $9, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit8Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - mov $8, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit9Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - mov $7, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit10Case2OrCase3): - movlpd -1(%ecx), %xmm0 - movlpd %xmm0, -1(%edx) - mov $6, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit11Case2OrCase3): - movlpd -2(%ecx), %xmm0 - movlpd %xmm0, -2(%edx) - mov $5, %esi - test %eax, %eax - jnz 
L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit12Case2OrCase3): - movl (%ecx), %esi - movl %esi, (%edx) - mov $4, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit13Case2OrCase3): - movl -1(%ecx), %esi - movl %esi, -1(%edx) - mov $3, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit14Case2OrCase3): - movl -2(%ecx), %esi - movl %esi, -2(%edx) - mov $2, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit15Case2OrCase3): - movl -3(%ecx), %esi - movl %esi, -3(%edx) - mov $1, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave1): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit1) - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit1) - palignr $1, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit1) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit1) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit1): - lea 15(%edx, %esi), %edx - lea 15(%ecx, %esi), %ecx - movdqu -16(%ecx), %xmm0 - xor %esi, %esi - movdqu %xmm0, -16(%edx) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave2): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit2) - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit2) - palignr $2, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit2) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit2) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit2): - lea 14(%edx, %esi), %edx - lea 
14(%ecx, %esi), %ecx - movdqu -16(%ecx), %xmm0 - xor %esi, %esi - movdqu %xmm0, -16(%edx) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave3): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit3) - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit3) - palignr $3, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit3) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit3) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit3): - lea 13(%edx, %esi), %edx - lea 13(%ecx, %esi), %ecx - movdqu -16(%ecx), %xmm0 - xor %esi, %esi - movdqu %xmm0, -16(%edx) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave4): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit4) - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit4) - palignr $4, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit4) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit4) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit4): - lea 12(%edx, %esi), %edx - lea 12(%ecx, %esi), %ecx - movlpd -12(%ecx), %xmm0 - movl -4(%ecx), %eax - movlpd %xmm0, -12(%edx) - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave5): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit5) - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit5) - palignr $5, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit5) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit5) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit5): - lea 11(%edx, %esi), %edx - lea 
11(%ecx, %esi), %ecx - movlpd -11(%ecx), %xmm0 - movl -4(%ecx), %eax - movlpd %xmm0, -11(%edx) - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave6): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit6) - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit6) - palignr $6, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit6) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit6) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit6): - lea 10(%edx, %esi), %edx - lea 10(%ecx, %esi), %ecx - - movlpd -10(%ecx), %xmm0 - movw -2(%ecx), %ax - movlpd %xmm0, -10(%edx) - movw %ax, -2(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave7): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit7) - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit7) - palignr $7, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit7) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit7) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit7): - lea 9(%edx, %esi), %edx - lea 9(%ecx, %esi), %ecx - - movlpd -9(%ecx), %xmm0 - movb -1(%ecx), %ah - movlpd %xmm0, -9(%edx) - movb %ah, -1(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave8): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit8) - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit8) - palignr $8, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit8) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit8) - movaps %xmm5, 48(%edx) - lea 
16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit8): - lea 8(%edx, %esi), %edx - lea 8(%ecx, %esi), %ecx - movlpd -8(%ecx), %xmm0 - movlpd %xmm0, -8(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave9): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit9) - palignr $9, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit9) - palignr $9, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit9) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit9) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit9): - lea 7(%edx, %esi), %edx - lea 7(%ecx, %esi), %ecx - - movlpd -8(%ecx), %xmm0 - movlpd %xmm0, -8(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave10): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit10) - palignr $10, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit10) - palignr $10, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit10) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit10) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit10): - lea 6(%edx, %esi), %edx - lea 6(%ecx, %esi), %ecx - - movlpd -8(%ecx), %xmm0 - movlpd %xmm0, -8(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave11): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit11) - palignr $11, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit11) - palignr $11, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit11) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit11) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx 
-L(StrncpyExit11): - lea 5(%edx, %esi), %edx - lea 5(%ecx, %esi), %ecx - movl -5(%ecx), %esi - movb -1(%ecx), %ah - movl %esi, -5(%edx) - movb %ah, -1(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave12): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit12) - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit12) - palignr $12, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit12) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit12) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit12): - lea 4(%edx, %esi), %edx - lea 4(%ecx, %esi), %ecx - movl -4(%ecx), %eax - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave13): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit13) - palignr $13, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit13) - palignr $13, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit13) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit13) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit13): - lea 3(%edx, %esi), %edx - lea 3(%ecx, %esi), %ecx - - movl -4(%ecx), %eax - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave14): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit14) - palignr $14, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit14) - palignr $14, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit14) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit14) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx 
-L(StrncpyExit14): - lea 2(%edx, %esi), %edx - lea 2(%ecx, %esi), %ecx - movw -2(%ecx), %ax - movw %ax, -2(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave15): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit15) - palignr $15, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit15) - palignr $15, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit15) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit15) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit15): - lea 1(%edx, %esi), %edx - lea 1(%ecx, %esi), %ecx - movb -1(%ecx), %ah - movb %ah, -1(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) -# endif - -# ifndef USE_AS_STRCAT -# ifdef USE_AS_STRNCPY - CFI_POP (%esi) - CFI_POP (%edi) - - .p2align 4 -L(ExitTail0): - movl %edx, %eax - RETURN - - .p2align 4 -L(StrncpyExit15Bytes): - cmp $12, %ebx - jbe L(StrncpyExit12Bytes) - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - cmpb $0, 11(%ecx) - jz L(ExitTail12) - cmp $13, %ebx - je L(ExitTail13) - cmpb $0, 12(%ecx) - jz L(ExitTail13) - cmp $14, %ebx - je L(ExitTail14) - cmpb $0, 13(%ecx) - jz L(ExitTail14) - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) -# ifdef USE_AS_STPCPY - lea 14(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax -# else - movl %edx, %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit12Bytes): - cmp $9, %ebx - je L(ExitTail9) - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmp $10, %ebx - je L(ExitTail10) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmp $11, %ebx - je L(ExitTail11) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT_TAIL (11) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif - 
RETURN - - .p2align 4 -L(StrncpyExit8Bytes): - cmp $4, %ebx - jbe L(StrncpyExit4Bytes) - cmpb $0, (%ecx) - jz L(ExitTail1) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - cmpb $0, 3(%ecx) - jz L(ExitTail4) - - cmp $5, %ebx - je L(ExitTail5) - cmpb $0, 4(%ecx) - jz L(ExitTail5) - cmp $6, %ebx - je L(ExitTail6) - cmpb $0, 5(%ecx) - jz L(ExitTail6) - cmp $7, %ebx - je L(ExitTail7) - cmpb $0, 6(%ecx) - jz L(ExitTail7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) -# ifdef USE_AS_STPCPY - lea 7(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax -# else - movl %edx, %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit4Bytes): - test %ebx, %ebx - jz L(ExitTail0) - cmp $1, %ebx - je L(ExitTail1) - cmpb $0, (%ecx) - jz L(ExitTail1) - cmp $2, %ebx - je L(ExitTail2) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmp $3, %ebx - je L(ExitTail3) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT_TAIL (3) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif - RETURN -# endif - -END (STRCPY) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S deleted file mode 100644 index ffbc03c6d5..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy.S +++ /dev/null @@ -1,116 +0,0 @@ -/* Multiple versions of strcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY) -# ifndef STRCPY -# define STRCPY strcpy -# endif -#endif - -#ifdef USE_AS_STPCPY -# ifdef USE_AS_STRNCPY -# define STRCPY_SSSE3 __stpncpy_ssse3 -# define STRCPY_SSE2 __stpncpy_sse2 -# define STRCPY_IA32 __stpncpy_ia32 -# define __GI_STRCPY __GI_stpncpy -# define __GI___STRCPY __GI___stpncpy -# else -# define STRCPY_SSSE3 __stpcpy_ssse3 -# define STRCPY_SSE2 __stpcpy_sse2 -# define STRCPY_IA32 __stpcpy_ia32 -# define __GI_STRCPY __GI_stpcpy -# define __GI___STRCPY __GI___stpcpy -# endif -#else -# ifdef USE_AS_STRNCPY -# define STRCPY_SSSE3 __strncpy_ssse3 -# define STRCPY_SSE2 __strncpy_sse2 -# define STRCPY_IA32 __strncpy_ia32 -# define __GI_STRCPY __GI_strncpy -# else -# define STRCPY_SSSE3 __strcpy_ssse3 -# define STRCPY_SSE2 __strcpy_sse2 -# define STRCPY_IA32 __strcpy_ia32 -# define __GI_STRCPY __GI_strcpy -# endif -#endif - - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strncpy in static library since we - need strncpy before the initialization happened. 
*/ -#if IS_IN (libc) - - .text -ENTRY(STRCPY) - .type STRCPY, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (STRCPY_IA32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (STRCPY_SSE2) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (STRCPY_SSSE3) -2: ret -END(STRCPY) - -# undef ENTRY -# define ENTRY(name) \ - .type STRCPY_IA32, @function; \ - .align 16; \ - .globl STRCPY_IA32; \ - .hidden STRCPY_IA32; \ - STRCPY_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size STRCPY_IA32, .-STRCPY_IA32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal strcpy calls through a PLT. - The speedup we get from using SSSE3 instruction is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCPY; __GI_STRCPY = STRCPY_IA32 -# undef libc_hidden_def -# define libc_hidden_def(name) \ - .globl __GI___STRCPY; __GI___STRCPY = STRCPY_IA32 - -# endif -#endif - -#ifdef USE_AS_STPCPY -# ifdef USE_AS_STRNCPY -# include "../../stpncpy.S" -# else -# include "../../i586/stpcpy.S" -# endif -#else -# ifndef USE_AS_STRNCPY -# include "../../i586/strcpy.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strcspn-c.c b/sysdeps/i386/i686/multiarch/strcspn-c.c deleted file mode 100644 index 6d61e190a8..0000000000 --- a/sysdeps/i386/i686/multiarch/strcspn-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define __strcspn_sse2 __strcspn_ia32 -#include <sysdeps/x86_64/multiarch/strcspn-c.c> diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S deleted file mode 100644 index 21e5093924..0000000000 --- a/sysdeps/i386/i686/multiarch/strcspn.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Multiple versions of strcspn - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> -#include <sysdep.h> -#include <init-arch.h> - -#ifdef USE_AS_STRPBRK -#define STRCSPN_SSE42 __strpbrk_sse42 -#define STRCSPN_IA32 __strpbrk_ia32 -#define __GI_STRCSPN __GI_strpbrk -#else -#ifndef STRCSPN -#define STRCSPN strcspn -#define STRCSPN_SSE42 __strcspn_sse42 -#define STRCSPN_IA32 __strcspn_ia32 -#define __GI_STRCSPN __GI_strcspn -#endif -#endif - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strpbrk in static library since we - need strpbrk before the initialization happened. 
*/ -#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc) - .text -ENTRY(STRCSPN) - .type STRCSPN, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (STRCSPN_IA32) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (STRCSPN_SSE42) -2: ret -END(STRCSPN) - -# undef ENTRY -# define ENTRY(name) \ - .type STRCSPN_IA32, @function; \ - .globl STRCSPN_IA32; \ - .p2align 4; \ - STRCSPN_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size STRCSPN_IA32, .-STRCSPN_IA32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_IA32 -#endif - -#ifdef USE_AS_STRPBRK -#include "../../strpbrk.S" -#else -#include "../../strcspn.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S deleted file mode 100644 index d3ea864bab..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S +++ /dev/null @@ -1,125 +0,0 @@ -/* strlen with SSE2 and BSF - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined SHARED && IS_IN (libc) - -#include <sysdep.h> - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) -#define PARMS 4 + 8 /* Preserve ESI and EDI. */ -#define STR PARMS -#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state -#define RETURN POP (%edi); POP (%esi); ret; \ - cfi_restore_state; cfi_remember_state - - .text -ENTRY ( __strlen_sse2_bsf) - ENTRANCE - mov STR(%esp), %edi - xor %eax, %eax - mov %edi, %ecx - and $0x3f, %ecx - pxor %xmm0, %xmm0 - cmp $0x30, %ecx - ja L(next) - movdqu (%edi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit_less16) - mov %edi, %eax - and $-16, %eax - jmp L(align16_start) -L(next): - - mov %edi, %eax - and $-16, %eax - pcmpeqb (%eax), %xmm0 - mov $-1, %esi - sub %eax, %ecx - shl %cl, %esi - pmovmskb %xmm0, %edx - and %esi, %edx - jnz L(exit) -L(align16_start): - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - .p2align 4 -L(align16_loop): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%eax), %eax - test %edx, %edx - jz L(align16_loop) -L(exit): - sub %edi, %eax -L(exit_less16): - bsf %edx, %edx - add %edx, %eax - RETURN -L(exit16): - sub %edi, %eax - bsf %edx, %edx - add %edx, %eax - add $16, %eax - RETURN -L(exit32): - sub %edi, %eax - bsf %edx, %edx - add %edx, %eax - add $32, %eax - RETURN -L(exit48): - sub %edi, %eax - bsf %edx, 
%edx - add %edx, %eax - add $48, %eax - POP (%edi) - POP (%esi) - ret - -END ( __strlen_sse2_bsf) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S deleted file mode 100644 index 36fc1469d0..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen-sse2.S +++ /dev/null @@ -1,695 +0,0 @@ -/* strlen with SSE2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -/* for strlen only SHARED version is optimized, for strcat, strncat, strnlen both STATIC and SHARED are optimized */ - -#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && IS_IN (libc) - -# ifndef USE_AS_STRCAT - -# include <sysdep.h> -# define PARMS 4 -# define STR PARMS -# define RETURN ret - -# ifdef USE_AS_STRNLEN -# define LEN PARMS + 8 -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) -# undef RETURN -# define RETURN POP (%edi); CFI_PUSH(%edi); ret -# endif - -# ifndef STRLEN -# define STRLEN __strlen_sse2 -# endif - - atom_text_section -ENTRY (STRLEN) - mov STR(%esp), %edx -# ifdef USE_AS_STRNLEN - PUSH (%edi) - movl LEN(%esp), %edi - sub $4, %edi - jbe L(len_less4_prolog) -# endif -# endif - xor %eax, %eax - cmpb $0, (%edx) - jz L(exit_tail0) - cmpb $0, 1(%edx) - jz L(exit_tail1) - cmpb $0, 2(%edx) - jz L(exit_tail2) - cmpb $0, 3(%edx) - jz L(exit_tail3) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less8_prolog) -# endif - - cmpb $0, 4(%edx) - jz L(exit_tail4) - cmpb $0, 5(%edx) - jz L(exit_tail5) - cmpb $0, 6(%edx) - jz L(exit_tail6) - cmpb $0, 7(%edx) - jz L(exit_tail7) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less12_prolog) -# endif - - cmpb $0, 8(%edx) - jz L(exit_tail8) - cmpb $0, 9(%edx) - jz L(exit_tail9) - cmpb $0, 10(%edx) - jz L(exit_tail10) - cmpb $0, 11(%edx) - jz L(exit_tail11) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less16_prolog) -# endif - - cmpb $0, 12(%edx) - jz L(exit_tail12) - cmpb $0, 13(%edx) - jz L(exit_tail13) - cmpb $0, 14(%edx) - jz L(exit_tail14) - cmpb $0, 15(%edx) - jz L(exit_tail15) - - pxor %xmm0, %xmm0 - lea 16(%edx), %eax - mov %eax, %ecx - and $-16, %eax - -# ifdef USE_AS_STRNLEN - and $15, %edx - add %edx, %edi - sub $64, %edi - jbe L(len_less64) -# endif - - 
pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - mov %eax, %edx - and $63, %edx - add %edx, %edi -# endif - - and 
$-0x40, %eax - - .p2align 4 -L(aligned_64_loop): -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - movaps (%eax), %xmm0 - movaps 16(%eax), %xmm1 - movaps 32(%eax), %xmm2 - movaps 48(%eax), %xmm6 - pminub %xmm1, %xmm0 - pminub %xmm6, %xmm2 - pminub %xmm0, %xmm2 - pcmpeqb %xmm3, %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 64(%eax), %eax - jz L(aligned_64_loop) - - pcmpeqb -64(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 48(%ecx), %ecx - jnz L(exit) - - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqb -32(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqb %xmm6, %xmm3 - pmovmskb %xmm3, %edx - lea -16(%ecx), %ecx -L(exit): - sub %ecx, %eax - test %dl, %dl - jz L(exit_high) - - mov %dl, %cl - and $15, %cl - jz L(exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(exit_tail1) - test $0x04, %dl - jnz L(exit_tail2) - add $3, %eax - RETURN - - .p2align 4 -L(exit_8): - test $0x10, %dl - jnz L(exit_tail4) - test $0x20, %dl - jnz L(exit_tail5) - test $0x40, %dl - jnz L(exit_tail6) - add $7, %eax - RETURN - - .p2align 4 -L(exit_high): - mov %dh, %ch - and $15, %ch - jz L(exit_high_8) - test $0x01, %dh - jnz L(exit_tail8) - test $0x02, %dh - jnz L(exit_tail9) - test $0x04, %dh - jnz L(exit_tail10) - add $11, %eax - RETURN - - .p2align 4 -L(exit_high_8): - test $0x10, %dh - jnz L(exit_tail12) - test $0x20, %dh - jnz L(exit_tail13) - test $0x40, %dh - jnz L(exit_tail14) - add $15, %eax -L(exit_tail0): - RETURN - -# ifdef USE_AS_STRNLEN - - .p2align 4 -L(len_less64): - pxor %xmm0, %xmm0 - add $64, %edi - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe 
L(return_start_len) - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - movl LEN(%esp), %eax - RETURN - - .p2align 4 -L(strnlen_exit): - sub %ecx, %eax - - test %dl, %dl - jz L(strnlen_exit_high) - mov %dl, %cl - and $15, %cl - jz L(strnlen_exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(strnlen_exit_tail1) - test $0x04, %dl - jnz L(strnlen_exit_tail2) - sub $4, %edi - jb L(return_start_len) - lea 3(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_8): - test $0x10, %dl - jnz L(strnlen_exit_tail4) - test $0x20, %dl - jnz L(strnlen_exit_tail5) - test $0x40, %dl - jnz L(strnlen_exit_tail6) - sub $8, %edi - jb L(return_start_len) - lea 7(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_high): - mov %dh, %ch - and $15, %ch - jz L(strnlen_exit_high_8) - test $0x01, %dh - jnz L(strnlen_exit_tail8) - test $0x02, %dh - jnz L(strnlen_exit_tail9) - test $0x04, %dh - jnz L(strnlen_exit_tail10) - sub $12, %edi - jb L(return_start_len) - lea 11(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_high_8): - test $0x10, %dh - jnz L(strnlen_exit_tail12) - test $0x20, %dh - jnz L(strnlen_exit_tail13) - test $0x40, %dh - jnz L(strnlen_exit_tail14) - sub $16, %edi - jb L(return_start_len) - lea 15(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail1): - sub $2, %edi - jb L(return_start_len) - lea 1(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail2): - sub $3, %edi - jb L(return_start_len) - lea 2(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail4): - sub $5, %edi - jb L(return_start_len) - lea 4(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail5): - sub $6, %edi - jb L(return_start_len) - lea 5(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail6): - sub $7, %edi - jb L(return_start_len) - lea 6(%eax), %eax - 
RETURN - - .p2align 4 -L(strnlen_exit_tail8): - sub $9, %edi - jb L(return_start_len) - lea 8(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail9): - sub $10, %edi - jb L(return_start_len) - lea 9(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail10): - sub $11, %edi - jb L(return_start_len) - lea 10(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail12): - sub $13, %edi - jb L(return_start_len) - lea 12(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail13): - sub $14, %edi - jb L(return_start_len) - lea 13(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail14): - sub $15, %edi - jb L(return_start_len) - lea 14(%eax), %eax - RETURN - - .p2align 4 -L(return_start_len): - movl LEN(%esp), %eax - RETURN - -/* for prolog only */ - - .p2align 4 -L(len_less4_prolog): - xor %eax, %eax - - add $4, %edi - jz L(exit_tail0) - - cmpb $0, (%edx) - jz L(exit_tail0) - cmp $1, %edi - je L(exit_tail1) - - cmpb $0, 1(%edx) - jz L(exit_tail1) - cmp $2, %edi - je L(exit_tail2) - - cmpb $0, 2(%edx) - jz L(exit_tail2) - cmp $3, %edi - je L(exit_tail3) - - cmpb $0, 3(%edx) - jz L(exit_tail3) - mov $4, %eax - RETURN - - .p2align 4 -L(len_less8_prolog): - add $4, %edi - - cmpb $0, 4(%edx) - jz L(exit_tail4) - cmp $1, %edi - je L(exit_tail5) - - cmpb $0, 5(%edx) - jz L(exit_tail5) - cmp $2, %edi - je L(exit_tail6) - - cmpb $0, 6(%edx) - jz L(exit_tail6) - cmp $3, %edi - je L(exit_tail7) - - cmpb $0, 7(%edx) - jz L(exit_tail7) - mov $8, %eax - RETURN - - - .p2align 4 -L(len_less12_prolog): - add $4, %edi - - cmpb $0, 8(%edx) - jz L(exit_tail8) - cmp $1, %edi - je L(exit_tail9) - - cmpb $0, 9(%edx) - jz L(exit_tail9) - cmp $2, %edi - je L(exit_tail10) - - cmpb $0, 10(%edx) - jz L(exit_tail10) - cmp $3, %edi - je L(exit_tail11) - - cmpb $0, 11(%edx) - jz L(exit_tail11) - mov $12, %eax - RETURN - - .p2align 4 -L(len_less16_prolog): - add $4, %edi - - cmpb $0, 12(%edx) - jz L(exit_tail12) - cmp $1, %edi - je L(exit_tail13) - - cmpb $0, 13(%edx) - jz L(exit_tail13) - 
cmp $2, %edi - je L(exit_tail14) - - cmpb $0, 14(%edx) - jz L(exit_tail14) - cmp $3, %edi - je L(exit_tail15) - - cmpb $0, 15(%edx) - jz L(exit_tail15) - mov $16, %eax - RETURN -# endif - - .p2align 4 -L(exit_tail1): - add $1, %eax - RETURN - -L(exit_tail2): - add $2, %eax - RETURN - -L(exit_tail3): - add $3, %eax - RETURN - -L(exit_tail4): - add $4, %eax - RETURN - -L(exit_tail5): - add $5, %eax - RETURN - -L(exit_tail6): - add $6, %eax - RETURN - -L(exit_tail7): - add $7, %eax - RETURN - -L(exit_tail8): - add $8, %eax - RETURN - -L(exit_tail9): - add $9, %eax - RETURN - -L(exit_tail10): - add $10, %eax - RETURN - -L(exit_tail11): - add $11, %eax - RETURN - -L(exit_tail12): - add $12, %eax - RETURN - -L(exit_tail13): - add $13, %eax - RETURN - -L(exit_tail14): - add $14, %eax - RETURN - -L(exit_tail15): - add $15, %eax -# ifndef USE_AS_STRCAT - RETURN -END (STRLEN) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S deleted file mode 100644 index 77cf6bcdb0..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen.S +++ /dev/null @@ -1,60 +0,0 @@ -/* Multiple versions of strlen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc and for the - DSO. In static binaries, we need strlen before the initialization - happened. */ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(strlen) - .type strlen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strlen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strlen_sse2) -2: ret -END(strlen) - -# undef ENTRY -# define ENTRY(name) \ - .type __strlen_ia32, @function; \ - .globl __strlen_ia32; \ - .p2align 4; \ - __strlen_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strlen_ia32, .-__strlen_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strlen; __GI_strlen = __strlen_ia32 -#endif - -#include "../../i586/strlen.S" diff --git a/sysdeps/i386/i686/multiarch/strncase-c.c b/sysdeps/i386/i686/multiarch/strncase-c.c deleted file mode 100644 index 76581eb62b..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <string.h> - -extern __typeof (strncasecmp) __strncasecmp_nonascii; - -#define __strncasecmp __strncasecmp_nonascii -#include <string/strncase.c> - -strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32) diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S deleted file mode 100644 index a56e63a566..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Entry point for multi-version x86 strncasecmp. 
- All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY(__strncasecmp) - .type __strncasecmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strncasecmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - HAS_ARCH_FEATURE (Slow_SSE4_2) - jnz 2f - LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2) -2: ret -END(__strncasecmp) - -weak_alias (__strncasecmp, strncasecmp) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-c.c b/sysdeps/i386/i686/multiarch/strncase_l-c.c deleted file mode 100644 index 7e601af271..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-c.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <string.h> - -extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii; - -#define __strncasecmp_l __strncasecmp_l_nonascii -#define USE_IN_EXTENDED_LOCALE_MODEL 1 -#include <string/strncase.c> - -strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. 
*/ -strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S deleted file mode 100644 index 557210832e..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRNCASECMP_L 1 -#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S deleted file mode 100644 index d438a1ae35..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRNCASECMP_L 1 -#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncase_l.S b/sysdeps/i386/i686/multiarch/strncase_l.S deleted file mode 100644 index 8a74ee8574..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l.S +++ /dev/null @@ -1,7 +0,0 @@ -/* Multiple versions of strncasecmp_l - All versions must be listed in ifunc-impl-list.c. */ -#define STRCMP __strncasecmp_l -#define USE_AS_STRNCASECMP_L -#include "strcmp.S" - -weak_alias (__strncasecmp_l, strncasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c b/sysdeps/i386/i686/multiarch/strncat-c.c deleted file mode 100644 index 132a000545..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#define STRNCAT __strncat_ia32 -#ifdef SHARED -#undef libc_hidden_def -#define libc_hidden_def(name) \ - __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32); -#endif - -#include "string/strncat.c" diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S b/sysdeps/i386/i686/multiarch/strncat-sse2.S deleted file mode 100644 index f1045b72b8..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat-sse2.S +++ /dev/null @@ -1,4 +0,0 @@ -#define STRCAT __strncat_sse2 -#define USE_AS_STRNCAT - -#include "strcat-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/sysdeps/i386/i686/multiarch/strncat-ssse3.S deleted file mode 
100644 index 625b90a978..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define STRCAT __strncat_ssse3 -#define USE_AS_STRNCAT - -#include "strcat-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncat.S b/sysdeps/i386/i686/multiarch/strncat.S deleted file mode 100644 index 5c1bf41453..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strncat - All versions must be listed in ifunc-impl-list.c. */ -#define STRCAT strncat -#define USE_AS_STRNCAT -#include "strcat.S" diff --git a/sysdeps/i386/i686/multiarch/strncmp-c.c b/sysdeps/i386/i686/multiarch/strncmp-c.c deleted file mode 100644 index cc059da494..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#ifdef SHARED -# define STRNCMP __strncmp_ia32 -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32); -#endif - -#include "string/strncmp.c" diff --git a/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/sysdeps/i386/i686/multiarch/strncmp-sse4.S deleted file mode 100644 index cf14dfaf6c..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp-sse4.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef SHARED -# define USE_AS_STRNCMP -# define STRCMP __strncmp_sse4_2 -# include "strcmp-sse4.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S deleted file mode 100644 index 536c8685f2..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef SHARED -# define USE_AS_STRNCMP -# define STRCMP __strncmp_ssse3 -# include "strcmp-ssse3.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strncmp.S b/sysdeps/i386/i686/multiarch/strncmp.S deleted file mode 100644 index 150d4786d2..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strncmp - All versions must be listed 
in ifunc-impl-list.c. */ -#define USE_AS_STRNCMP -#define STRCMP strncmp -#include "strcmp.S" diff --git a/sysdeps/i386/i686/multiarch/strncpy-c.c b/sysdeps/i386/i686/multiarch/strncpy-c.c deleted file mode 100644 index 201e3f98b3..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#define STRNCPY __strncpy_ia32 -#ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strncpy_ia32, __GI_strncpy, __strncpy_ia32); -#endif - -#include "string/strncpy.c" diff --git a/sysdeps/i386/i686/multiarch/strncpy-sse2.S b/sysdeps/i386/i686/multiarch/strncpy-sse2.S deleted file mode 100644 index bdd99239a4..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNCPY -#define STRCPY __strncpy_sse2 -#include "strcpy-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S b/sysdeps/i386/i686/multiarch/strncpy-ssse3.S deleted file mode 100644 index bf82ee447d..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNCPY -#define STRCPY __strncpy_ssse3 -#include "strcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncpy.S b/sysdeps/i386/i686/multiarch/strncpy.S deleted file mode 100644 index 9c257efc6e..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strncpy - All versions must be listed in ifunc-impl-list.c. 
*/ -#define USE_AS_STRNCPY -#define STRCPY strncpy -#include "strcpy.S" diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c deleted file mode 100644 index 351e939a93..0000000000 --- a/sysdeps/i386/i686/multiarch/strnlen-c.c +++ /dev/null @@ -1,10 +0,0 @@ -#define STRNLEN __strnlen_ia32 -#ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); \ - strong_alias (__strnlen_ia32, __strnlen_ia32_1); \ - __hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1); -#endif - -#include "string/strnlen.c" diff --git a/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/sysdeps/i386/i686/multiarch/strnlen-sse2.S deleted file mode 100644 index 56b6ae2a5c..0000000000 --- a/sysdeps/i386/i686/multiarch/strnlen-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNLEN -#define STRLEN __strnlen_sse2 -#include "strlen-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S deleted file mode 100644 index d241522c70..0000000000 --- a/sysdeps/i386/i686/multiarch/strnlen.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of strnlen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__strnlen) - .type __strnlen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strnlen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strnlen_sse2) -2: ret -END(__strnlen) - -weak_alias(__strnlen, strnlen) -#endif diff --git a/sysdeps/i386/i686/multiarch/strpbrk-c.c b/sysdeps/i386/i686/multiarch/strpbrk-c.c deleted file mode 100644 index 5db62053b3..0000000000 --- a/sysdeps/i386/i686/multiarch/strpbrk-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define __strpbrk_sse2 __strpbrk_ia32 -#include <sysdeps/x86_64/multiarch/strpbrk-c.c> diff --git a/sysdeps/i386/i686/multiarch/strpbrk.S b/sysdeps/i386/i686/multiarch/strpbrk.S deleted file mode 100644 index 7201d6376f..0000000000 --- a/sysdeps/i386/i686/multiarch/strpbrk.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strpbrk - All versions must be listed in ifunc-impl-list.c. */ -#define STRCSPN strpbrk -#define USE_AS_STRPBRK -#include "strcspn.S" diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S deleted file mode 100644 index 39a7c8825b..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S +++ /dev/null @@ -1,282 +0,0 @@ -/* strrchr with SSE2 with bsf and bsr - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - - .text -ENTRY (__strrchr_sse2_bsf) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - PUSH (%edi) - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscashe) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. 
*/ - pmovmskb %xmm0, %eax - - test %eax, %eax - jnz L(unaligned_match1) - - test %edx, %edx - jnz L(return_null) - - and $-16, %edi - add $16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_return_value1): - bsf %edx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_null) - bsr %eax, %eax - add %edi, %eax - POP (%edi) - ret - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_match1): - test %edx, %edx - jnz L(unaligned_return_value1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea 16(%edi), %esi - and $-16, %edi - add $16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 - L(crosscashe): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - add $16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_return_value): - add %ecx, %edi - bsf %edx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_null) - bsr %eax, %eax - add %edi, %eax - POP (%edi) - ret - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(unaligned_return_value) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - add $16, %edi - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - bsr %ebx, %eax - add %esi, %eax - - POP (%ebx) - POP (%esi) - - sub $16, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(return_value_1) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(return_value_1): - bsf %ecx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - bsr %eax, %eax - add %edi, %eax - sub $16, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %eax, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - CFI_PUSH (%ebx) - CFI_PUSH (%esi) -/* Return NULL. */ - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - POP (%edi) - xor %eax, %eax - ret - -END (__strrchr_sse2_bsf) -#endif diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S deleted file mode 100644 index 20934288be..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2.S +++ /dev/null @@ -1,708 +0,0 @@ -/* strrchr SSE2 without bsf and bsr - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi); -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__strrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %ecx - /* Check if there is a match. 
*/ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - mov %ebx, %eax - mov %esi, %edi - - POP (%ebx) - POP (%esi) - - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(find_zero_8) - test $0x01, %cl - jnz L(FindZeroExit1) - test $0x02, %cl - jnz L(FindZeroExit2) - test $0x04, %cl - jnz L(FindZeroExit3) - and $1 << 4 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_8): - test $0x10, %cl - jnz L(FindZeroExit5) - test $0x20, %cl - jnz L(FindZeroExit6) - test $0x40, %cl - jnz L(FindZeroExit7) - and $1 << 8 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(find_zero_high_8) - test $0x01, %ch - jnz L(FindZeroExit9) - test $0x02, %ch - jnz 
L(FindZeroExit10) - test $0x04, %ch - jnz L(FindZeroExit11) - and $1 << 12 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high_8): - test $0x10, %ch - jnz L(FindZeroExit13) - test $0x20, %ch - jnz L(FindZeroExit14) - test $0x40, %ch - jnz L(FindZeroExit15) - and $1 << 16 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit1): - and $1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit2): - and $1 << 2 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit3): - and $1 << 3 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit5): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit6): - and $1 << 6 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit7): - and $1 << 7 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit9): - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit10): - and $1 << 10 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit11): - and $1 << 11 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 
-L(FindZeroExit13): - and $1 << 13 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit14): - and $1 << 14 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit15): - and $1 << 15 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - .p2align 4 -L(match_exit): - test %ah, %ah - jnz L(match_exit_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(match_exit_8) - test $0x08, %al - jnz L(Exit4) - test $0x04, %al - jnz L(Exit3) - test $0x02, %al - jnz L(Exit2) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_8): - test $0x80, %al - jnz L(Exit8) - test $0x40, %al - jnz L(Exit7) - test $0x20, %al - jnz L(Exit6) - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(match_exit_high_8) - test $0x08, %ah - jnz L(Exit12) - test $0x04, %ah - jnz L(Exit11) - test $0x02, %ah - jnz L(Exit10) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high_8): - test $0x80, %ah - jnz L(Exit16) - test $0x40, %ah - jnz L(Exit15) - test $0x20, %ah - jnz L(Exit14) - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(Exit2): - lea -15(%edi), %eax - RETURN - - .p2align 4 -L(Exit3): - lea -14(%edi), %eax - RETURN - - .p2align 4 -L(Exit4): - lea -13(%edi), %eax - RETURN - - .p2align 4 -L(Exit6): - lea -11(%edi), %eax - RETURN - - .p2align 4 -L(Exit7): - lea -10(%edi), %eax - RETURN - - .p2align 4 -L(Exit8): - lea -9(%edi), %eax - RETURN - - .p2align 4 -L(Exit10): - lea -7(%edi), %eax - RETURN - - .p2align 4 -L(Exit11): - lea -6(%edi), %eax - RETURN - - .p2align 4 -L(Exit12): - lea -5(%edi), %eax - RETURN - - .p2align 4 -L(Exit14): - lea -3(%edi), %eax - RETURN - - .p2align 4 -L(Exit15): - lea -2(%edi), %eax - RETURN - - .p2align 4 -L(Exit16): - lea -1(%edi), %eax - RETURN - -/* Return NULL. 
*/ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(prolog_find_zero_8) - test $0x01, %cl - jnz L(PrologFindZeroExit1) - test $0x02, %cl - jnz L(PrologFindZeroExit2) - test $0x04, %cl - jnz L(PrologFindZeroExit3) - and $1 << 4 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_8): - test $0x10, %cl - jnz L(PrologFindZeroExit5) - test $0x20, %cl - jnz L(PrologFindZeroExit6) - test $0x40, %cl - jnz L(PrologFindZeroExit7) - and $1 << 8 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(prolog_find_zero_high_8) - test $0x01, %ch - jnz L(PrologFindZeroExit9) - test $0x02, %ch - jnz L(PrologFindZeroExit10) - test $0x04, %ch - jnz L(PrologFindZeroExit11) - and $1 << 12 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high_8): - test $0x10, %ch - jnz L(PrologFindZeroExit13) - test $0x20, %ch - jnz L(PrologFindZeroExit14) - test $0x40, %ch - jnz L(PrologFindZeroExit15) - and $1 << 16 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit1): - and $1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit2): - and $1 << 2 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit3): - and $1 << 3 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit5): - and $1 << 5 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit6): - and $1 << 6 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit7): - and $1 << 7 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit9): - and $1 << 9 
- 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit10): - and $1 << 10 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit11): - and $1 << 11 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit13): - and $1 << 13 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit14): - and $1 << 14 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit15): - and $1 << 15 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - -END (__strrchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S deleted file mode 100644 index d9281eaeae..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr.S +++ /dev/null @@ -1,57 +0,0 @@ -/* Multiple versions of strrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(strrchr) - .type strrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strrchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strrchr_sse2) -2: ret -END(strrchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __strrchr_ia32, @function; \ - .globl __strrchr_ia32; \ - .p2align 4; \ - __strrchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strrchr; __GI_strrchr = __strrchr_ia32 -#endif - -#include "../../strrchr.S" diff --git a/sysdeps/i386/i686/multiarch/strspn-c.c b/sysdeps/i386/i686/multiarch/strspn-c.c deleted file mode 100644 index bea09dea71..0000000000 --- a/sysdeps/i386/i686/multiarch/strspn-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define __strspn_sse2 __strspn_ia32 -#include <sysdeps/x86_64/multiarch/strspn-c.c> diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S deleted file mode 100644 index 1269062381..0000000000 --- a/sysdeps/i386/i686/multiarch/strspn.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of strspn - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) - .text -ENTRY(strspn) - .type strspn, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strspn_ia32) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (__strspn_sse42) -2: ret -END(strspn) - -# undef ENTRY -# define ENTRY(name) \ - .type __strspn_ia32, @function; \ - .globl __strspn_ia32; \ - .p2align 4; \ -__strspn_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strspn_ia32, .-__strspn_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strspn; __GI_strspn = __strspn_ia32 -#endif - -#include "../../strspn.S" diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c deleted file mode 100644 index 593cfec273..0000000000 --- a/sysdeps/i386/i686/multiarch/test-multiarch.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/test-multiarch.c> diff --git a/sysdeps/i386/i686/multiarch/varshift.c b/sysdeps/i386/i686/multiarch/varshift.c deleted file mode 100644 index 7760b966e2..0000000000 --- a/sysdeps/i386/i686/multiarch/varshift.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/varshift.c> diff --git a/sysdeps/i386/i686/multiarch/varshift.h b/sysdeps/i386/i686/multiarch/varshift.h deleted file mode 100644 index 7c72c70d67..0000000000 --- a/sysdeps/i386/i686/multiarch/varshift.h +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/varshift.h> diff --git a/sysdeps/i386/i686/multiarch/wcschr-c.c b/sysdeps/i386/i686/multiarch/wcschr-c.c deleted file mode 100644 index 38d41d04de..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr-c.c +++ /dev/null @@ -1,22 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# undef libc_hidden_weak -# define libc_hidden_weak(name) - -# undef weak_alias -# define weak_alias(name,alias) - -# ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__wcschr_ia32, __GI_wcschr, __wcschr_ia32); \ - strong_alias (__wcschr_ia32, __wcschr_ia32_1); \ - __hidden_ver1 (__wcschr_ia32_1, __GI___wcschr, __wcschr_ia32_1); -# endif -#endif - -extern __typeof (wcschr) __wcschr_ia32; - -#define WCSCHR __wcschr_ia32 -#include <wcsmbs/wcschr.c> diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S deleted file mode 100644 index 9ff6c3b8d6..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S +++ /dev/null @@ -1,219 +0,0 @@ -/* wcschr with SSE2, without using bsf 
instructions - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__wcschr_sse2) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - mov %ecx, %eax - punpckldq %xmm1, %xmm1 - pxor %xmm2, %xmm2 - punpckldq %xmm1, %xmm1 - - and $63, %eax - cmp $48, %eax - ja L(cross_cache) - - movdqu (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - and $-16, %ecx - jmp L(loop) - - .p2align 4 -L(cross_cache): - PUSH (%edi) - mov %ecx, %edi - mov %eax, %ecx - and $-16, %edi - and $15, %ecx - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - jz L(unaligned_no_match) - - add %edi, %ecx - POP (%edi) - - test %edx, %edx - jz L(match_case1) - test %al, 
%al - jz L(match_higth_case2) - test $15, %al - jnz L(match_case2_4) - test $15, %dl - jnz L(return_null) - lea 4(%ecx), %eax - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_no_match): - mov %edi, %ecx - POP (%edi) - - test %edx, %edx - jnz L(return_null) - - pxor %xmm2, %xmm2 - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - add $16, %ecx - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jz L(loop) - - .p2align 4 -L(matches): - pmovmskb %xmm2, %edx - test %eax, %eax - jz L(return_null) - test %edx, %edx - jz L(match_case1) - - .p2align 4 -L(match_case2): - test %al, %al - jz L(match_higth_case2) - test $15, %al - jnz L(match_case2_4) - test $15, %dl - jnz L(return_null) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(match_case2_4): - mov %ecx, %eax - ret - - .p2align 4 -L(match_higth_case2): - test %dl, %dl - jnz L(return_null) - test $15, %ah - jnz L(match_case2_12) - test $15, %dh - jnz L(return_null) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(match_case2_12): - lea 8(%ecx), %eax - ret - - .p2align 4 -L(match_case1): - test %al, %al - jz L(match_higth_case1) - - test $0x01, %al - jnz L(exit0) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(match_higth_case1): - test $0x01, %ah - jnz L(exit3) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit0): - mov %ecx, %eax - ret - - .p2align 4 -L(exit3): - lea 8(%ecx), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - -END 
(__wcschr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S deleted file mode 100644 index d3c65a6436..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of wcschr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__wcschr) - .type wcschr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcschr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcschr_sse2) -2: ret -END(__wcschr) -weak_alias (__wcschr, wcschr) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscmp-c.c b/sysdeps/i386/i686/multiarch/wcscmp-c.c deleted file mode 100644 index e3337d77e2..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscmp-c.c +++ /dev/null @@ -1,14 +0,0 @@ -#include <wchar.h> - -#define WCSCMP __wcscmp_ia32 -#ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__wcscmp_ia32, __GI___wcscmp, __wcscmp_ia32); -#endif -#undef weak_alias -#define weak_alias(name, alias) - -extern __typeof (wcscmp) __wcscmp_ia32; - -#include "wcsmbs/wcscmp.c" diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S deleted file mode 100644 index a464b58204..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S +++ /dev/null @@ -1,1018 +0,0 @@ -/* wcscmp with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define ENTRANCE PUSH(%esi); PUSH(%edi) -# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - -/* Note: wcscmp uses signed comparison, not unsugned as in strcmp function. */ - - .text -ENTRY (__wcscmp_sse2) -/* - * This implementation uses SSE to compare up to 16 bytes at a time. -*/ - mov STR1(%esp), %edx - mov STR2(%esp), %eax - - mov (%eax), %ecx - cmp %ecx, (%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - mov 4(%eax), %ecx - cmp %ecx, 4(%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - mov 8(%eax), %ecx - cmp %ecx, 8(%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - mov 12(%eax), %ecx - cmp %ecx, 12(%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - ENTRANCE - add $16, %eax - add $16, %edx - - mov %eax, %esi - mov %edx, %edi - pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ - mov %al, %ch - mov %dl, %cl - and $63, %eax /* esi alignment in cache line */ - and $63, %edx /* edi alignment in cache line */ - and $15, %cl - jz L(continue_00) - cmp $16, %edx - jb L(continue_0) - cmp $32, %edx - jb L(continue_16) - cmp $48, %edx - jb L(continue_32) - -L(continue_48): - and $15, %ch - jz L(continue_48_00) - cmp $16, %eax - jb L(continue_0_48) - cmp $32, %eax - jb L(continue_16_48) - cmp $48, %eax - jb L(continue_32_48) - - .p2align 4 -L(continue_48_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne 
L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%edi), %xmm1 - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_48_48) - -L(continue_0): - and $15, %ch - jz L(continue_0_00) - cmp $16, %eax - jb L(continue_0_0) - cmp $32, %eax - jb L(continue_0_16) - cmp $48, %eax - jb L(continue_0_32) - - .p2align 4 -L(continue_0_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - mov 48(%esi), %ecx - cmp %ecx, 48(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 52(%esi), %ecx - cmp %ecx, 52(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 56(%esi), %ecx - cmp %ecx, 56(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 60(%esi), %ecx - cmp %ecx, 60(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - add $64, %esi - add $64, %edi - jmp L(continue_0_48) - - .p2align 4 -L(continue_00): - and $15, %ch - jz L(continue_00_00) - cmp $16, %eax - jb L(continue_00_0) - cmp $32, %eax - jb L(continue_00_16) - cmp $48, %eax - jb L(continue_00_32) - - .p2align 4 -L(continue_00_48): - pcmpeqd (%edi), %xmm0 - mov (%edi), %eax - pmovmskb %xmm0, %ecx - test %ecx, %ecx - jnz L(less4_double_words1) - - cmp (%esi), %eax - jne L(nequal) - - mov 4(%edi), %eax - cmp 4(%esi), %eax - jne L(nequal) - - mov 8(%edi), %eax - cmp 8(%esi), %eax - jne L(nequal) - - mov 12(%edi), %eax - cmp 12(%esi), %eax - jne L(nequal) - - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_32): - and $15, %ch - jz L(continue_32_00) - cmp $16, %eax - jb L(continue_0_32) - cmp $32, %eax - jb L(continue_16_32) - cmp $48, %eax - jb L(continue_32_32) - - .p2align 4 -L(continue_32_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 16(%esi), %ecx - cmp %ecx, 16(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 20(%esi), %ecx - cmp %ecx, 20(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 24(%esi), %ecx - cmp %ecx, 24(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 28(%esi), %ecx - cmp %ecx, 28(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%edi), %xmm1 - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results */ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_32_48) - - .p2align 4 -L(continue_16): - and $15, %ch - jz L(continue_16_00) - cmp $16, %eax - jb L(continue_0_16) - cmp $32, %eax - jb L(continue_16_16) - cmp $48, %eax - jb L(continue_16_32) - - .p2align 4 -L(continue_16_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - mov 32(%esi), %ecx - cmp %ecx, 32(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 36(%esi), %ecx - cmp %ecx, 36(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 40(%esi), %ecx - cmp %ecx, 40(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 44(%esi), %ecx - cmp %ecx, 44(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 48(%edi), %xmm1 - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_16_48) - - .p2align 4 -L(continue_00_00): - movdqa (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqa 16(%edi), %xmm3 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqa 32(%edi), %xmm5 - pcmpeqd %xmm5, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm5 /* packed sub of comparison results*/ - pmovmskb %xmm5, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqa 48(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_00_00) - - .p2align 4 -L(continue_00_32): - movdqu (%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_00_16): - movdqu (%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_00_0): - movdqu (%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %esi - add $48, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_48_00): - pcmpeqd (%esi), %xmm0 - mov (%edi), %eax - pmovmskb %xmm0, %ecx - test %ecx, %ecx - jnz L(less4_double_words1) - - cmp (%esi), %eax - jne L(nequal) - - mov 4(%edi), %eax - cmp 4(%esi), %eax - jne L(nequal) - - mov 8(%edi), %eax - cmp 8(%esi), %eax - jne L(nequal) - - mov 12(%edi), %eax - cmp 12(%esi), %eax - jne L(nequal) - - movdqu 16(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_32_00): - movdqu (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_16_00): - movdqu (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_0_00): - movdqu (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %esi - add $48, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_32_32): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_48_48) - - .p2align 4 -L(continue_16_16): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm3 - movdqu 16(%esi), %xmm4 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_48_48) - - .p2align 4 -L(continue_0_0): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm3 - movdqu 16(%esi), %xmm4 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %esi - add $48, %edi - jmp L(continue_48_48) - - .p2align 4 -L(continue_0_16): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_32_48) - - .p2align 4 -L(continue_0_32): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_16_48) - - .p2align 4 -L(continue_16_32): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_32_48) - - .p2align 4 -L(less4_double_words1): - cmp (%esi), %eax - jne L(nequal) - test %eax, %eax - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - xor %eax, %eax - RETURN - - .p2align 4 -L(less4_double_words): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words) - and $15, %dl - jz L(second_double_word) - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word): - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words): - and $15, %dh - jz L(fourth_double_word) - mov 8(%esi), %ecx - 
cmp %ecx, 8(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word): - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(less4_double_words_16): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_16) - and $15, %dl - jz L(second_double_word_16) - mov 16(%esi), %ecx - cmp %ecx, 16(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word_16): - mov 20(%esi), %ecx - cmp %ecx, 20(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words_16): - and $15, %dh - jz L(fourth_double_word_16) - mov 24(%esi), %ecx - cmp %ecx, 24(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word_16): - mov 28(%esi), %ecx - cmp %ecx, 28(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(less4_double_words_32): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_32) - and $15, %dl - jz L(second_double_word_32) - mov 32(%esi), %ecx - cmp %ecx, 32(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word_32): - mov 36(%esi), %ecx - cmp %ecx, 36(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words_32): - and $15, %dh - jz L(fourth_double_word_32) - mov 40(%esi), %ecx - cmp %ecx, 40(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word_32): - mov 44(%esi), %ecx - cmp %ecx, 44(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(less4_double_words_48): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_48) - and $15, %dl - jz L(second_double_word_48) - mov 48(%esi), %ecx - cmp %ecx, 48(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word_48): - mov 52(%esi), %ecx - cmp %ecx, 52(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words_48): - and $15, %dh - jz L(fourth_double_word_48) - mov 56(%esi), %ecx - cmp %ecx, 56(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word_48): - mov 60(%esi), %ecx - cmp %ecx, 60(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(nequal): - mov $1, %eax - jg L(return) - neg 
%eax - RETURN - - .p2align 4 -L(return): - RETURN - - .p2align 4 -L(equal): - xorl %eax, %eax - RETURN - - CFI_POP (%edi) - CFI_POP (%esi) - - .p2align 4 -L(neq): - mov $1, %eax - jg L(neq_bigger) - neg %eax - -L(neq_bigger): - ret - - .p2align 4 -L(eq): - xorl %eax, %eax - ret - -END (__wcscmp_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscmp.S b/sysdeps/i386/i686/multiarch/wcscmp.S deleted file mode 100644 index 7118bdd4db..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscmp.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Multiple versions of wcscmp - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc and for the - DSO. In static binaries, we need wcscmp before the initialization - happened. 
*/ -#if IS_IN (libc) - .text -ENTRY(__wcscmp) - .type __wcscmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcscmp_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcscmp_sse2) -2: ret -END(__wcscmp) -weak_alias (__wcscmp, wcscmp) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscpy-c.c b/sysdeps/i386/i686/multiarch/wcscpy-c.c deleted file mode 100644 index fb3000392b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscpy-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#if IS_IN (libc) -# define wcscpy __wcscpy_ia32 -#endif - -#include "wcsmbs/wcscpy.c" diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S deleted file mode 100644 index 6280ba92ab..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S +++ /dev/null @@ -1,600 +0,0 @@ -/* wcscpy with SSSE3 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define RETURN POP (%edi); ret; CFI_PUSH (%edi) -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - - atom_text_section -ENTRY (__wcscpy_ssse3) - mov STR1(%esp), %edx - mov STR2(%esp), %ecx - - cmp $0, (%ecx) - jz L(ExitTail4) - cmp $0, 4(%ecx) - jz L(ExitTail8) - cmp $0, 8(%ecx) - jz L(ExitTail12) - cmp $0, 12(%ecx) - jz L(ExitTail16) - - PUSH (%edi) - mov %edx, %edi - PUSH (%esi) - lea 16(%ecx), %esi - - and $-16, %esi - - pxor %xmm0, %xmm0 - pcmpeqd (%esi), %xmm0 - movdqu (%ecx), %xmm1 - movdqu %xmm1, (%edx) - - pmovmskb %xmm0, %eax - sub %ecx, %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - mov %edx, %eax - lea 16(%edx), %edx - and $-16, %edx - sub %edx, %eax - - sub %eax, %ecx - mov %ecx, %eax - and $0xf, %eax - mov $0, %esi - - jz L(Align16Both) - cmp $4, %eax - je L(Shl4) - cmp $8, %eax - je L(Shl8) - jmp L(Shl12) - -L(Align16Both): - movaps (%ecx), %xmm1 - movaps 16(%ecx), %xmm2 - movaps %xmm1, (%edx) - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqd %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm4 - movaps %xmm3, (%edx, %esi) - pcmpeqd %xmm4, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm1 - movaps %xmm4, (%edx, %esi) - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm2 - movaps %xmm1, (%edx, %esi) - pcmpeqd %xmm2, %xmm0 
- pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqd %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps %xmm3, (%edx, %esi) - mov %ecx, %eax - lea 16(%ecx, %esi), %ecx - and $-0x40, %ecx - sub %ecx, %eax - sub %eax, %edx - - mov $-0x40, %esi - -L(Aligned64Loop): - movaps (%ecx), %xmm2 - movaps 32(%ecx), %xmm3 - movaps %xmm2, %xmm4 - movaps 16(%ecx), %xmm5 - movaps %xmm3, %xmm6 - movaps 48(%ecx), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - lea 64(%edx), %edx - pcmpeqd %xmm0, %xmm3 - lea 64(%ecx), %ecx - pmovmskb %xmm3, %eax - - test %eax, %eax - jnz L(Aligned64Leave) - movaps %xmm4, -64(%edx) - movaps %xmm5, -48(%edx) - movaps %xmm6, -32(%edx) - movaps %xmm7, -16(%edx) - jmp L(Aligned64Loop) - -L(Aligned64Leave): - pcmpeqd %xmm4, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqd %xmm5, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm4, -64(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - pcmpeqd %xmm6, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm5, -48(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - movaps %xmm6, -32(%edx) - pcmpeqd %xmm7, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - mov $-0x40, %esi - movaps %xmm7, -16(%edx) - jmp L(Aligned64Loop) - - .p2align 4 -L(Shl4): - movaps -4(%ecx), %xmm1 - movaps 12(%ecx), %xmm2 -L(Shl4Start): - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm1 - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm3, %xmm2 - movaps %xmm2, (%edx) - 
movaps 28(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 28(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -12(%ecx), %ecx - sub %eax, %edx - - movaps -4(%ecx), %xmm1 - -L(Shl4LoopStart): - movaps 12(%ecx), %xmm2 - movaps 28(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 44(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 60(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqd %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $4, %xmm4, %xmm5 - test %eax, %eax - palignr $4, %xmm3, %xmm4 - jnz L(Shl4Start) - - palignr $4, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $4, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl4LoopStart) - -L(Shl4LoopExit): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 8(%edx) - POP (%esi) - add $12, %edx - add $12, %ecx - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit4) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(Shl8): - movaps -8(%ecx), %xmm1 - movaps 8(%ecx), %xmm2 -L(Shl8Start): - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm1 - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm3, 
%xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 24(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -8(%ecx), %ecx - sub %eax, %edx - - movaps -8(%ecx), %xmm1 - -L(Shl8LoopStart): - movaps 8(%ecx), %xmm2 - movaps 24(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 40(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 56(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqd %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $8, %xmm4, %xmm5 - test %eax, %eax - palignr $8, %xmm3, %xmm4 - jnz L(Shl8Start) - - palignr $8, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $8, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl8LoopStart) - -L(Shl8LoopExit): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - POP (%esi) - add $8, %edx - add $8, %ecx - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit4) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(Shl12): - movaps -12(%ecx), %xmm1 - movaps 4(%ecx), %xmm2 -L(Shl12Start): - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm1 - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm3, %xmm2 - 
movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 20(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -4(%ecx), %ecx - sub %eax, %edx - - movaps -12(%ecx), %xmm1 - -L(Shl12LoopStart): - movaps 4(%ecx), %xmm2 - movaps 20(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 36(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 52(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqd %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $12, %xmm4, %xmm5 - test %eax, %eax - palignr $12, %xmm3, %xmm4 - jnz L(Shl12Start) - - palignr $12, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $12, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl12LoopStart) - -L(Shl12LoopExit): - movl (%ecx), %esi - movl %esi, (%edx) - mov $4, %esi - - .p2align 4 -L(CopyFrom1To16Bytes): - add %esi, %edx - add %esi, %ecx - - POP (%esi) - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit4) -L(Exit8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN - - .p2align 4 -L(ExitHigh): - test $0x01, %ah - jnz L(Exit12) -L(Exit16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - movl %edi, %eax - RETURN - - .p2align 4 -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) - movl %edi, %eax - RETURN - - .p2align 4 -L(Exit12): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) - movl %edi, %eax - RETURN - -CFI_POP (%edi) - - .p2align 4 
-L(ExitTail4): - movl (%ecx), %eax - movl %eax, (%edx) - movl %edx, %eax - ret - - .p2align 4 -L(ExitTail8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edx, %eax - ret - - .p2align 4 -L(ExitTail12): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) - movl %edx, %eax - ret - - .p2align 4 -L(ExitTail16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - movl %edx, %eax - ret - -END (__wcscpy_ssse3) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscpy.S b/sysdeps/i386/i686/multiarch/wcscpy.S deleted file mode 100644 index cfc97dd87c..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscpy.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of wcscpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. 
*/ -#if IS_IN (libc) - .text -ENTRY(wcscpy) - .type wcscpy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcscpy_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__wcscpy_ssse3) -2: ret -END(wcscpy) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c deleted file mode 100644 index a335dc0f7e..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen-c.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# define WCSLEN __wcslen_ia32 -#endif - -extern __typeof (wcslen) __wcslen_ia32; - -#include "wcsmbs/wcslen.c" diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S deleted file mode 100644 index bd3fc4c79b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S +++ /dev/null @@ -1,193 +0,0 @@ -/* wcslen with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <sysdep.h> -# define STR 4 - - .text -ENTRY (__wcslen_sse2) - mov STR(%esp), %edx - - cmp $0, (%edx) - jz L(exit_tail0) - cmp $0, 4(%edx) - jz L(exit_tail1) - cmp $0, 8(%edx) - jz L(exit_tail2) - cmp $0, 12(%edx) - jz L(exit_tail3) - cmp $0, 16(%edx) - jz L(exit_tail4) - cmp $0, 20(%edx) - jz L(exit_tail5) - cmp $0, 24(%edx) - jz L(exit_tail6) - cmp $0, 28(%edx) - jz L(exit_tail7) - - pxor %xmm0, %xmm0 - - lea 32(%edx), %eax - lea 16(%edx), %ecx - and $-16, %eax - - pcmpeqd (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - and $-0x40, %eax - - .p2align 4 -L(aligned_64_loop): - movaps (%eax), %xmm0 - movaps 16(%eax), %xmm1 - movaps 32(%eax), %xmm2 - movaps 48(%eax), %xmm6 - - pminub %xmm1, %xmm0 - pminub %xmm6, %xmm2 - pminub %xmm0, %xmm2 - pcmpeqd %xmm3, %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 64(%eax), %eax - jz L(aligned_64_loop) - - pcmpeqd -64(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 48(%ecx), %ecx - jnz L(exit) - - pcmpeqd %xmm1, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqd -32(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqd %xmm6, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - jmp L(aligned_64_loop) - - .p2align 4 -L(exit): - sub %ecx, %eax - shr $2, %eax - test %dl, %dl - jz L(exit_high) - - mov %dl, %cl - and $15, %cl - jz L(exit_1) - ret - - .p2align 4 -L(exit_high): - mov %dh, %ch - and $15, %ch - jz L(exit_3) - add $2, %eax - ret - - .p2align 4 
-L(exit_1): - add $1, %eax - ret - - .p2align 4 -L(exit_3): - add $3, %eax - ret - - .p2align 4 -L(exit_tail0): - xor %eax, %eax - ret - - .p2align 4 -L(exit_tail1): - mov $1, %eax - ret - - .p2align 4 -L(exit_tail2): - mov $2, %eax - ret - - .p2align 4 -L(exit_tail3): - mov $3, %eax - ret - - .p2align 4 -L(exit_tail4): - mov $4, %eax - ret - - .p2align 4 -L(exit_tail5): - mov $5, %eax - ret - - .p2align 4 -L(exit_tail6): - mov $6, %eax - ret - - .p2align 4 -L(exit_tail7): - mov $7, %eax - ret - -END (__wcslen_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S deleted file mode 100644 index 6ef9b6e7b5..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of wcslen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__wcslen) - .type __wcslen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcslen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcslen_sse2) -2: ret -END(__wcslen) - -weak_alias(__wcslen, wcslen) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-c.c b/sysdeps/i386/i686/multiarch/wcsrchr-c.c deleted file mode 100644 index 8d8a335b5b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#if IS_IN (libc) -# define wcsrchr __wcsrchr_ia32 -#endif - -#include "wcsmbs/wcsrchr.c" diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S deleted file mode 100644 index 1a9b60e55e..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S +++ /dev/null @@ -1,354 +0,0 @@ -/* wcsrchr with SSE2, without using bsf instructions. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <sysdep.h> -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH (%edi); -# define RETURN POP (%edi); ret; CFI_PUSH (%edi); -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__wcsrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - mov %ecx, %edi - punpckldq %xmm1, %xmm1 - pxor %xmm2, %xmm2 - punpckldq %xmm1, %xmm1 - -/* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 -/* Find where NULL is. */ - pmovmskb %xmm2, %ecx -/* Check if there is a match. */ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - - xor %edx, %edx - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - -/* Save current match */ - mov %eax, %edx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm3 - pcmpeqd %xmm1, %xmm0 -/* Find where NULL is. */ - pmovmskb %xmm3, %edx -/* Check if there is a match. */ - pmovmskb %xmm0, %eax -/* Remove the leading bytes. 
*/ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - - xor %edx, %edx - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - - mov %eax, %edx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm3 - pcmpeqd %xmm3, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm3 - pmovmskb %xmm2, %ecx - pmovmskb %xmm3, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm4 - pcmpeqd %xmm4, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm4 - pmovmskb %xmm2, %ecx - pmovmskb %xmm4, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm5 - pcmpeqd %xmm5, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm5 - pmovmskb %xmm2, %ecx - pmovmskb %xmm5, %eax - or %eax, %ecx - jz L(loop) - - .p2align 4 -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %edx, %edx - jz L(return_null_1) - mov %edx, %eax - mov %esi, %edi - - POP (%esi) - - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) -/* save match info */ - mov %eax, %edx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_in_third_or_fourth_wchar) - test $15, %cl - jz L(find_zero_in_second_wchar) - and $1, %eax - jz L(return_value) - - POP (%esi) - - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_second_wchar): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%esi) - - test 
$15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_third_or_fourth_wchar): - test $15, %ch - jz L(find_zero_in_fourth_wchar) - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%esi) - - test %ah, %ah - jnz L(match_third_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_fourth_wchar): - - POP (%esi) - - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(match_second_wchar): - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_third_or_fourth_wchar): - test $15 << 4, %ah - jnz L(match_fourth_wchar) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_third_wchar): - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_fourth_wchar): - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_in_third_or_fourth_wchar) - test $15, %cl - jz L(prolog_find_zero_in_second_wchar) - and $1, %eax - jz L(return_null) - - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_second_wchar): - and $1 << 5 - 1, %eax - jz L(return_null) - - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_third_or_fourth_wchar): - test $15, %ch - jz L(prolog_find_zero_in_fourth_wchar) - and $1 << 9 - 1, %eax - jz L(return_null) - - test %ah, %ah - jnz L(match_third_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_fourth_wchar): - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - -END (__wcsrchr_sse2) -#endif diff --git 
a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S deleted file mode 100644 index cf67333995..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr.S +++ /dev/null @@ -1,35 +0,0 @@ -/* Multiple versions of wcsrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(wcsrchr) - .type wcsrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcsrchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcsrchr_sse2) -2: ret -END(wcsrchr) -#endif diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-c.c b/sysdeps/i386/i686/multiarch/wmemcmp-c.c deleted file mode 100644 index 75ab4b94c1..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp-c.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# define WMEMCMP __wmemcmp_ia32 -#endif - -extern __typeof (wmemcmp) __wmemcmp_ia32; - -#include "wcsmbs/wmemcmp.c" diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S b/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S deleted file mode 100644 index 1a857c7e21..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_WMEMCMP 1 -#define MEMCMP __wmemcmp_sse4_2 - -#include "memcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S b/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S deleted file mode 100644 index a41ef95fc1..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_WMEMCMP 1 -#define MEMCMP __wmemcmp_ssse3 - -#include "memcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S deleted file mode 100644 index 1b9a54a413..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp.S +++ /dev/null @@ -1,40 +0,0 @@ -/* Multiple versions of wmemcmp - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. */ - -#if IS_IN (libc) - .text -ENTRY(wmemcmp) - .type wmemcmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wmemcmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2) -2: ret -END(wmemcmp) -#endif diff --git a/sysdeps/i386/i686/nptl/tls.h b/sysdeps/i386/i686/nptl/tls.h deleted file mode 100644 index 5b527af9d3..0000000000 --- a/sysdeps/i386/i686/nptl/tls.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _TLS_H - -/* Additional definitions for <tls.h> on i686 and up. */ - - -/* Macros to load from and store into segment registers. We can use - the 32-bit instructions. */ -#define TLS_GET_GS() \ - ({ int __seg; __asm ("movl %%gs, %0" : "=q" (__seg)); __seg; }) -#define TLS_SET_GS(val) \ - __asm ("movl %0, %%gs" :: "q" (val)) - - -/* Get the full set of definitions. */ -#include_next <tls.h> - -#endif /* tls.h */ diff --git a/sysdeps/i386/i686/pthread_spin_trylock.S b/sysdeps/i386/i686/pthread_spin_trylock.S deleted file mode 100644 index ce9c94d41a..0000000000 --- a/sysdeps/i386/i686/pthread_spin_trylock.S +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define HAVE_CMOV 1 -#include <sysdeps/i386/pthread_spin_trylock.S> diff --git a/sysdeps/i386/i686/stack-aliasing.h b/sysdeps/i386/i686/stack-aliasing.h deleted file mode 100644 index 9b5a1b0d47..0000000000 --- a/sysdeps/i386/i686/stack-aliasing.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Define macros for stack address aliasing issues for NPTL. 
i686 version. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* What is useful is to avoid the 64k aliasing problem which reliably - happens if all stacks use sizes which are a multiple of 64k. Tell - the stack allocator to disturb this by allocation one more page if - necessary. */ -#define MULTI_PAGE_ALIASING 65536 diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S deleted file mode 100644 index 1ae305912e..0000000000 --- a/sysdeps/i386/i686/strcmp.S +++ /dev/null @@ -1,52 +0,0 @@ -/* Highly optimized version for ix86, x>=6. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define STR1 PARMS -#define STR2 STR1+4 - - .text -ENTRY (strcmp) - - movl STR1(%esp), %ecx - movl STR2(%esp), %edx - -L(oop): movb (%ecx), %al - cmpb (%edx), %al - jne L(neq) - incl %ecx - incl %edx - testb %al, %al - jnz L(oop) - - xorl %eax, %eax - /* when strings are equal, pointers rest one beyond - the end of the NUL terminators. */ - ret - -L(neq): movl $1, %eax - movl $-1, %ecx - cmovbl %ecx, %eax - - ret -END (strcmp) -libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/i386/i686/tst-stack-align.h b/sysdeps/i386/i686/tst-stack-align.h deleted file mode 100644 index 51f03fe77b..0000000000 --- a/sysdeps/i386/i686/tst-stack-align.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (C) 2003-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <stdio.h> -#include <stdint.h> -#ifndef __SSE__ -#include_next <tst-stack-align.h> -#else -#include <xmmintrin.h> - -#define TEST_STACK_ALIGN() \ - ({ \ - __m128 _m; \ - double _d = 12.0; \ - long double _ld = 15.0; \ - int _ret = 0; \ - printf ("__m128: %p %zu\n", &_m, __alignof (__m128)); \ - if ((((uintptr_t) &_m) & (__alignof (__m128) - 1)) != 0) \ - _ret = 1; \ - \ - printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \ - if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \ - _ret = 1; \ - \ - printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \ - if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \ - _ret = 1; \ - _ret; \ - }) -#endif diff --git a/sysdeps/i386/i786/Implies b/sysdeps/i386/i786/Implies deleted file mode 100644 index 1cd29f63cf..0000000000 --- a/sysdeps/i386/i786/Implies +++ /dev/null @@ -1,2 +0,0 @@ -# The PPro and PII cores are mostly the same. -i386/i686 diff --git a/sysdeps/i386/init-arch.h b/sysdeps/i386/init-arch.h deleted file mode 100644 index 72881c5679..0000000000 --- a/sysdeps/i386/init-arch.h +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define MINIMUM_ISA 486 -#include <sysdeps/x86/init-arch.h> diff --git a/sysdeps/i386/jmpbuf-offsets.h b/sysdeps/i386/jmpbuf-offsets.h deleted file mode 100644 index 1c95db7287..0000000000 --- a/sysdeps/i386/jmpbuf-offsets.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Private macros for accessing __jmp_buf contents. i386 version. - Copyright (C) 2006-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define JB_BX 0 -#define JB_SI 1 -#define JB_DI 2 -#define JB_BP 3 -#define JB_SP 4 -#define JB_PC 5 -#define JB_SIZE 24 diff --git a/sysdeps/i386/jmpbuf-unwind.h b/sysdeps/i386/jmpbuf-unwind.h deleted file mode 100644 index 0a63a832cc..0000000000 --- a/sysdeps/i386/jmpbuf-unwind.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (C) 2003-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <setjmp.h> -#include <jmpbuf-offsets.h> -#include <stdint.h> -#include <unwind.h> -#include <sysdep.h> - -/* Test if longjmp to JMPBUF would unwind the frame - containing a local variable at ADDRESS. */ -#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ - ((void *) (address) < (void *) demangle ((jmpbuf)[JB_SP])) - -#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ - _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) - -static inline uintptr_t __attribute__ ((unused)) -_jmpbuf_sp (__jmp_buf regs) -{ - uintptr_t sp = regs[JB_SP]; -#ifdef PTR_DEMANGLE - PTR_DEMANGLE (sp); -#endif - return sp; -} - -#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ - ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) - -/* We use the normal longjmp for unwinding. */ -#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/sysdeps/i386/ldbl2mpn.c b/sysdeps/i386/ldbl2mpn.c deleted file mode 100644 index 076be0ae7e..0000000000 --- a/sysdeps/i386/ldbl2mpn.c +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include "gmp.h" -#include "gmp-impl.h" -#include "longlong.h" -#include <ieee754.h> -#include <float.h> -#include <stdlib.h> - -/* Convert a `long double' in IEEE854 standard double-precision format to a - multi-precision integer representing the significand scaled up by its - number of bits (64 for long double) and an integral power of two - (MPN frexpl). */ - -mp_size_t -__mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size, - int *expt, int *is_neg, - long double value) -{ - union ieee854_long_double u; - u.d = value; - - *is_neg = u.ieee.negative; - *expt = (int) u.ieee.exponent - IEEE854_LONG_DOUBLE_BIAS; - -#if BITS_PER_MP_LIMB == 32 - res_ptr[0] = u.ieee.mantissa1; /* Low-order 32 bits of fraction. */ - res_ptr[1] = u.ieee.mantissa0; /* High-order 32 bits. */ - #define N 2 -#elif BITS_PER_MP_LIMB == 64 - /* Hopefully the compiler will combine the two bitfield extracts - and this composition into just the original quadword extract. */ - res_ptr[0] = ((mp_limb_t) u.ieee.mantissa0 << 32) | u.ieee.mantissa1; - #define N 1 -#else - #error "mp_limb size " BITS_PER_MP_LIMB "not accounted for" -#endif - - if (u.ieee.exponent == 0) - { - /* A biased exponent of zero is a special case. - Either it is a zero or it is a denormal number. */ - if (res_ptr[0] == 0 && res_ptr[N - 1] == 0) /* Assumes N<=2. */ - /* It's zero. */ - *expt = 0; - else - { - /* It is a denormal number, meaning it has no implicit leading - one bit, and its exponent is in fact the format minimum. 
*/ - int cnt; - - /* One problem with Intel's 80-bit format is that the explicit - leading one in the normalized representation has to be zero - for denormalized number. If it is one, the number is according - to Intel's specification an invalid number. We make the - representation unique by explicitly clearing this bit. */ - res_ptr[N - 1] &= ~((mp_limb_t) 1 << ((LDBL_MANT_DIG - 1) % BITS_PER_MP_LIMB)); - - if (res_ptr[N - 1] != 0) - { - count_leading_zeros (cnt, res_ptr[N - 1]); - if (cnt != 0) - { -#if N == 2 - res_ptr[N - 1] = res_ptr[N - 1] << cnt - | (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt)); - res_ptr[0] <<= cnt; -#else - res_ptr[N - 1] <<= cnt; -#endif - } - *expt = LDBL_MIN_EXP - 1 - cnt; - } - else if (res_ptr[0] != 0) - { - count_leading_zeros (cnt, res_ptr[0]); - res_ptr[N - 1] = res_ptr[0] << cnt; - res_ptr[0] = 0; - *expt = LDBL_MIN_EXP - 1 - BITS_PER_MP_LIMB - cnt; - } - else - { - /* This is the special case of the pseudo denormal number - with only the implicit leading bit set. The value is - in fact a normal number and so we have to treat this - case differently. */ -#if N == 2 - res_ptr[N - 1] = 0x80000000ul; -#else - res_ptr[0] = 0x8000000000000000ul; -#endif - *expt = LDBL_MIN_EXP - 1; - } - } - } - else if (u.ieee.exponent < 0x7fff -#if N == 2 - && res_ptr[0] == 0 -#endif - && res_ptr[N - 1] == 0) - /* Pseudo zero. */ - *expt = 0; - - return N; -} diff --git a/sysdeps/i386/ldsodefs.h b/sysdeps/i386/ldsodefs.h deleted file mode 100644 index a369f5fc68..0000000000 --- a/sysdeps/i386/ldsodefs.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Run-time dynamic linker data structures for loaded ELF shared objects. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _I386_LDSODEFS_H -#define _I386_LDSODEFS_H 1 - -#include <elf.h> -#include <cpu-features.h> - -struct La_i86_regs; -struct La_i86_retval; - -#define ARCH_PLTENTER_MEMBERS \ - Elf32_Addr (*i86_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \ - uintptr_t *, struct La_i86_regs *, \ - unsigned int *, const char *name, \ - long int *framesizep) - -#define ARCH_PLTEXIT_MEMBERS \ - unsigned int (*i86_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \ - uintptr_t *, const struct La_i86_regs *, \ - struct La_i86_retval *, const char *) - -#include_next <ldsodefs.h> - -#endif diff --git a/sysdeps/i386/link-defines.sym b/sysdeps/i386/link-defines.sym deleted file mode 100644 index 0995adb37f..0000000000 --- a/sysdeps/i386/link-defines.sym +++ /dev/null @@ -1,20 +0,0 @@ -#include "link.h" -#include <stddef.h> - --- -LONG_DOUBLE_SIZE sizeof (long double) - -LR_SIZE sizeof (struct La_i86_regs) -LR_EDX_OFFSET offsetof (struct La_i86_regs, lr_edx) -LR_ECX_OFFSET offsetof (struct La_i86_regs, lr_ecx) -LR_EAX_OFFSET offsetof (struct La_i86_regs, lr_eax) -LR_EBP_OFFSET offsetof (struct La_i86_regs, lr_ebp) -LR_ESP_OFFSET offsetof (struct La_i86_regs, lr_esp) - -LRV_SIZE sizeof (struct La_i86_retval) -LRV_EAX_OFFSET offsetof (struct La_i86_retval, lrv_eax) -LRV_EDX_OFFSET offsetof (struct 
La_i86_retval, lrv_edx) -LRV_ST0_OFFSET offsetof (struct La_i86_retval, lrv_st0) -LRV_ST1_OFFSET offsetof (struct La_i86_retval, lrv_st1) -LRV_BND0_OFFSET offsetof (struct La_i86_retval, lrv_bnd0) -LRV_BND1_OFFSET offsetof (struct La_i86_retval, lrv_bnd1) diff --git a/sysdeps/i386/lshift.S b/sysdeps/i386/lshift.S deleted file mode 100644 index fa4b07793f..0000000000 --- a/sysdeps/i386/lshift.S +++ /dev/null @@ -1,103 +0,0 @@ -/* i80386 __mpn_lshift -- - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. 
*/
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-#define PARMS	4+12		/* space for 3 saved regs */
-#define RES	PARMS		/* stack offset of the result pointer (loaded into %edi) */
-#define S	RES+4		/* stack offset of the source pointer (loaded into %esi) */
-#define SIZE	S+4		/* stack offset of the limb count (loaded into %edx) */
-#define CNT	SIZE+4		/* stack offset of the shift count (loaded into %ecx) */
-
-	.text
-ENTRY (__mpn_lshift)		/* shifts SIZE limbs at S left by CNT bits into RES; the shifted-out bits end up in %eax */
-
-	pushl %edi
-	cfi_adjust_cfa_offset (4)
-	pushl %esi
-	cfi_adjust_cfa_offset (4)
-	pushl %ebx
-	cfi_adjust_cfa_offset (4)
-
-	movl RES(%esp),%edi
-	cfi_rel_offset (edi, 8)
-	movl S(%esp),%esi
-	cfi_rel_offset (esi, 4)
-	movl SIZE(%esp),%edx
-	movl CNT(%esp),%ecx
-	subl $4,%esi			/* adjust s_ptr */
-
-	movl (%esi,%edx,4),%ebx	/* read most significant limb */
-	cfi_rel_offset (ebx, 0)
-	cfi_remember_state		/* state with three saved registers; reinstated before L(end) */
-	xorl %eax,%eax
-	shldl %cl,%ebx,%eax		/* compute carry limb */
-	decl %edx
-	jz L(end)			/* SIZE was 1: no loop needed */
-	pushl %eax			/* push carry limb onto stack */
-	cfi_adjust_cfa_offset (4)
-	testb $1,%dl
-	jnz L(1)			/* enter loop in the middle */
-	movl %ebx,%eax			/* L(oop) expects the higher limb in %eax */
-
-	ALIGN (3)
-L(oop):	movl (%esi,%edx,4),%ebx	/* load next lower limb */
-	shldl %cl,%ebx,%eax		/* compute result limb */
-	movl %eax,(%edi,%edx,4)	/* store it */
-	decl %edx
-L(1):	movl (%esi,%edx,4),%eax	/* same step with the roles of %eax and %ebx swapped */
-	shldl %cl,%eax,%ebx
-	movl %ebx,(%edi,%edx,4)
-	decl %edx
-	jnz L(oop)			/* repeat until the limb index reaches 0 */
-
-	shll %cl,%eax			/* compute least significant limb */
-	movl %eax,(%edi)		/* store it */
-
-	popl %eax			/* pop carry limb */
-	cfi_adjust_cfa_offset (-4)
-
-	popl %ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	popl %esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	popl %edi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	ret				/* carry limb is in %eax */
-
-	cfi_restore_state		/* back to the state remembered above: three registers saved, no carry limb pushed */
-L(end):	shll %cl,%ebx			/* compute least significant limb */
-	movl %ebx,(%edi)		/* store it */
-
-	popl %ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (ebx)
-	popl %esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	popl %edi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-
-	ret				/* %eax still holds the carry limb computed by the first shldl */
-END (__mpn_lshift)
diff --git a/sysdeps/i386/machine-gmon.h b/sysdeps/i386/machine-gmon.h
deleted file mode 100644
index d5d8cdf7c6..0000000000
--- a/sysdeps/i386/machine-gmon.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
i386-specific implementation of profiling support.
-   Copyright (C) 1997-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* We need a special version of the `mcount' function since for ix86 it
-   must not clobber any register.  There are several reasons for this:
-     - there is a bug in gcc as of version 2.7.2.2 which prohibits the
-       use of profiling together with nested functions
-     - the ELF `fixup' function uses GCC's regparm feature
-     - some (future) systems might want to pass parameters in registers.  */
-
-/* We must not pollute the global namespace, so mcount_internal is mapped to the reserved name __mcount_internal.  */
-#define mcount_internal __mcount_internal
-
-extern void mcount_internal (u_long frompc, u_long selfpc) internal_function;	/* Prototype matching the header produced by _MCOUNT_DECL below.  */
-
-#define _MCOUNT_DECL(frompc, selfpc) /* Expands to the definition header of mcount_internal.  */ \
-void internal_function mcount_internal (u_long frompc, u_long selfpc)
-
-
-/* Define MCOUNT as empty since we have the implementation in another
-   file.  */
-#define MCOUNT
diff --git a/sysdeps/i386/memchr.S b/sysdeps/i386/memchr.S
deleted file mode 100644
index db4a6418ff..0000000000
--- a/sysdeps/i386/memchr.S
+++ /dev/null
@@ -1,322 +0,0 @@
-/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR
-   less than LEN.  For Intel 80x86, x>=3.
-   Copyright (C) 1994-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
-   Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
-   This version is developed using the same algorithm as the fast C
-   version which carries the following introduction:
-   Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
-   with help from Dan Sahlin (dan@sics.se) and
-   commentary by Jim Blandy (jimb@ai.mit.edu);
-   adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
-   and implemented by Roland McGrath (roland@ai.mit.edu).
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4+8		/* space for 2 saved regs */
-#define RTN	PARMS		/* in this file only used to define STR */
-#define STR	RTN		/* stack offset of the str argument */
-#define CHR	STR+4		/* stack offset of the chr argument */
-#define LEN	CHR+4		/* stack offset of the len argument */
-
-	.text
-ENTRY (__memchr)
-
-	/* Save callee-saved registers used in this function.  */
-	pushl %esi
-	cfi_adjust_cfa_offset (4)
-	pushl %edi
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (edi, 0)
-
-	/* Load parameters into registers.  */
-	movl STR(%esp), %eax	/* str: pointer to memory block.  */
-	movl CHR(%esp), %edx	/* c: byte we are looking for.  */
-	movl LEN(%esp), %esi	/* len: length of memory block.  */
-	cfi_rel_offset (esi, 4)
-
-	/* If we must not test more than three characters, test
-	   them one by one.  This is especially true for 0.  */
-	cmpl $4, %esi
-	jb L(3)
-
-	/* At the moment %edx contains CHR.  What we need for the
-	   algorithm is CHR in all bytes of the dword.  Avoid
-	   operations on 16 bit words because these require a
-	   prefix byte (and one more cycle).  */
-	movb %dl, %dh		/* Now it is 0|0|c|c */
-	movl %edx, %ecx
-	shll $16, %edx		/* Now c|c|0|0 */
-	movw %cx, %dx		/* And finally c|c|c|c */
-
-	/* Better performance can be achieved if the word (32
-	   bit) memory access is aligned on a four-byte-boundary.
-	   So process first bytes one by one until boundary is
-	   reached. Don't use a loop for better performance.  */
-
-	testb $3, %al		/* correctly aligned ? */
-	je L(2)			/* yes => begin loop */
-	cmpb %dl, (%eax)	/* compare byte */
-	je L(9)			/* target found => return */
-	incl %eax		/* increment source pointer */
-	decl %esi		/* decrement length counter */
-	je L(4)			/* len==0 => return NULL */
-
-	testb $3, %al		/* correctly aligned ? */
-	je L(2)			/* yes => begin loop */
-	cmpb %dl, (%eax)	/* compare byte */
-	je L(9)			/* target found => return */
-	incl %eax		/* increment source pointer */
-	decl %esi		/* decrement length counter */
-	je L(4)			/* len==0 => return NULL */
-
-	testb $3, %al		/* correctly aligned ? */
-	je L(2)			/* yes => begin loop */
-	cmpb %dl, (%eax)	/* compare byte */
-	je L(9)			/* target found => return */
-	incl %eax		/* increment source pointer */
-	decl %esi		/* decrement length counter */
-	/* no test for len==0 here, because this is done in the
-	   loop head */
-	jmp L(2)
-
-	/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
-	   change any of the hole bits of LONGWORD.
-
-	   1) Is this safe?  Will it catch all the zero bytes?
-	   Suppose there is a byte with all zeros.  Any carry bits
-	   propagating from its left will fall into the hole at its
-	   least significant bit and stop.  Since there will be no
-	   carry from its most significant bit, the LSB of the
-	   byte to the left will be unchanged, and the zero will be
-	   detected.
-
-	   2) Is this worthwhile?  Will it ignore everything except
-	   zero bytes?  Suppose every byte of LONGWORD has a bit set
-	   somewhere.  There will be a carry into bit 8.  If bit 8
-	   is set, this will carry into bit 16.  If bit 8 is clear,
-	   one of bits 9-15 must be set, so there will be a carry
-	   into bit 16.  Similarly, there will be a carry into bit
-	   24.  If one of bits 24-31 is set, there will be a carry
-	   into bit 32 (=carry flag), so all of the hole bits will
-	   be changed.
-
-	   3) But wait!  Aren't we looking for CHR, not zero?
-	   Good point.  So what we do is XOR LONGWORD with a longword,
-	   each of whose bytes is CHR.  This turns each byte that is CHR
-	   into a zero.  */
-
-
-	/* Each round the main loop processes 16 bytes.  */
-
-	ALIGN (4)
-
-L(1):	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-
-	/* According to the algorithm we had to reverse the effect of the
-	   XOR first and then test the overflow bits.  But because the
-	   following XOR would destroy the carry flag and it would (in a
-	   representation with more than 32 bits) not alter the last
-	   overflow, we can now test this condition.  If no carry is signaled
-	   no overflow must have occurred in the last byte => it was 0.  */
-	jnc L(8)
-
-	/* We are only interested in carry bits that change due to the
-	   previous add, so remove original bits */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-
-	/* Now test for the other three overflow bits.  */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-
-	/* If at least one byte of the word is CHR we don't get 0 in %edi.  */
-	jnz L(8)		/* found it => return pointer */
-
-	/* This process is unrolled four times for better performance.
-	   We don't increment the source pointer each time.  Instead we
-	   use offsets and increment by 16 in each run of the loop.  But
-	   before probing for the matching byte we need some extra code
-	   (the pointer adjustments at L(5), L(6) and L(7) below).  Even the
-	   len can be compared with constants instead of decrementing each time.  */
-
-	movl 4(%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(7)		/* highest byte is CHR => return pointer */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(7)		/* found it => return pointer */
-
-	movl 8(%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(6)		/* highest byte is CHR => return pointer */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(6)		/* found it => return pointer */
-
-	movl 12(%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(5)		/* highest byte is CHR => return pointer */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jnz L(5)		/* found it => return pointer */
-
-	/* Adjust both counters for a full round, i.e. 16 bytes.  */
-	addl $16, %eax
-L(2):	subl $16, %esi
-	jae L(1)		/* Still at least 16 bytes remaining */
-
-	/* Process remaining bytes separately.  */
-	cmpl $4-16, %esi	/* rest < 4 bytes? */
-	jb L(3)			/* yes, then test byte by byte */
-
-	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(8)		/* highest byte is CHR => return pointer */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jne L(8)		/* found it => return pointer */
-	addl $4, %eax		/* adjust source pointer */
-
-	cmpl $8-16, %esi	/* rest < 8 bytes? */
-	jb L(3)			/* yes, then test byte by byte */
-
-	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(8)		/* highest byte is CHR => return pointer */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jne L(8)		/* found it => return pointer */
-	addl $4, %eax		/* adjust source pointer */
-
-	cmpl $12-16, %esi	/* rest < 12 bytes? */
-	jb L(3)			/* yes, then test byte by byte */
-
-	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
-	movl $0xfefefeff, %edi	/* magic value */
-	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
-				   are now 0 */
-	addl %ecx, %edi		/* add the magic value to the word.  We get
-				   carry bits reported for each byte which
-				   is *not* 0 */
-	jnc L(8)		/* highest byte is CHR => return pointer */
-	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
-	orl $0xfefefeff, %edi	/* set all non-carry bits */
-	incl %edi		/* add 1: if one carry bit was *not* set
-				   the addition will not result in 0.  */
-	jne L(8)		/* found it => return pointer */
-	addl $4, %eax		/* adjust source pointer */
-
-	/* Check the remaining bytes one by one.  */
-L(3):	andl $3, %esi		/* mask out uninteresting bytes */
-	jz L(4)			/* no remaining bytes => return NULL */
-
-	cmpb %dl, (%eax)	/* compare byte with CHR */
-	je L(9)			/* equal, then return pointer */
-	incl %eax		/* increment source pointer */
-	decl %esi		/* decrement length */
-	jz L(4)			/* no remaining bytes => return NULL */
-
-	cmpb %dl, (%eax)	/* compare byte with CHR */
-	je L(9)			/* equal, then return pointer */
-	incl %eax		/* increment source pointer */
-	decl %esi		/* decrement length */
-	jz L(4)			/* no remaining bytes => return NULL */
-
-	cmpb %dl, (%eax)	/* compare byte with CHR */
-	je L(9)			/* equal, then return pointer */
-
-L(4):	/* no byte found => return NULL */
-	xorl %eax, %eax
-	jmp L(9)
-
-	/* add missing source pointer increments */
-L(5):	addl $4, %eax
-L(6):	addl $4, %eax
-L(7):	addl $4, %eax
-
-	/* Test for the matching byte in the word.  %ecx contains a NUL
-	   char in the byte which originally was the byte we are looking
-	   at.  */
-L(8):	testb %cl, %cl		/* test first byte in dword */
-	jz L(9)			/* if zero => return pointer */
-	incl %eax		/* increment source pointer */
-
-	testb %ch, %ch		/* test second byte in dword */
-	jz L(9)			/* if zero => return pointer */
-	incl %eax		/* increment source pointer */
-
-	testl $0xff0000, %ecx	/* test third byte in dword */
-	jz L(9)			/* if zero => return pointer */
-	incl %eax		/* increment source pointer */
-
-	/* No further test needed; we know it is one of the four bytes.  */
-L(9):	popl %edi		/* pop saved registers */
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (edi)
-	popl %esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-
-	ret
-END (__memchr)
-
-weak_alias (__memchr, memchr)
-libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/i386/memcmp.S b/sysdeps/i386/memcmp.S
deleted file mode 100644
index 01f8f8ef03..0000000000
--- a/sysdeps/i386/memcmp.S
+++ /dev/null
@@ -1,73 +0,0 @@
-/* Compare two memory blocks for differences in the first COUNT bytes.
-   Copyright (C) 1995-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS	4+4	/* space for 1 saved reg */
-#define BLK1	PARMS
-#define BLK2	BLK1+4
-#define LEN	BLK2+4
-
-	.text
-ENTRY (memcmp)
-
-	pushl %esi		/* Save callee-saved registers.
*/
-	cfi_adjust_cfa_offset (4)
-	movl %edi, %edx		/* Note that %edx is not otherwise used, so it
-				   can be used to save %edi.  It's faster.  */
-	cfi_register (edi, edx)
-
-	movl BLK1(%esp), %esi
-	cfi_rel_offset (esi, 0)
-	movl BLK2(%esp), %edi
-	movl LEN(%esp), %ecx
-
-	cld			/* Set direction of comparison.  */
-
-	xorl %eax, %eax		/* Default result; this also sets ZF, so a LEN of 0 takes the jz below.  */
-
-	repe			/* Compare at most %ecx bytes.  */
-	cmpsb
-	jz L(1)			/* If even last byte was equal we return 0.  */
-
-	/* The memory blocks are not equal.  So result of the last
-	   subtraction is present in the carry flag.  It is set when
-	   the byte in block #2 is bigger.  In this case we have to
-	   return -1 (=0xffffffff), else 1.  */
-	sbbl %eax, %eax		/* This is tricky.  %eax == 0 and carry is set
-				   or not depending on last subtraction.  */
-
-	/* At this point %eax == 0, if the byte of block #1 was bigger, and
-	   0xffffffff if the last byte of block #2 was bigger.  The latter
-	   case is already correct but the former needs a little adjustment.
-	   Note that the following operation does not change 0xffffffff.  */
-	orb $1, %al		/* Change 0 to 1.  */
-
-L(1):	popl %esi		/* Restore registers.  */
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (esi)
-	movl %edx, %edi		/* %edi was parked in %edx at entry */
-	cfi_restore (edi)
-
-	ret
-END (memcmp)
-
-#undef bcmp
-weak_alias (memcmp, bcmp)
-libc_hidden_builtin_def (memcmp)
diff --git a/sysdeps/i386/memcopy.h b/sysdeps/i386/memcopy.h
deleted file mode 100644
index dc6173ee29..0000000000
--- a/sysdeps/i386/memcopy.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* memcopy.h -- definitions for memory copy functions.  i386 version.
-   Copyright (C) 1991-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Torbjorn Granlund (tege@sics.se).
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdeps/generic/memcopy.h>
-
-#undef	OP_T_THRES	/* Drop any definition from the generic header included above.  */
-#define	OP_T_THRES	8
-
-#undef	BYTE_COPY_FWD
-#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) /* Copy NBYTES bytes forward; DST_BP and SRC_BP are updated by the asm.  */ \
-  do { \
-    int __d0; /* Receives the final %ecx value; not used afterwards.  */ \
-    asm volatile(/* Clear the direction flag, so copying goes forward.  */ \
-		 "cld\n" \
-		 /* Copy bytes.  */ \
-		 "rep\n" \
-		 "movsb" : \
-		 "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
-		 "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \
-		 "memory"); \
-  } while (0)
-
-#undef	BYTE_COPY_BWD
-#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) /* Copy NBYTES bytes backward, starting at the byte just below DST_EP/SRC_EP.  */ \
-  do \
-    { \
-      int __d0; /* Receives the final %ecx value; not used afterwards.  */ \
-      asm volatile(/* Set the direction flag, so copying goes backwards.  */ \
-		   "std\n" \
-		   /* Copy bytes.  */ \
-		   "rep\n" \
-		   "movsb\n" \
-		   /* Clear the dir flag.  Convention says it should be 0. */ \
-		   "cld" : \
-		   "=D" (dst_ep), "=S" (src_ep), "=c" (__d0) : \
-		   "0" (dst_ep - 1), "1" (src_ep - 1), "2" (nbytes) : \
-		   "memory"); \
-      dst_ep += 1; /* Undo the -1 bias applied to the asm input above.  */ \
-      src_ep += 1; \
-    } while (0)
-
-#undef	WORD_COPY_FWD
-#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) /* Copy NBYTES / 4 longwords forward; NBYTES_LEFT gets NBYTES % 4.  */ \
-  do \
-    { \
-      int __d0; /* Receives the final %ecx value; not used afterwards.  */ \
-      asm volatile(/* Clear the direction flag, so copying goes forward.  */ \
-		   "cld\n" \
-		   /* Copy longwords.  */ \
-		   "rep\n" \
-		   "movsl" : \
-		   "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) : \
-		   "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) : \
-		   "memory"); \
-      (nbytes_left) = (nbytes) % 4; \
-    } while (0)
-
-#undef	WORD_COPY_BWD
-#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) /* Copy NBYTES / 4 longwords backward; NBYTES_LEFT gets NBYTES % 4.  */ \
-  do \
-    { \
-      int __d0; /* Receives the final %ecx value; not used afterwards.  */ \
-      asm volatile(/* Set the direction flag, so copying goes backwards.  */ \
-		   "std\n" \
-		   /* Copy longwords.  */ \
-		   "rep\n" \
-		   "movsl\n" \
-		   /* Clear the dir flag.  Convention says it should be 0. */ \
-		   "cld" : \
-		   "=D" (dst_ep), "=S" (src_ep), "=c" (__d0) : \
-		   "0" (dst_ep - 4), "1" (src_ep - 4), "2" ((nbytes) / 4) : \
-		   "memory"); \
-      dst_ep += 4; /* Undo the -4 bias applied to the asm input above.  */ \
-      src_ep += 4; \
-      (nbytes_left) = (nbytes) % 4; \
-    } while (0)
diff --git a/sysdeps/i386/memcpy.S b/sysdeps/i386/memcpy.S
deleted file mode 100644
index 06568ea724..0000000000
--- a/sysdeps/i386/memcpy.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/* memcpy with REP MOVSB/STOSB
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.
*/
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#ifndef MEMCPY			/* wrapper files may predefine MEMCPY and MEMCPY_CHK before including this file */
-# define MEMCPY		memcpy
-# define MEMCPY_CHK	__memcpy_chk
-#endif
-
-#ifdef USE_AS_BCOPY		/* offsets are valid after the two PUSHes in MEMCPY; STR1 is the copy destination, STR2 the source */
-# define STR2		12
-# define STR1		STR2+4
-# define N		STR1+4
-#else
-# define STR1		12
-# define STR2		STR1+4
-# define N		STR2+4
-#endif
-
-#define CFI_PUSH(REG) /* CFI bookkeeping for a 4-byte push of REG */ \
-	cfi_adjust_cfa_offset (4); \
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) /* CFI bookkeeping for a 4-byte pop of REG */ \
-	cfi_adjust_cfa_offset (-4); \
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-	.text
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
-	movl 12(%esp), %eax	/* third stack argument (presumably the length) */
-	cmpl %eax, 16(%esp)	/* compare the fourth stack argument against it */
-	jb HIDDEN_JUMPTARGET (__chk_fail)	/* fourth < third (unsigned): fail */
-END (MEMCPY_CHK)		/* no ret or jmp here: execution continues into MEMCPY below */
-#endif
-ENTRY (MEMCPY)
-	PUSH (%esi)
-	PUSH (%edi)
-	movl N(%esp), %ecx	/* byte count for rep movsb */
-	movl STR1(%esp), %edi	/* destination */
-	movl STR2(%esp), %esi	/* source */
-	mov %edi, %eax		/* %eax = destination pointer */
-#ifdef USE_AS_MEMPCPY
-	add %ecx, %eax		/* %eax = destination pointer + byte count */
-#endif
-
-#ifdef USE_AS_MEMMOVE
-	cmp %esi, %edi
-	ja L(copy_backward)	/* destination above source: copy from the end */
-	je L(bwd_write_0bytes)	/* destination == source: nothing to copy */
-#endif
-
-	rep movsb
-	POP (%edi)
-	POP (%esi)
-	ret
-
-#ifdef USE_AS_MEMMOVE
-L(copy_backward):
-	lea -1(%edi,%ecx), %edi	/* last byte of the destination */
-	lea -1(%esi,%ecx), %esi	/* last byte of the source */
-	std			/* copy downwards */
-	rep movsb
-	cld			/* clear the direction flag again */
-L(bwd_write_0bytes):
-	POP (%edi)
-	POP (%esi)
-	ret
-#endif
-
-END (MEMCPY)
-
-#ifndef USE_AS_BCOPY
-libc_hidden_builtin_def (MEMCPY)
-#endif
diff --git a/sysdeps/i386/memcpy_chk.S b/sysdeps/i386/memcpy_chk.S
deleted file mode 100644
index 0f6f585c41..0000000000
--- a/sysdeps/i386/memcpy_chk.S
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Checking memcpy for i386.
-   Copyright (C) 2004-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef SHARED
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-	/* For libc.so this is defined in memcpy.S.
-	   For libc.a, this is a separate source to avoid
-	   memcpy bringing in __chk_fail and all routines
-	   it calls.  */
-	.text
-ENTRY (__memcpy_chk)
-	movl 12(%esp), %eax	/* third stack argument (presumably the length) */
-	cmpl %eax, 16(%esp)	/* compare the fourth stack argument against it */
-	jb __chk_fail		/* fourth < third (unsigned): fail */
-	jmp memcpy		/* otherwise jump to memcpy with the stack untouched */
-END (__memcpy_chk)
-#endif
diff --git a/sysdeps/i386/memmove.S b/sysdeps/i386/memmove.S
deleted file mode 100644
index 60a45d21e0..0000000000
--- a/sysdeps/i386/memmove.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE	/* selects the USE_AS_MEMMOVE branches of the file included below */
-#define MEMCPY		memmove
-#define MEMCPY_CHK	__memmove_chk
-#include "memcpy.S"
diff --git a/sysdeps/i386/memmove_chk.S b/sysdeps/i386/memmove_chk.S
deleted file mode 100644
index 0c7037cc05..0000000000
--- a/sysdeps/i386/memmove_chk.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Checking memmove for i386
-   Copyright (C) 2004-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef SHARED
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-/* For libc.so this is defined in memmove.S.  For libc.a, this is a
-   separate source to avoid memmove bringing in __chk_fail and all
-   routines it calls.  */
-	.text
-ENTRY (__memmove_chk)
-	movl 12(%esp), %eax	/* third stack argument (presumably the length) */
-	cmpl %eax, 16(%esp)	/* compare the fourth stack argument against it */
-	jb __chk_fail		/* fourth < third (unsigned): fail */
-	jmp memmove		/* otherwise jump to memmove with the stack untouched */
-END (__memmove_chk)
-#endif
diff --git a/sysdeps/i386/mempcpy.S b/sysdeps/i386/mempcpy.S
deleted file mode 100644
index 61addb75f4..0000000000
--- a/sysdeps/i386/mempcpy.S
+++ /dev/null
@@ -1,7 +0,0 @@
-#define USE_AS_MEMPCPY	/* selects the USE_AS_MEMPCPY branch of the file included below */
-#define MEMCPY		__mempcpy
-#define MEMCPY_CHK	__mempcpy_chk
-#include "memcpy.S"
-
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
diff --git a/sysdeps/i386/mempcpy_chk.S b/sysdeps/i386/mempcpy_chk.S
deleted file mode 100644
index 4d8ac5c25b..0000000000
--- a/sysdeps/i386/mempcpy_chk.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Checking mempcpy for i386
-   Copyright (C) 2004-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef SHARED
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-/* For libc.so this is defined in mempcpy.S.
For libc.a, this is a
-   separate source to avoid mempcpy bringing in __chk_fail and all
-   routines it calls.  */
-	.text
-ENTRY (__mempcpy_chk)
-	movl 12(%esp), %eax	/* third stack argument (presumably the length) */
-	cmpl %eax, 16(%esp)	/* compare the fourth stack argument against it */
-	jb __chk_fail		/* fourth < third (unsigned): fail */
-	jmp mempcpy		/* otherwise jump to mempcpy with the stack untouched */
-END (__mempcpy_chk)
-#endif
diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S
deleted file mode 100644
index 46ae65d2e4..0000000000
--- a/sysdeps/i386/memset.S
+++ /dev/null
@@ -1,68 +0,0 @@
-/* memset with REP STOSB
-   Copyright (C) 2015-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.
*/
-
-#include <sysdep.h>
-
-#define CFI_PUSH(REG) /* CFI bookkeeping for a 4-byte push of REG */ \
-	cfi_adjust_cfa_offset (4); \
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) /* CFI bookkeeping for a 4-byte pop of REG */ \
-	cfi_adjust_cfa_offset (-4); \
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#define STR1	8		/* stack offset of the pointer argument after the single PUSH in memset */
-#ifdef USE_AS_BZERO
-#define N	STR1+4		/* bzero variant: the count directly follows the pointer */
-#else
-#define STR2	STR1+4		/* stack offset of the fill value */
-#define N	STR2+4		/* stack offset of the count */
-#endif
-
-	.text
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY (__memset_chk)
-	movl 12(%esp), %eax	/* third stack argument (presumably the length) */
-	cmpl %eax, 16(%esp)	/* compare the fourth stack argument against it */
-	jb HIDDEN_JUMPTARGET (__chk_fail)	/* fourth < third (unsigned): fail */
-END (__memset_chk)		/* no ret or jmp here: execution continues into memset below */
-#endif
-ENTRY (memset)
-	PUSH (%edi)
-	movl N(%esp), %ecx	/* byte count for rep stosb */
-	movl STR1(%esp), %edi	/* start of the block to fill */
-#ifdef USE_AS_BZERO
-	xor %eax, %eax		/* fill byte is 0 */
-#else
-	movzbl STR2(%esp), %eax	/* fill byte, zero-extended */
-	mov %edi, %edx		/* rep stosb advances %edi, so keep the start pointer */
-#endif
-	rep stosb
-#ifndef USE_AS_BZERO
-	mov %edx, %eax		/* %eax = original start pointer */
-#endif
-	POP (%edi)
-	ret
-END (memset)
-
-#ifndef USE_AS_BZERO
-libc_hidden_builtin_def (memset)
-#endif
diff --git a/sysdeps/i386/memset_chk.S b/sysdeps/i386/memset_chk.S
deleted file mode 100644
index da7837111e..0000000000
--- a/sysdeps/i386/memset_chk.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Checking memset for i386.
-   Copyright (C) 2004-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.
*/ - -#ifndef SHARED -# include <sysdep.h> -# include "asm-syntax.h" - -/* For libc.so this is defined in memset.S. For libc.a, this is a - separate source to avoid memset bringing in __chk_fail and all - routines it calls. */ - .text -ENTRY (__memset_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp memset -END (__memset_chk) -#endif diff --git a/sysdeps/i386/memusage.h b/sysdeps/i386/memusage.h deleted file mode 100644 index 30167be833..0000000000 --- a/sysdeps/i386/memusage.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (C) 2000-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; }) - -#include <sysdeps/generic/memusage.h> diff --git a/sysdeps/i386/mp_clz_tab.c b/sysdeps/i386/mp_clz_tab.c deleted file mode 100644 index 860f98cc62..0000000000 --- a/sysdeps/i386/mp_clz_tab.c +++ /dev/null @@ -1 +0,0 @@ -/* __clz_tab not needed on i386. */ diff --git a/sysdeps/i386/mul_1.S b/sysdeps/i386/mul_1.S deleted file mode 100644 index cf83d1b343..0000000000 --- a/sysdeps/i386/mul_1.S +++ /dev/null @@ -1,86 +0,0 @@ -/* i80386 __mpn_mul_1 -- Multiply a limb vector with a limb and store - the result in a second limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. 
- This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define SIZE S1+4 -#define S2LIMB SIZE+4 - -#define res_ptr edi -#define s1_ptr esi -#define size ecx -#define s2_limb ebx - - .text -ENTRY (__mpn_mul_1) - - pushl %res_ptr - cfi_adjust_cfa_offset (4) - pushl %s1_ptr - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %s2_limb - cfi_adjust_cfa_offset (4) - - movl RES(%esp), %res_ptr - cfi_rel_offset (res_ptr, 12) - movl S1(%esp), %s1_ptr - cfi_rel_offset (s1_ptr, 8) - movl SIZE(%esp), %size - movl S2LIMB(%esp), %s2_limb - cfi_rel_offset (s2_limb, 0) - leal (%res_ptr,%size,4), %res_ptr - leal (%s1_ptr,%size,4), %s1_ptr - negl %size - xorl %ebp, %ebp - cfi_rel_offset (ebp, 4) - ALIGN (3) -L(oop): - movl (%s1_ptr,%size,4), %eax - mull %s2_limb - addl %ebp, %eax - movl %eax, (%res_ptr,%size,4) - adcl $0, %edx - movl %edx, %ebp - - incl %size - jnz L(oop) - movl %ebp, %eax - - popl %s2_limb - cfi_adjust_cfa_offset (-4) - cfi_restore (s2_limb) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %s1_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore (s1_ptr) - popl %res_ptr - cfi_adjust_cfa_offset (-4) - cfi_restore 
(res_ptr) - - ret -#undef size -END (__mpn_mul_1) diff --git a/sysdeps/i386/nptl/Makefile b/sysdeps/i386/nptl/Makefile deleted file mode 100644 index 2c61b352eb..0000000000 --- a/sysdeps/i386/nptl/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2002-2017 Free Software Foundation, Inc. -# This file is part of the GNU C Library. - -# The GNU C Library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. - -# The GNU C Library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with the GNU C Library; if not, see -# <http://www.gnu.org/licenses/>. - -ifeq ($(subdir),csu) -gen-as-const-headers += tcb-offsets.sym -endif - -ifeq ($(subdir),nptl) -CFLAGS-pthread_create.c += -mpreferred-stack-boundary=4 -CFLAGS-tst-align.c += -mpreferred-stack-boundary=4 -CFLAGS-tst-align2.c += -mpreferred-stack-boundary=4 -endif diff --git a/sysdeps/i386/nptl/pthread_spin_init.c b/sysdeps/i386/nptl/pthread_spin_init.c deleted file mode 100644 index a1205b9698..0000000000 --- a/sysdeps/i386/nptl/pthread_spin_init.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* Not needed. pthread_spin_init is an alias for pthread_spin_unlock. */ diff --git a/sysdeps/i386/nptl/pthread_spin_lock.S b/sysdeps/i386/nptl/pthread_spin_lock.S deleted file mode 100644 index 160244b7a8..0000000000 --- a/sysdeps/i386/nptl/pthread_spin_lock.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <lowlevellock.h> - - .globl pthread_spin_lock - .type pthread_spin_lock,@function - .align 16 -pthread_spin_lock: - mov 4(%esp), %eax -1: LOCK - decl 0(%eax) - jne 2f - xor %eax, %eax - ret - - .align 16 -2: rep - nop - cmpl $0, 0(%eax) - jg 1b - jmp 2b - .size pthread_spin_lock,.-pthread_spin_lock diff --git a/sysdeps/i386/nptl/pthread_spin_unlock.S b/sysdeps/i386/nptl/pthread_spin_unlock.S deleted file mode 100644 index b6636ae8d7..0000000000 --- a/sysdeps/i386/nptl/pthread_spin_unlock.S +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - .globl pthread_spin_unlock - .type pthread_spin_unlock,@function - .align 16 -pthread_spin_unlock: - movl 4(%esp), %eax - movl $1, (%eax) - xorl %eax, %eax - ret - .size pthread_spin_unlock,.-pthread_spin_unlock - - /* The implementation of pthread_spin_init is identical. */ - .globl pthread_spin_init -pthread_spin_init = pthread_spin_unlock diff --git a/sysdeps/i386/nptl/pthreaddef.h b/sysdeps/i386/nptl/pthreaddef.h deleted file mode 100644 index 54abccd11b..0000000000 --- a/sysdeps/i386/nptl/pthreaddef.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* Default stack size. */ -#define ARCH_STACK_DEFAULT_SIZE (2 * 1024 * 1024) - -/* Required stack pointer alignment at beginning. SSE requires 16 - bytes. */ -#define STACK_ALIGN 16 - -/* Minimal stack size after allocating thread descriptor and guard size. */ -#define MINIMAL_REST_STACK 2048 - -/* Alignment requirement for TCB. - - Some processors such as Intel Atom pay a big penalty on every - access using a segment override if that segment's base is not - aligned to the size of a cache line. (See Intel 64 and IA-32 - Architectures Optimization Reference Manual, section 13.3.3.3, - "Segment Base".) On such machines, a cache line is 64 bytes. */ -#define TCB_ALIGNMENT 64 - - -/* Location of current stack frame. 
*/ -#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/sysdeps/i386/nptl/tcb-offsets.sym b/sysdeps/i386/nptl/tcb-offsets.sym deleted file mode 100644 index 695a810386..0000000000 --- a/sysdeps/i386/nptl/tcb-offsets.sym +++ /dev/null @@ -1,17 +0,0 @@ -#include <sysdep.h> -#include <tls.h> -#include <kernel-features.h> - -RESULT offsetof (struct pthread, result) -TID offsetof (struct pthread, tid) -CANCELHANDLING offsetof (struct pthread, cancelhandling) -CLEANUP_JMP_BUF offsetof (struct pthread, cleanup_jmp_buf) -MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads) -SYSINFO_OFFSET offsetof (tcbhead_t, sysinfo) -CLEANUP offsetof (struct pthread, cleanup) -CLEANUP_PREV offsetof (struct _pthread_cleanup_buffer, __prev) -MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock) -POINTER_GUARD offsetof (tcbhead_t, pointer_guard) -#ifndef __ASSUME_PRIVATE_FUTEX -PRIVATE_FUTEX offsetof (tcbhead_t, private_futex) -#endif diff --git a/sysdeps/i386/nptl/tls.h b/sysdeps/i386/nptl/tls.h deleted file mode 100644 index f9a6b11ecf..0000000000 --- a/sysdeps/i386/nptl/tls.h +++ /dev/null @@ -1,435 +0,0 @@ -/* Definition for thread-local data handling. nptl/i386 version. - Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#ifndef _TLS_H -#define _TLS_H 1 - -#include <dl-sysdep.h> -#ifndef __ASSEMBLER__ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <stdlib.h> -# include <sysdep.h> -# include <libc-pointer-arith.h> /* For cast_to_integer. */ -# include <kernel-features.h> -# include <dl-dtv.h> - -typedef struct -{ - void *tcb; /* Pointer to the TCB. Not necessarily the - thread descriptor used by libpthread. */ - dtv_t *dtv; - void *self; /* Pointer to the thread descriptor. */ - int multiple_threads; - uintptr_t sysinfo; - uintptr_t stack_guard; - uintptr_t pointer_guard; - int gscope_flag; -#ifndef __ASSUME_PRIVATE_FUTEX - int private_futex; -#else - int __glibc_reserved1; -#endif - /* Reservation of some values for the TM ABI. */ - void *__private_tm[4]; - /* GCC split stack support. */ - void *__private_ss; -} tcbhead_t; - -# define TLS_MULTIPLE_THREADS_IN_TCB 1 - -#else /* __ASSEMBLER__ */ -# include <tcb-offsets.h> -#endif - - -/* Alignment requirement for the stack. For IA-32 this is governed by - the SSE memory functions. */ -#define STACK_ALIGN 16 - -#ifndef __ASSEMBLER__ -/* Get system call information. */ -# include <sysdep.h> - -/* The old way: using LDT. */ - -/* Structure passed to `modify_ldt', 'set_thread_area', and 'clone' calls. */ -struct user_desc -{ - unsigned int entry_number; - unsigned long int base_addr; - unsigned int limit; - unsigned int seg_32bit:1; - unsigned int contents:2; - unsigned int read_exec_only:1; - unsigned int limit_in_pages:1; - unsigned int seg_not_present:1; - unsigned int useable:1; - unsigned int empty:25; -}; - -/* Initializing bit fields is slow. We speed it up by using a union. */ -union user_desc_init -{ - struct user_desc desc; - unsigned int vals[4]; -}; - - -/* This is the size of the initial TCB. Can't be just sizeof (tcbhead_t), - because NPTL getpid, __libc_alloca_cutoff etc. need (almost) the whole - struct pthread even when not linked with -lpthread. 
*/ -# define TLS_INIT_TCB_SIZE sizeof (struct pthread) - -/* Alignment requirements for the initial TCB. */ -# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread) - -/* This is the size of the TCB. */ -# define TLS_TCB_SIZE sizeof (struct pthread) - -/* Alignment requirements for the TCB. */ -# define TLS_TCB_ALIGN __alignof__ (struct pthread) - -/* The TCB can have any size and the memory following the address the - thread pointer points to is unspecified. Allocate the TCB there. */ -# define TLS_TCB_AT_TP 1 -# define TLS_DTV_AT_TP 0 - -/* Get the thread descriptor definition. */ -# include <nptl/descr.h> - - -/* Install the dtv pointer. The pointer passed is to the element with - index -1 which contain the length. */ -# define INSTALL_DTV(descr, dtvp) \ - ((tcbhead_t *) (descr))->dtv = (dtvp) + 1 - -/* Install new dtv for current thread. */ -# define INSTALL_NEW_DTV(dtvp) \ - ({ struct pthread *__pd; \ - THREAD_SETMEM (__pd, header.dtv, (dtvp)); }) - -/* Return dtv of given thread descriptor. */ -# define GET_DTV(descr) \ - (((tcbhead_t *) (descr))->dtv) - -/* Macros to load from and store into segment registers. 
*/ -# ifndef TLS_GET_GS -# define TLS_GET_GS() \ - ({ int __seg; __asm ("movw %%gs, %w0" : "=q" (__seg)); __seg & 0xffff; }) -# endif -# ifndef TLS_SET_GS -# define TLS_SET_GS(val) \ - __asm ("movw %w0, %%gs" :: "q" (val)) -# endif - -#ifdef NEED_DL_SYSINFO -# define INIT_SYSINFO \ - _head->sysinfo = GLRO(dl_sysinfo) -# define SETUP_THREAD_SYSINFO(pd) \ - ((pd)->header.sysinfo = THREAD_GETMEM (THREAD_SELF, header.sysinfo)) -# define CHECK_THREAD_SYSINFO(pd) \ - assert ((pd)->header.sysinfo == THREAD_GETMEM (THREAD_SELF, header.sysinfo)) -#else -# define INIT_SYSINFO -#endif - -#ifndef LOCK_PREFIX -# ifdef UP -# define LOCK_PREFIX /* nothing */ -# else -# define LOCK_PREFIX "lock;" -# endif -#endif - -static inline void __attribute__ ((unused, always_inline)) -tls_fill_user_desc (union user_desc_init *desc, - unsigned int entry_number, - void *pd) -{ - desc->vals[0] = entry_number; - /* The 'base_addr' field. Pointer to the TCB. */ - desc->vals[1] = (unsigned long int) pd; - /* The 'limit' field. We use 4GB which is 0xfffff pages. */ - desc->vals[2] = 0xfffff; - /* Collapsed value of the bitfield: - .seg_32bit = 1 - .contents = 0 - .read_exec_only = 0 - .limit_in_pages = 1 - .seg_not_present = 0 - .useable = 1 */ - desc->vals[3] = 0x51; -} - -/* Code to initially initialize the thread pointer. This might need - special attention since 'errno' is not yet available and if the - operation can cause a failure 'errno' must not be touched. */ -# define TLS_INIT_TP(thrdescr) \ - ({ void *_thrdescr = (thrdescr); \ - tcbhead_t *_head = _thrdescr; \ - union user_desc_init _segdescr; \ - int _result; \ - \ - _head->tcb = _thrdescr; \ - /* For now the thread descriptor is at the same address. */ \ - _head->self = _thrdescr; \ - /* New syscall handling support. */ \ - INIT_SYSINFO; \ - \ - /* Let the kernel pick a value for the 'entry_number' field. */ \ - tls_fill_user_desc (&_segdescr, -1, _thrdescr); \ - \ - /* Install the TLS. 
*/ \ - INTERNAL_SYSCALL_DECL (err); \ - _result = INTERNAL_SYSCALL (set_thread_area, err, 1, &_segdescr.desc); \ - \ - if (_result == 0) \ - /* We know the index in the GDT, now load the segment register. \ - The use of the GDT is described by the value 3 in the lower \ - three bits of the segment descriptor value. \ - \ - Note that we have to do this even if the numeric value of \ - the descriptor does not change. Loading the segment register \ - causes the segment information from the GDT to be loaded \ - which is necessary since we have changed it. */ \ - TLS_SET_GS (_segdescr.desc.entry_number * 8 + 3); \ - \ - _result == 0 ? NULL \ - : "set_thread_area failed when setting up thread-local storage\n"; }) - -# define TLS_DEFINE_INIT_TP(tp, pd) \ - union user_desc_init _segdescr; \ - /* Find the 'entry_number' field that the kernel selected in TLS_INIT_TP. \ - The first three bits of the segment register value select the GDT, \ - ignore them. We get the index from the value of the %gs register in \ - the current thread. */ \ - tls_fill_user_desc (&_segdescr, TLS_GET_GS () >> 3, pd); \ - const struct user_desc *tp = &_segdescr.desc - - -/* Return the address of the dtv for the current thread. */ -# define THREAD_DTV() \ - ({ struct pthread *__pd; \ - THREAD_GETMEM (__pd, header.dtv); }) - - -/* Return the thread descriptor for the current thread. - - The contained asm must *not* be marked volatile since otherwise - assignments like - pthread_descr self = thread_self(); - do not get optimized away. */ -# define THREAD_SELF \ - ({ struct pthread *__self; \ - asm ("movl %%gs:%c1,%0" : "=r" (__self) \ - : "i" (offsetof (struct pthread, header.self))); \ - __self;}) - -/* Magic for libthread_db to know how to do THREAD_SELF. */ -# define DB_THREAD_SELF \ - REGISTER_THREAD_AREA (32, offsetof (struct user_regs_struct, xgs), 3) \ - REGISTER_THREAD_AREA (64, 26 * 8, 3) /* x86-64's user_regs_struct->gs */ - - -/* Read member of the thread descriptor directly. 
*/ -# define THREAD_GETMEM(descr, member) \ - ({ __typeof (descr->member) __value; \ - if (sizeof (__value) == 1) \ - asm volatile ("movb %%gs:%P2,%b0" \ - : "=q" (__value) \ - : "0" (0), "i" (offsetof (struct pthread, member))); \ - else if (sizeof (__value) == 4) \ - asm volatile ("movl %%gs:%P1,%0" \ - : "=r" (__value) \ - : "i" (offsetof (struct pthread, member))); \ - else \ - { \ - if (sizeof (__value) != 8) \ - /* There should not be any value with a size other than 1, \ - 4 or 8. */ \ - abort (); \ - \ - asm volatile ("movl %%gs:%P1,%%eax\n\t" \ - "movl %%gs:%P2,%%edx" \ - : "=A" (__value) \ - : "i" (offsetof (struct pthread, member)), \ - "i" (offsetof (struct pthread, member) + 4)); \ - } \ - __value; }) - - -/* Same as THREAD_GETMEM, but the member offset can be non-constant. */ -# define THREAD_GETMEM_NC(descr, member, idx) \ - ({ __typeof (descr->member[0]) __value; \ - if (sizeof (__value) == 1) \ - asm volatile ("movb %%gs:%P2(%3),%b0" \ - : "=q" (__value) \ - : "0" (0), "i" (offsetof (struct pthread, member[0])), \ - "r" (idx)); \ - else if (sizeof (__value) == 4) \ - asm volatile ("movl %%gs:%P1(,%2,4),%0" \ - : "=r" (__value) \ - : "i" (offsetof (struct pthread, member[0])), \ - "r" (idx)); \ - else \ - { \ - if (sizeof (__value) != 8) \ - /* There should not be any value with a size other than 1, \ - 4 or 8. */ \ - abort (); \ - \ - asm volatile ("movl %%gs:%P1(,%2,8),%%eax\n\t" \ - "movl %%gs:4+%P1(,%2,8),%%edx" \ - : "=&A" (__value) \ - : "i" (offsetof (struct pthread, member[0])), \ - "r" (idx)); \ - } \ - __value; }) - - - -/* Set member of the thread descriptor directly. 
*/ -# define THREAD_SETMEM(descr, member, value) \ - ({ if (sizeof (descr->member) == 1) \ - asm volatile ("movb %b0,%%gs:%P1" : \ - : "iq" (value), \ - "i" (offsetof (struct pthread, member))); \ - else if (sizeof (descr->member) == 4) \ - asm volatile ("movl %0,%%gs:%P1" : \ - : "ir" (value), \ - "i" (offsetof (struct pthread, member))); \ - else \ - { \ - if (sizeof (descr->member) != 8) \ - /* There should not be any value with a size other than 1, \ - 4 or 8. */ \ - abort (); \ - \ - asm volatile ("movl %%eax,%%gs:%P1\n\t" \ - "movl %%edx,%%gs:%P2" : \ - : "A" ((uint64_t) cast_to_integer (value)), \ - "i" (offsetof (struct pthread, member)), \ - "i" (offsetof (struct pthread, member) + 4)); \ - }}) - - -/* Same as THREAD_SETMEM, but the member offset can be non-constant. */ -# define THREAD_SETMEM_NC(descr, member, idx, value) \ - ({ if (sizeof (descr->member[0]) == 1) \ - asm volatile ("movb %b0,%%gs:%P1(%2)" : \ - : "iq" (value), \ - "i" (offsetof (struct pthread, member)), \ - "r" (idx)); \ - else if (sizeof (descr->member[0]) == 4) \ - asm volatile ("movl %0,%%gs:%P1(,%2,4)" : \ - : "ir" (value), \ - "i" (offsetof (struct pthread, member)), \ - "r" (idx)); \ - else \ - { \ - if (sizeof (descr->member[0]) != 8) \ - /* There should not be any value with a size other than 1, \ - 4 or 8. */ \ - abort (); \ - \ - asm volatile ("movl %%eax,%%gs:%P1(,%2,8)\n\t" \ - "movl %%edx,%%gs:4+%P1(,%2,8)" : \ - : "A" ((uint64_t) cast_to_integer (value)), \ - "i" (offsetof (struct pthread, member)), \ - "r" (idx)); \ - }}) - - -/* Atomic compare and exchange on TLS, returning old value. 
*/ -#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \ - ({ __typeof (descr->member) __ret; \ - __typeof (oldval) __old = (oldval); \ - if (sizeof (descr->member) == 4) \ - asm volatile (LOCK_PREFIX "cmpxchgl %2, %%gs:%P3" \ - : "=a" (__ret) \ - : "0" (__old), "r" (newval), \ - "i" (offsetof (struct pthread, member))); \ - else \ - /* Not necessary for other sizes in the moment. */ \ - abort (); \ - __ret; }) - - -/* Atomic logical and. */ -#define THREAD_ATOMIC_AND(descr, member, val) \ - (void) ({ if (sizeof ((descr)->member) == 4) \ - asm volatile (LOCK_PREFIX "andl %1, %%gs:%P0" \ - :: "i" (offsetof (struct pthread, member)), \ - "ir" (val)); \ - else \ - /* Not necessary for other sizes in the moment. */ \ - abort (); }) - - -/* Atomic set bit. */ -#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \ - (void) ({ if (sizeof ((descr)->member) == 4) \ - asm volatile (LOCK_PREFIX "orl %1, %%gs:%P0" \ - :: "i" (offsetof (struct pthread, member)), \ - "ir" (1 << (bit))); \ - else \ - /* Not necessary for other sizes in the moment. */ \ - abort (); }) - - -/* Set the stack guard field in TCB head. */ -#define THREAD_SET_STACK_GUARD(value) \ - THREAD_SETMEM (THREAD_SELF, header.stack_guard, value) -#define THREAD_COPY_STACK_GUARD(descr) \ - ((descr)->header.stack_guard \ - = THREAD_GETMEM (THREAD_SELF, header.stack_guard)) - - -/* Set the pointer guard field in the TCB head. */ -#define THREAD_SET_POINTER_GUARD(value) \ - THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value) -#define THREAD_COPY_POINTER_GUARD(descr) \ - ((descr)->header.pointer_guard \ - = THREAD_GETMEM (THREAD_SELF, header.pointer_guard)) - - -/* Get and set the global scope generation counter in the TCB head. 
*/ -#define THREAD_GSCOPE_FLAG_UNUSED 0 -#define THREAD_GSCOPE_FLAG_USED 1 -#define THREAD_GSCOPE_FLAG_WAIT 2 -#define THREAD_GSCOPE_RESET_FLAG() \ - do \ - { int __res; \ - asm volatile ("xchgl %0, %%gs:%P1" \ - : "=r" (__res) \ - : "i" (offsetof (struct pthread, header.gscope_flag)), \ - "0" (THREAD_GSCOPE_FLAG_UNUSED)); \ - if (__res == THREAD_GSCOPE_FLAG_WAIT) \ - lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ - } \ - while (0) -#define THREAD_GSCOPE_SET_FLAG() \ - THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED) -#define THREAD_GSCOPE_WAIT() \ - GL(dl_wait_lookup_done) () - -#endif /* __ASSEMBLER__ */ - -#endif /* tls.h */ diff --git a/sysdeps/i386/preconfigure b/sysdeps/i386/preconfigure deleted file mode 100644 index c8fefd1bff..0000000000 --- a/sysdeps/i386/preconfigure +++ /dev/null @@ -1,5 +0,0 @@ -# preconfigure fragment for i386. - -case "$machine" in -i[4567]86) base_machine=i386 machine=i386/$machine ;; -esac diff --git a/sysdeps/i386/pthread_spin_trylock.S b/sysdeps/i386/pthread_spin_trylock.S deleted file mode 100644 index f71a9fcb2d..0000000000 --- a/sysdeps/i386/pthread_spin_trylock.S +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <pthread-errnos.h> - - -#ifdef UP -# define LOCK -#else -# define LOCK lock -#endif - - .globl pthread_spin_trylock - .type pthread_spin_trylock,@function - .align 16 -pthread_spin_trylock: - movl 4(%esp), %edx - movl $1, %eax - xorl %ecx, %ecx - LOCK - cmpxchgl %ecx, (%edx) - movl $EBUSY, %eax -#ifdef HAVE_CMOV - cmovel %ecx, %eax -#else - jne 0f - movl %ecx, %eax -0: -#endif - ret - .size pthread_spin_trylock,.-pthread_spin_trylock diff --git a/sysdeps/i386/rawmemchr.S b/sysdeps/i386/rawmemchr.S deleted file mode 100644 index 246ec3f18e..0000000000 --- a/sysdeps/i386/rawmemchr.S +++ /dev/null @@ -1,222 +0,0 @@ -/* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - This version is developed using the same algorithm as the fast C - version which carries the following introduction: - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#define RTN PARMS -#define STR RTN -#define CHR STR+4 - - .text -ENTRY (__rawmemchr) - - /* Save callee-safe register used in this function. */ - pushl %edi - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - - /* Load parameters into registers. */ - movl STR(%esp), %eax - movl CHR(%esp), %edx - - /* At the moment %edx contains C. What we need for the - algorithm is C in all bytes of the dword. Avoid - operations on 16 bit words because these require an - prefix byte (and one more cycle). */ - movb %dl, %dh /* Now it is 0|0|c|c */ - movl %edx, %ecx - shll $16, %edx /* Now c|c|0|0 */ - movw %cx, %dx /* And finally c|c|c|c */ - - /* Better performance can be achieved if the word (32 - bit) memory access is aligned on a four-byte-boundary. - So process first bytes one by one until boundary is - reached. Don't use a loop for better performance. */ - - testb $3, %al /* correctly aligned ? */ - je L(1) /* yes => begin loop */ - cmpb %dl, (%eax) /* compare byte */ - je L(9) /* target found => return */ - incl %eax /* increment source pointer */ - - testb $3, %al /* correctly aligned ? */ - je L(1) /* yes => begin loop */ - cmpb %dl, (%eax) /* compare byte */ - je L(9) /* target found => return */ - incl %eax /* increment source pointer */ - - testb $3, %al /* correctly aligned ? */ - je L(1) /* yes => begin loop */ - cmpb %dl, (%eax) /* compare byte */ - je L(9) /* target found => return */ - incl %eax /* increment source pointer */ - - /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to - change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. 
Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-31 is set, there will be a carry - into bit 32 (=carry flag), so all of the hole bits will - be changed. - - 3) But wait! Aren't we looking for C, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - - /* Each round the main loop processes 16 bytes. */ - ALIGN (4) - -L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - - /* According to the algorithm we had to reverse the effect of the - XOR first and then test the overflow bits. But because the - following XOR would destroy the carry flag and it would (in a - representation with more than 32 bits) not alter then last - overflow, we can now test this condition. If no carry is signaled - no overflow must have occurred in the last byte => it was 0. */ - jnc L(8) - - /* We are only interested in carry bits that change due to the - previous add, so remove original bits */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - - /* Now test for the other three overflow bits. 
*/ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - /* If at least one byte of the word is C we don't get 0 in %edi. */ - jnz L(8) /* found it => return pointer */ - - /* This process is unfolded four times for better performance. - we don't increment the source pointer each time. Instead we - use offsets and increment by 16 in each run of the loop. But - before probing for the matching byte we need some extra code - (following LL(13) below). Even the len can be compared with - constants instead of decrementing each time. */ - - movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(7) /* highest byte is C => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(7) /* found it => return pointer */ - - movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(6) /* highest byte is C => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. 
*/ - jnz L(6) /* found it => return pointer */ - - movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(5) /* highest byte is C => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(5) /* found it => return pointer */ - - /* Adjust both counters for a full round, i.e. 16 bytes. */ - addl $16, %eax - jmp L(1) - /* add missing source pointer increments */ -L(5): addl $4, %eax -L(6): addl $4, %eax -L(7): addl $4, %eax - - /* Test for the matching byte in the word. %ecx contains a NUL - char in the byte which originally was the byte we are looking - at. */ -L(8): testb %cl, %cl /* test first byte in dword */ - jz L(9) /* if zero => return pointer */ - incl %eax /* increment source pointer */ - - testb %ch, %ch /* test second byte in dword */ - jz L(9) /* if zero => return pointer */ - incl %eax /* increment source pointer */ - - testl $0xff0000, %ecx /* test third byte in dword */ - jz L(9) /* if zero => return pointer */ - incl %eax /* increment source pointer */ - - /* No further test needed we we know it is one of the four bytes. */ - -L(9): - popl %edi /* pop saved register */ - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__rawmemchr) - -libc_hidden_def (__rawmemchr) -weak_alias (__rawmemchr, rawmemchr) diff --git a/sysdeps/i386/rshift.S b/sysdeps/i386/rshift.S deleted file mode 100644 index cf179052b5..0000000000 --- a/sysdeps/i386/rshift.S +++ /dev/null @@ -1,105 +0,0 @@ -/* i80386 __mpn_rshift -- - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. 
- - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+12 /* space for 3 saved regs */ -#define RES PARMS -#define S RES+4 -#define SIZE S+4 -#define CNT SIZE+4 - - .text -ENTRY (__mpn_rshift) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebx - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 8) - movl S(%esp),%esi - cfi_rel_offset (esi, 4) - movl SIZE(%esp),%edx - movl CNT(%esp),%ecx - leal -4(%edi,%edx,4),%edi - leal (%esi,%edx,4),%esi - negl %edx - - movl (%esi,%edx,4),%ebx /* read least significant limb */ - cfi_rel_offset (ebx, 0) - cfi_remember_state - xorl %eax,%eax - shrdl %cl,%ebx,%eax /* compute carry limb */ - incl %edx - jz L(end) - pushl %eax /* push carry limb onto stack */ - cfi_adjust_cfa_offset (4) - testb $1,%dl - jnz L(1) /* enter loop in the middle */ - movl %ebx,%eax - - ALIGN (3) -L(oop): movl (%esi,%edx,4),%ebx /* load next higher limb */ - shrdl %cl,%ebx,%eax /* compute result limb */ - movl %eax,(%edi,%edx,4) /* store it */ - incl %edx -L(1): movl (%esi,%edx,4),%eax - shrdl %cl,%eax,%ebx - movl %ebx,(%edi,%edx,4) - incl %edx - jnz L(oop) - - shrl %cl,%eax /* compute most significant limb */ - movl %eax,(%edi) /* store it */ - - popl %eax /* pop carry limb */ - 
cfi_adjust_cfa_offset (-4) - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - - cfi_restore_state -L(end): shrl %cl,%ebx /* compute most significant limb */ - movl %ebx,(%edi) /* store it */ - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_rshift) diff --git a/sysdeps/i386/setfpucw.c b/sysdeps/i386/setfpucw.c deleted file mode 100644 index 40b995f18a..0000000000 --- a/sysdeps/i386/setfpucw.c +++ /dev/null @@ -1,54 +0,0 @@ -/* Set the FPU control word for x86. - Copyright (C) 2003-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <math.h> -#include <fpu_control.h> -#include <fenv.h> -#include <unistd.h> -#include <ldsodefs.h> -#include <dl-procinfo.h> - -void -__setfpucw (fpu_control_t set) -{ - fpu_control_t cw; - - /* Fetch the current control word. */ - __asm__ ("fnstcw %0" : "=m" (*&cw)); - - /* Preserve the reserved bits, and set the rest as the user - specified (or the default, if the user gave zero). 
*/ - cw &= _FPU_RESERVED; - cw |= set & ~_FPU_RESERVED; - - __asm__ ("fldcw %0" : : "m" (*&cw)); - - /* If the CPU supports SSE, we set the MXCSR as well. */ - if (HAS_CPU_FEATURE (SSE)) - { - unsigned int xnew_exc; - - /* Get the current MXCSR. */ - __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - - xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7)); - xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7); - - __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); - } -} diff --git a/sysdeps/i386/setjmp.S b/sysdeps/i386/setjmp.S deleted file mode 100644 index 738a899e8b..0000000000 --- a/sysdeps/i386/setjmp.S +++ /dev/null @@ -1,58 +0,0 @@ -/* setjmp for i386. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <jmpbuf-offsets.h> -#include <asm-syntax.h> -#include <stap-probe.h> - -#define PARMS 4 /* no space for saved regs */ -#define JMPBUF PARMS -#define SIGMSK JMPBUF+4 - -ENTRY (__sigsetjmp) - - movl JMPBUF(%esp), %eax - - /* Save registers. */ - movl %ebx, (JB_BX*4)(%eax) - movl %esi, (JB_SI*4)(%eax) - movl %edi, (JB_DI*4)(%eax) - leal JMPBUF(%esp), %ecx /* Save SP as it will be after we return. 
*/ -#ifdef PTR_MANGLE - PTR_MANGLE (%ecx) -#endif - movl %ecx, (JB_SP*4)(%eax) - movl 0(%esp), %ecx /* Save PC we are returning to now. */ - LIBC_PROBE (setjmp, 3, 4@%eax, -4@SIGMSK(%esp), 4@%ecx) -#ifdef PTR_MANGLE - PTR_MANGLE (%ecx) -#endif - movl %ecx, (JB_PC*4)(%eax) - movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */ - -#if IS_IN (rtld) - /* In ld.so we never save the signal mask. */ - xorl %eax, %eax - ret -#else - /* Make a tail call to __sigjmp_save; it takes the same args. */ - jmp __sigjmp_save -#endif -END (__sigsetjmp) -hidden_def (__sigsetjmp) diff --git a/sysdeps/i386/stackguard-macros.h b/sysdeps/i386/stackguard-macros.h deleted file mode 100644 index 039762927c..0000000000 --- a/sysdeps/i386/stackguard-macros.h +++ /dev/null @@ -1,12 +0,0 @@ -#include <stdint.h> - -#define STACK_CHK_GUARD \ - ({ uintptr_t x; asm ("movl %%gs:0x14, %0" : "=r" (x)); x; }) - -#define POINTER_CHK_GUARD \ - ({ \ - uintptr_t x; \ - asm ("movl %%gs:%c1, %0" : "=r" (x) \ - : "i" (offsetof (tcbhead_t, pointer_guard))); \ - x; \ - }) diff --git a/sysdeps/i386/stackinfo.h b/sysdeps/i386/stackinfo.h deleted file mode 100644 index ba17867d3a..0000000000 --- a/sysdeps/i386/stackinfo.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This file contains a bit of information about the stack allocation - of the processor. */ - -#ifndef _STACKINFO_H -#define _STACKINFO_H 1 - -#include <elf.h> - -/* On x86 the stack grows down. */ -#define _STACK_GROWS_DOWN 1 - -/* Default to an executable stack. PF_X can be overridden if PT_GNU_STACK is - * present, but it is presumed absent. */ -#define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X) - -/* Access to the stack pointer. The macros are used in alloca_account - for which they need to act as barriers as well, hence the additional - (unnecessary) parameters. */ -#define stackinfo_get_sp() \ - ({ void *p__; asm volatile ("mov %%esp, %0" : "=r" (p__)); p__; }) -#define stackinfo_sub_sp(ptr) \ - ({ ptrdiff_t d__; \ - asm volatile ("sub %%esp, %0" : "=r" (d__) : "0" (ptr)); \ - d__; }) - -#endif /* stackinfo.h */ diff --git a/sysdeps/i386/start.S b/sysdeps/i386/start.S deleted file mode 100644 index ccb1e2b38f..0000000000 --- a/sysdeps/i386/start.S +++ /dev/null @@ -1,139 +0,0 @@ -/* Startup code compliant to the ELF i386 ABI. - Copyright (C) 1995-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - In addition to the permissions in the GNU Lesser General Public - License, the Free Software Foundation gives you unlimited - permission to link the compiled version of this file with other - programs, and to distribute those programs without any restriction - coming from the use of this file. 
(The GNU Lesser General Public - License restrictions do apply in other respects; for example, they - cover modification of the file, and distribution when not linked - into another program.) - - Note that people who make modified versions of this file are not - obligated to grant this special exception for their modified - versions; it is their choice whether to do so. The GNU Lesser - General Public License gives permission to release a modified - version without this exception; this exception also makes it - possible to release a modified version which carries forward this - exception. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This is the canonical entry point, usually the first thing in the text - segment. The SVR4/i386 ABI (pages 3-31, 3-32) says that when the entry - point runs, most registers' values are unspecified, except for: - - %edx Contains a function pointer to be registered with `atexit'. - This is how the dynamic linker arranges to have DT_FINI - functions called for shared libraries that have been loaded - before this code runs. - - %esp The stack contains the arguments and environment: - 0(%esp) argc - 4(%esp) argv[0] - ... - (4*argc)(%esp) NULL - (4*(argc+1))(%esp) envp[0] - ... - NULL -*/ - - .text - .globl _start - .type _start,@function -_start: - /* Clear the frame pointer. The ABI suggests this be done, to mark - the outermost frame obviously. */ - xorl %ebp, %ebp - - /* Extract the arguments as encoded on the stack and set up - the arguments for `main': argc, argv. envp will be determined - later in __libc_start_main. */ - popl %esi /* Pop the argument count. 
*/ - movl %esp, %ecx /* argv starts just at the current stack top.*/ - - /* Before pushing the arguments align the stack to a 16-byte - (SSE needs 16-byte alignment) boundary to avoid penalties from - misaligned accesses. Thanks to Edward Seidl <seidl@janed.com> - for pointing this out. */ - andl $0xfffffff0, %esp - pushl %eax /* Push garbage because we allocate - 28 more bytes. */ - - /* Provide the highest stack address to the user code (for stacks - which grow downwards). */ - pushl %esp - - pushl %edx /* Push address of the shared library - termination function. */ - -#ifdef SHARED - /* Load PIC register. */ - call 1f - addl $_GLOBAL_OFFSET_TABLE_, %ebx - - /* Push address of our own entry points to .fini and .init. */ - leal __libc_csu_fini@GOTOFF(%ebx), %eax - pushl %eax - leal __libc_csu_init@GOTOFF(%ebx), %eax - pushl %eax - - pushl %ecx /* Push second argument: argv. */ - pushl %esi /* Push first argument: argc. */ - - pushl main@GOT(%ebx) - - /* Call the user's main function, and exit with its value. - But let the libc call main. */ - call __libc_start_main@PLT -#else - /* Push address of our own entry points to .fini and .init. */ - pushl $__libc_csu_fini - pushl $__libc_csu_init - - pushl %ecx /* Push second argument: argv. */ - pushl %esi /* Push first argument: argc. */ - - pushl $main - - /* Call the user's main function, and exit with its value. - But let the libc call main. */ - call __libc_start_main -#endif - - hlt /* Crash if somehow `exit' does return. */ - -#ifdef SHARED -1: movl (%esp), %ebx - ret -#endif - -/* To fulfill the System V/i386 ABI we need this symbol. Yuck, it's so - meaningless since we don't support machines < 80386. */ - .section .rodata - .globl _fp_hw -_fp_hw: .long 3 - .size _fp_hw, 4 - .type _fp_hw,@object - -/* Define a symbol for the first piece of initialized data. 
*/ - .data - .globl __data_start -__data_start: - .long 0 - .weak data_start - data_start = __data_start diff --git a/sysdeps/i386/stpcpy.S b/sysdeps/i386/stpcpy.S deleted file mode 100644 index d9981b677b..0000000000 --- a/sysdeps/i386/stpcpy.S +++ /dev/null @@ -1,88 +0,0 @@ -/* Copy SRC to DEST returning the address of the terminating '\0' in DEST. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper (drepper@gnu.ai.mit.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This function is defined neither in ANSI nor POSIX standards but is - also not invented here. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 - - .text -ENTRY (__stpcpy) - - movl DEST(%esp), %eax - movl SRC(%esp), %ecx - subl %eax, %ecx /* magic: reduce number of loop variants - to one using addressing mode */ - - /* Here we would like to write - - subl $4, %eax - ALIGN (4) - - but the assembler is too smart and optimizes for the shortest - form where the number only needs one byte. But if we could - have the long form we would not need the alignment. 
*/ - - .byte 0x81, 0xe8 /* This is `subl $0x00000004, %eax' */ - .long 0x00000004 - - /* Four times unfolded loop with only one loop counter. This - is achieved by the use of index+base addressing mode. As the - loop counter we use the destination address because this is - also the result. */ -L(1): addl $4, %eax /* increment loop counter */ - - movb (%eax,%ecx), %dl /* load current char */ - movb %dl, (%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(2) /* yes, then exit */ - - movb 1(%eax,%ecx), %dl /* load current char */ - movb %dl, 1(%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(3) /* yes, then exit */ - - movb 2(%eax,%ecx), %dl /* load current char */ - movb %dl, 2(%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(4) /* yes, then exit */ - - movb 3(%eax,%ecx), %dl /* load current char */ - movb %dl, 3(%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jnz L(1) /* no, then continue loop */ - - incl %eax /* correct loop counter */ -L(4): incl %eax -L(3): incl %eax -L(2): - - ret -END (__stpcpy) - -weak_alias (__stpcpy, stpcpy) -libc_hidden_def (__stpcpy) -libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/i386/stpncpy.S b/sysdeps/i386/stpncpy.S deleted file mode 100644 index 46f2aba713..0000000000 --- a/sysdeps/i386/stpncpy.S +++ /dev/null @@ -1,147 +0,0 @@ -/* copy no more than N bytes from SRC to DEST, returning the address of - the terminating '\0' in DEST. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Some bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - original wrote n+1 chars in some cases. - - stpncpy() ought to behave like strncpy() ie. not null-terminate - if limited by n. glibc-1.09 stpncpy() does this. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 -#define LEN SRC+4 - - .text -ENTRY (__stpncpy) - - pushl %esi - cfi_adjust_cfa_offset (4) - - movl DEST(%esp), %eax - movl SRC(%esp), %esi - cfi_rel_offset (esi, 0) - movl LEN(%esp), %ecx - - subl %eax, %esi /* magic: reduce number of loop variants - to one using addressing mode */ - jmp L(1) /* jump to loop "head" */ - - ALIGN(4) - - /* Four times unfolded loop with two loop counters. We get the - third value (the source address) by using the index+base - addressing mode. */ -L(2): movb (%eax,%esi), %dl /* load current char */ - movb %dl, (%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(7) /* yes, then exit */ - - movb 1(%eax,%esi), %dl /* load current char */ - movb %dl, 1(%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(6) /* yes, then exit */ - - movb 2(%eax,%esi), %dl /* load current char */ - movb %dl, 2(%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(5) /* yes, then exit */ - - movb 3(%eax,%esi), %dl /* load current char */ - movb %dl, 3(%eax) /* and store it */ - testb %dl, %dl /* was it NUL? 
*/ - jz L(4) /* yes, then exit */ - - addl $4, %eax /* increment loop counter for full round */ - -L(1): subl $4, %ecx /* still more than 4 bytes allowed? */ - jae L(2) /* yes, then go to start of loop */ - - /* The maximal remaining 15 bytes are not processed in a loop. */ - - addl $4, %ecx /* correct above subtraction */ - jz L(9) /* maximal allowed char reached => go to end */ - - movb (%eax,%esi), %dl /* load current char */ - movb %dl, (%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(3) /* yes, then exit */ - - incl %eax /* increment pointer */ - decl %ecx /* decrement length counter */ - jz L(9) /* no more allowed => exit */ - - movb (%eax,%esi), %dl /* load current char */ - movb %dl, (%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(3) /* yes, then exit */ - - incl %eax /* increment pointer */ - decl %ecx /* decrement length counter */ - jz L(9) /* no more allowed => exit */ - - movb (%eax,%esi), %dl /* load current char */ - movb %dl, (%eax) /* and store it */ - testb %dl, %dl /* was it NUL? */ - jz L(3) /* yes, then exit */ - - incl %eax /* increment pointer */ - jmp L(9) /* we don't have to test for counter underflow - because we know we had a most 3 bytes - remaining => exit */ - - /* When coming from the main loop we have to adjust the pointer. */ -L(4): decl %ecx /* decrement counter */ - incl %eax /* increment pointer */ - -L(5): decl %ecx /* increment pointer */ - incl %eax /* increment pointer */ - -L(6): decl %ecx /* increment pointer */ - incl %eax /* increment pointer */ -L(7): - - addl $3, %ecx /* correct pre-decrementation of counter - at the beginning of the loop; but why 3 - and not 4? Very simple, we have to count - the NUL char we already wrote. */ - jz L(9) /* counter is also 0 => exit */ - - /* We now have to fill the rest of the buffer with NUL. This - is done in a tricky way. Please note that the addressing mode - used below is not the same we used above. Here we use the - %ecx register. 
*/ -L(8): - movb $0, (%ecx,%eax) /* store NUL char */ -L(3): decl %ecx /* all bytes written? */ - jnz L(8) /* no, then again */ - -L(9): popl %esi /* restore saved register content */ - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - - ret -END (__stpncpy) - -libc_hidden_def (__stpncpy) -weak_alias (__stpncpy, stpncpy) diff --git a/sysdeps/i386/strcat.S b/sysdeps/i386/strcat.S deleted file mode 100644 index 4a26b3c528..0000000000 --- a/sysdeps/i386/strcat.S +++ /dev/null @@ -1,265 +0,0 @@ -/* strcat(dest, src) -- Append SRC on the end of DEST. - For Intel 80x86, x>=4. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>. - Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 - - .text -ENTRY (strcat) - - pushl %edi /* Save callee-safe register. */ - cfi_adjust_cfa_offset (4) - - movl DEST(%esp), %edx - movl SRC(%esp), %ecx - - testb $0xff, (%ecx) /* Is source string empty? */ - jz L(8) /* yes => return */ - - /* Test the first bytes separately until destination is aligned. */ - testl $3, %edx /* destination pointer aligned? 
*/ - jz L(1) /* yes => begin scan loop */ - testb $0xff, (%edx) /* is end of string? */ - jz L(2) /* yes => start appending */ - incl %edx /* increment source pointer */ - - testl $3, %edx /* destination pointer aligned? */ - jz L(1) /* yes => begin scan loop */ - testb $0xff, (%edx) /* is end of string? */ - jz L(2) /* yes => start appending */ - incl %edx /* increment source pointer */ - - testl $3, %edx /* destination pointer aligned? */ - jz L(1) /* yes => begin scan loop */ - testb $0xff, (%edx) /* is end of string? */ - jz L(2) /* yes => start appending */ - incl %edx /* increment source pointer */ - - /* Now we are aligned. Begin scan loop. */ - jmp L(1) - - cfi_rel_offset (edi, 0) - ALIGN(4) - -L(4): addl $16,%edx /* increment destination pointer for round */ - -L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - - /* If you compare this with the algorithm in memchr.S you will - notice that here is an `xorl' statement missing. But you must - not forget that we are looking for C == 0 and `xorl $0, %eax' - is a no-op. */ - - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - - /* According to the algorithm we had to reverse the effect of the - XOR first and then test the overflow bits. But because the - following XOR would destroy the carry flag and it would (in a - representation with more than 32 bits) not alter then last - overflow, we can now test this condition. If no carry is signaled - no overflow must have occurred in the last byte => it was 0. */ - jnc L(3) - - /* We are only interested in carry bits that change due to the - previous add, so remove original bits */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - - /* Now test for the other three overflow bits. */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. 
*/ - - /* If at least one byte of the word is C we don't get 0 in %ecx. */ - jnz L(3) - - movl 4(%edx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(5) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(5) /* one byte is NUL => stop copying */ - - movl 8(%edx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(6) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(6) /* one byte is NUL => stop copying */ - - movl 12(%edx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(7) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz L(4) /* no byte is NUL => carry on copying */ - -L(7): addl $4, %edx /* adjust source pointer */ -L(6): addl $4, %edx -L(5): addl $4, %edx - -L(3): testb %al, %al /* is first byte NUL? */ - jz L(2) /* yes => start copying */ - incl %edx /* increment source pointer */ - - testb %ah, %ah /* is second byte NUL? 
*/ - jz L(2) /* yes => start copying */ - incl %edx /* increment source pointer */ - - testl $0xff0000, %eax /* is third byte NUL? */ - jz L(2) /* yes => start copying */ - incl %edx /* increment source pointer */ - -L(2): subl %ecx, %edx /* reduce number of loop variants */ - - /* Now we have to align the source pointer. */ - testl $3, %ecx /* pointer correctly aligned? */ - jz L(29) /* yes => start copy loop */ - movb (%ecx), %al /* get first byte */ - movb %al, (%ecx,%edx) /* and store it */ - andb %al, %al /* is byte NUL? */ - jz L(8) /* yes => return */ - incl %ecx /* increment pointer */ - - testl $3, %ecx /* pointer correctly aligned? */ - jz L(29) /* yes => start copy loop */ - movb (%ecx), %al /* get first byte */ - movb %al, (%ecx,%edx) /* and store it */ - andb %al, %al /* is byte NUL? */ - jz L(8) /* yes => return */ - incl %ecx /* increment pointer */ - - testl $3, %ecx /* pointer correctly aligned? */ - jz L(29) /* yes => start copy loop */ - movb (%ecx), %al /* get first byte */ - movb %al, (%ecx,%edx) /* and store it */ - andb %al, %al /* is byte NUL? */ - jz L(8) /* yes => return */ - incl %ecx /* increment pointer */ - - /* Now we are aligned. */ - jmp L(29) /* start copy loop */ - - ALIGN(4) - -L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */ - addl $16, %ecx /* adjust pointer for full round */ - -L(29): movl (%ecx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(9) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. 
*/ - jnz L(9) /* one byte is NUL => stop copying */ - movl %eax, (%ecx,%edx) /* store word to destination */ - - movl 4(%ecx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(91) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(91) /* one byte is NUL => stop copying */ - movl %eax, 4(%ecx,%edx) /* store word to destination */ - - movl 8(%ecx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(92) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(92) /* one byte is NUL => stop copying */ - movl %eax, 8(%ecx,%edx) /* store word to destination */ - - movl 12(%ecx), %eax /* get word from source */ - movl $0xfefefeff, %edi /* magic value */ - addl %eax, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(93) /* highest byte is C => stop copying */ - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz L(28) /* no is NUL => carry on copying */ - -L(93): addl $4, %ecx /* adjust pointer */ -L(92): addl $4, %ecx -L(91): addl $4, %ecx - -L(9): movb %al, (%ecx,%edx) /* store first byte of last word */ - orb %al, %al /* is it NUL? 
*/ - jz L(8) /* yes => return */ - - movb %ah, 1(%ecx,%edx) /* store second byte of last word */ - orb %ah, %ah /* is it NUL? */ - jz L(8) /* yes => return */ - - shrl $16, %eax /* make upper bytes accessible */ - movb %al, 2(%ecx,%edx) /* store third byte of last word */ - orb %al, %al /* is it NUL? */ - jz L(8) /* yes => return */ - - movb %ah, 3(%ecx,%edx) /* store fourth byte of last word */ - -L(8): movl DEST(%esp), %eax /* start address of destination is result */ - popl %edi /* restore saved register */ - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (strcat) -libc_hidden_builtin_def (strcat) diff --git a/sysdeps/i386/strchr.S b/sysdeps/i386/strchr.S deleted file mode 100644 index 6075e77882..0000000000 --- a/sysdeps/i386/strchr.S +++ /dev/null @@ -1,290 +0,0 @@ -/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#define RTN PARMS -#define STR RTN -#define CHR STR+4 - - .text -ENTRY (strchr) - - pushl %edi /* Save callee-safe registers used here. */ - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - movl STR(%esp), %eax - movl CHR(%esp), %edx - - /* At the moment %edx contains C. What we need for the - algorithm is C in all bytes of the dword. Avoid - operations on 16 bit words because these require an - prefix byte (and one more cycle). */ - movb %dl, %dh /* now it is 0|0|c|c */ - movl %edx, %ecx - shll $16, %edx /* now it is c|c|0|0 */ - movw %cx, %dx /* and finally c|c|c|c */ - - /* Before we start with the main loop we process single bytes - until the source pointer is aligned. This has two reasons: - 1. aligned 32-bit memory access is faster - and (more important) - 2. we process in the main loop 32 bit in one step although - we don't know the end of the string. But accessing at - 4-byte alignment guarantees that we never access illegal - memory if this would not also be done by the trivial - implementation (this is because all processor inherent - boundaries are multiples of 4. */ - - testb $3, %al /* correctly aligned ? */ - jz L(11) /* yes => begin loop */ - movb (%eax), %cl /* load byte in question (we need it twice) */ - cmpb %cl, %dl /* compare byte */ - je L(6) /* target found => return */ - testb %cl, %cl /* is NUL? */ - jz L(2) /* yes => return NULL */ - incl %eax /* increment pointer */ - - testb $3, %al /* correctly aligned ? */ - jz L(11) /* yes => begin loop */ - movb (%eax), %cl /* load byte in question (we need it twice) */ - cmpb %cl, %dl /* compare byte */ - je L(6) /* target found => return */ - testb %cl, %cl /* is NUL? */ - jz L(2) /* yes => return NULL */ - incl %eax /* increment pointer */ - - testb $3, %al /* correctly aligned ? 
*/ - jz L(11) /* yes => begin loop */ - movb (%eax), %cl /* load byte in question (we need it twice) */ - cmpb %cl, %dl /* compare byte */ - je L(6) /* target found => return */ - testb %cl, %cl /* is NUL? */ - jz L(2) /* yes => return NULL */ - incl %eax /* increment pointer */ - - /* No we have reached alignment. */ - jmp L(11) /* begin loop */ - - /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to - change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-31 is set, there will be a carry - into bit 32 (=carry flag), so all of the hole bits will - be changed. - - 3) But wait! Aren't we looking for C, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - /* Each round the main loop processes 16 bytes. */ - - ALIGN(4) - -L(1): addl $16, %eax /* adjust pointer for whole round */ - -L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. 
We get - carry bits reported for each byte which - is *not* C */ - - /* According to the algorithm we had to reverse the effect of the - XOR first and then test the overflow bits. But because the - following XOR would destroy the carry flag and it would (in a - representation with more than 32 bits) not alter then last - overflow, we can now test this condition. If no carry is signaled - no overflow must have occurred in the last byte => it was 0. */ - jnc L(7) - - /* We are only interested in carry bits that change due to the - previous add, so remove original bits */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - - /* Now test for the other three overflow bits. */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - /* If at least one byte of the word is C we don't get 0 in %edi. */ - jnz L(7) /* found it => return pointer */ - - /* Now we made sure the dword does not contain the character we are - looking for. But because we deal with strings we have to check - for the end of string before testing the next dword. */ - - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(2) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(2) /* found NUL => return NULL */ - - movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. 
We get - carry bits reported for each byte which - is *not* C */ - jnc L(71) /* highest byte is C => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(71) /* found it => return pointer */ - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(2) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(2) /* found NUL => return NULL */ - - movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* C */ - jnc L(72) /* highest byte is C => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(72) /* found it => return pointer */ - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(2) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. 
*/ - jnz L(2) /* found NUL => return NULL */ - - movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* C */ - jnc L(73) /* highest byte is C => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(73) /* found it => return pointer */ - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(2) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz L(1) /* no NUL found => restart loop */ - -L(2): /* Return NULL. */ - xorl %eax, %eax - popl %edi /* restore saved register content */ - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) -L(73): addl $4, %eax /* adjust pointer */ -L(72): addl $4, %eax -L(71): addl $4, %eax - - /* We now scan for the byte in which the character was matched. - But we have to take care of the case that a NUL char is - found before this in the dword. Note that we XORed %ecx - with the byte we're looking for, therefore the tests below look - reversed. */ - -L(7): testb %cl, %cl /* is first byte C? */ - jz L(6) /* yes => return pointer */ - cmpb %dl, %cl /* is first byte NUL? */ - je L(2) /* yes => return NULL */ - incl %eax /* it's not in the first byte */ - - testb %ch, %ch /* is second byte C? 
*/ - jz L(6) /* yes => return pointer */ - cmpb %dl, %ch /* is second byte NUL? */ - je L(2) /* yes => return NULL? */ - incl %eax /* it's not in the second byte */ - - shrl $16, %ecx /* make upper byte accessible */ - testb %cl, %cl /* is third byte C? */ - jz L(6) /* yes => return pointer */ - cmpb %dl, %cl /* is third byte NUL? */ - je L(2) /* yes => return NULL */ - - /* It must be in the fourth byte and it cannot be NUL. */ - incl %eax - -L(6): - popl %edi /* restore saved register content */ - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (strchr) - -weak_alias (strchr, index) -libc_hidden_builtin_def (strchr) diff --git a/sysdeps/i386/strchrnul.S b/sysdeps/i386/strchrnul.S deleted file mode 100644 index 800b872c74..0000000000 --- a/sysdeps/i386/strchrnul.S +++ /dev/null @@ -1,278 +0,0 @@ -/* strchrnul (str, chr) -- Return pointer to first occurrence of CHR in STR - or the final NUL byte. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.org> - Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#define RTN PARMS -#define STR RTN -#define CHR STR+4 - - .text -ENTRY (__strchrnul) - - pushl %edi /* Save callee-safe registers used here. */ - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - - movl STR(%esp), %eax - movl CHR(%esp), %edx - - /* At the moment %edx contains CHR. What we need for the - algorithm is CHR in all bytes of the dword. Avoid - operations on 16 bit words because these require an - prefix byte (and one more cycle). */ - movb %dl, %dh /* now it is 0|0|c|c */ - movl %edx, %ecx - shll $16, %edx /* now it is c|c|0|0 */ - movw %cx, %dx /* and finally c|c|c|c */ - - /* Before we start with the main loop we process single bytes - until the source pointer is aligned. This has two reasons: - 1. aligned 32-bit memory access is faster - and (more important) - 2. we process in the main loop 32 bit in one step although - we don't know the end of the string. But accessing at - 4-byte alignment guarantees that we never access illegal - memory if this would not also be done by the trivial - implementation (this is because all processor inherent - boundaries are multiples of 4. */ - - testb $3, %al /* correctly aligned ? */ - jz L(11) /* yes => begin loop */ - movb (%eax), %cl /* load byte in question (we need it twice) */ - cmpb %cl, %dl /* compare byte */ - je L(6) /* target found => return */ - testb %cl, %cl /* is NUL? */ - jz L(6) /* yes => return NULL */ - incl %eax /* increment pointer */ - - testb $3, %al /* correctly aligned ? */ - jz L(11) /* yes => begin loop */ - movb (%eax), %cl /* load byte in question (we need it twice) */ - cmpb %cl, %dl /* compare byte */ - je L(6) /* target found => return */ - testb %cl, %cl /* is NUL? */ - jz L(6) /* yes => return NULL */ - incl %eax /* increment pointer */ - - testb $3, %al /* correctly aligned ? 
*/ - jz L(11) /* yes => begin loop */ - movb (%eax), %cl /* load byte in question (we need it twice) */ - cmpb %cl, %dl /* compare byte */ - je L(6) /* target found => return */ - testb %cl, %cl /* is NUL? */ - jz L(6) /* yes => return NULL */ - incl %eax /* increment pointer */ - - /* No we have reached alignment. */ - jmp L(11) /* begin loop */ - - /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to - change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-31 is set, there will be a carry - into bit 32 (=carry flag), so all of the hole bits will - be changed. - - 3) But wait! Aren't we looking for CHR, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is CHR. This turns each byte that is CHR - into a zero. */ - - /* Each round the main loop processes 16 bytes. */ - - ALIGN(4) - -L(1): addl $16, %eax /* adjust pointer for whole round */ - -L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. 
We get - carry bits reported for each byte which - is *not* CHR */ - - /* According to the algorithm we had to reverse the effect of the - XOR first and then test the overflow bits. But because the - following XOR would destroy the carry flag and it would (in a - representation with more than 32 bits) not alter then last - overflow, we can now test this condition. If no carry is signaled - no overflow must have occurred in the last byte => it was 0. */ - jnc L(7) - - /* We are only interested in carry bits that change due to the - previous add, so remove original bits */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - - /* Now test for the other three overflow bits. */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - /* If at least one byte of the word is CHR we don't get 0 in %edi. */ - jnz L(7) /* found it => return pointer */ - - /* Now we made sure the dword does not contain the character we are - looking for. But because we deal with strings we have to check - for the end of string before testing the next dword. */ - - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(7) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(7) /* found NUL => return NULL */ - - movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. 
We get - carry bits reported for each byte which - is *not* CHR */ - jnc L(71) /* highest byte is CHR => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(71) /* found it => return pointer */ - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(71) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(71) /* found NUL => return NULL */ - - movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* CHR */ - jnc L(72) /* highest byte is CHR => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(72) /* found it => return pointer */ - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(72) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. 
*/ - jnz L(72) /* found NUL => return NULL */ - - movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ - xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* CHR */ - jnc L(73) /* highest byte is CHR => return pointer */ - xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(73) /* found it => return pointer */ - xorl %edx, %ecx /* restore original dword without reload */ - movl $0xfefefeff, %edi /* magic value */ - addl %ecx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(73) /* highest byte is NUL => return NULL */ - xorl %ecx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz L(1) /* no NUL found => restart loop */ - -L(73): addl $4, %eax /* adjust pointer */ -L(72): addl $4, %eax -L(71): addl $4, %eax - - /* We now scan for the byte in which the character was matched. - But we have to take care of the case that a NUL char is - found before this in the dword. */ - -L(7): testb %cl, %cl /* is first byte CHR? */ - jz L(6) /* yes => return pointer */ - cmpb %dl, %cl /* is first byte NUL? */ - je L(6) /* yes => return NULL */ - incl %eax /* it's not in the first byte */ - - testb %ch, %ch /* is second byte CHR? */ - jz L(6) /* yes => return pointer */ - cmpb %dl, %ch /* is second byte NUL? */ - je L(6) /* yes => return NULL? */ - incl %eax /* it's not in the second byte */ - - shrl $16, %ecx /* make upper byte accessible */ - testb %cl, %cl /* is third byte CHR? 
*/ - jz L(6) /* yes => return pointer */ - cmpb %dl, %cl /* is third byte NUL? */ - je L(6) /* yes => return NULL */ - - /* It must be in the fourth byte and it cannot be NUL. */ - incl %eax - -L(6): popl %edi /* restore saved register content */ - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__strchrnul) - -weak_alias (__strchrnul, strchrnul) diff --git a/sysdeps/i386/strcspn.S b/sysdeps/i386/strcspn.S deleted file mode 100644 index c852a3b1e5..0000000000 --- a/sysdeps/i386/strcspn.S +++ /dev/null @@ -1,240 +0,0 @@ -/* strcspn (str, ss) -- Return the length of the initial segment of STR - which contains no characters from SS. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define STR PARMS -#define STOP STR+4 - - .text -ENTRY (strcspn) - - movl STR(%esp), %edx - movl STOP(%esp), %eax - - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters. 
- Before inserting marks for the stop characters we clear the whole - table. The unrolled form is much faster than a loop. */ - xorl %ecx, %ecx /* %ecx = 0 !!! */ - - pushl %ecx /* make a 256 bytes long block filled with 0 */ - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - 
cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl $0 /* These immediate values make the label 2 */ - cfi_adjust_cfa_offset (4) - pushl $0 /* to be aligned on a 16 byte boundary to */ - cfi_adjust_cfa_offset (4) - pushl $0 /* get a better performance of the loop. */ - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - -/* For understanding the following code remember that %ecx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 32-bit value in %ecx. */ - -/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want - longer instructions so that the next loop aligns without adding nops. */ - -L(2): movb (%eax), %cl /* get byte from stopset */ - testb %cl, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 1(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 2(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? 
*/ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 3(%eax), %cl /* get byte from stopset */ - addl $4, %eax /* increment stopset pointer */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - testb $0xff, %cl /* is NUL char? */ - jnz L(2) /* no => process next dword from stopset */ - -L(1): leal -4(%edx), %eax /* prepare loop */ - - /* We use a neat trick for the following loop. Normally we would - have to test for two termination conditions - 1. a character in the stopset was found - and - 2. the end of the string was found - But as a sign that the character is in the stopset we store its - value in the table. But the value of NUL is NUL so the loop - terminates for NUL in every case. */ - -L(3): addl $4, %eax /* adjust pointer for full loop round */ - - movb (%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - je L(4) /* yes => return */ - - movb 1(%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - je L(5) /* yes => return */ - - movb 2(%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - je L(6) /* yes => return */ - - movb 3(%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jne L(3) /* yes => return */ - - incl %eax /* adjust pointer */ -L(6): incl %eax -L(5): incl %eax - -L(4): addl $256, %esp /* remove stopset */ - cfi_adjust_cfa_offset (-256) - subl %edx, %eax /* we have to return the number of valid - characters, so compute distance to first - non-valid character */ - ret -END (strcspn) -libc_hidden_builtin_def (strcspn) diff --git a/sysdeps/i386/string-inlines.c b/sysdeps/i386/string-inlines.c deleted file mode 100644 index d023bc3aa3..0000000000 --- a/sysdeps/i386/string-inlines.c +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (C) 1999-2017 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This is to avoid PLT entries for the x86 version. */ -#define __memcpy_g __memcpy_g_internal -#define __strchr_g __strchr_g_internal -#include <string/string-inlines.c> - -void * -(__memcpy_c) (void *d, const void *s, size_t n) -{ - return memcpy (d, s, n); -} - -void * -__memset_cc (void *s, unsigned long int pattern, size_t n) -{ - return memset (s, pattern & 0xff, n); -} -strong_alias (__memset_cc, __memset_cg) - -void * -__memset_gg (void *s, char c, size_t n) -{ - return memset (s, c, n); -} - -#ifdef __memcpy_c -# undef __memcpy_g -strong_alias (__memcpy_g_internal, __memcpy_g) -# undef __strchr_g -strong_alias (__strchr_g_internal, __strchr_g) -#endif diff --git a/sysdeps/i386/strlen.S b/sysdeps/i386/strlen.S deleted file mode 100644 index 192fadf20a..0000000000 --- a/sysdeps/i386/strlen.S +++ /dev/null @@ -1,132 +0,0 @@ -/* strlen(str) -- determine the length of the string STR. - Optimized for Intel 80x86, x>=4. - Copyright (C) 1991-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define STR PARMS - - .text -ENTRY (strlen) - - movl STR(%esp), %ecx - movl %ecx, %eax /* duplicate it */ - - andl $3, %ecx /* mask alignment bits */ - jz L(1) /* aligned => start loop */ - cmpb %ch, (%eax) /* is byte NUL? */ - je L(2) /* yes => return */ - incl %eax /* increment pointer */ - - xorl $3, %ecx /* was alignment = 3? */ - jz L(1) /* yes => now it is aligned and start loop */ - cmpb %ch, (%eax) /* is byte NUL? */ - je L(2) /* yes => return */ - addl $1, %eax /* increment pointer */ - - subl $1, %ecx /* was alignment = 2? */ - jz L(1) /* yes => now it is aligned and start loop */ - cmpb %ch, (%eax) /* is byte NUL? */ - je L(2) /* yes => return */ - -/* Don't change the above `addl $1,%eax' and `subl $1, %ecx' into `incl %eax' - and `decl %ecx' resp. The additional two byte per instruction make the - label 4 to be aligned on a 16 byte boundary with nops. - - The following `sub $15, %eax' is part of this trick, too. Together with - the next instruction (`addl $16, %eax') it is in fact a `incl %eax', just - as expected from the algorithm. But doing so has the advantage that - no jump to label 1 is necessary and so the pipeline is not flushed. 
*/ - - subl $15, %eax /* effectively +1 */ - - -L(4): addl $16, %eax /* adjust pointer for full loop */ - -L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edx /* magic value */ - addl %ecx, %edx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(3) /* highest byte is NUL => return pointer */ - xorl %ecx, %edx /* (word+magic)^word */ - orl $0xfefefeff, %edx /* set all non-carry bits */ - incl %edx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(3) /* found NUL => return pointer */ - - movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edx /* magic value */ - addl %ecx, %edx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(5) /* highest byte is NUL => return pointer */ - xorl %ecx, %edx /* (word+magic)^word */ - orl $0xfefefeff, %edx /* set all non-carry bits */ - incl %edx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(5) /* found NUL => return pointer */ - - movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edx /* magic value */ - addl %ecx, %edx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(6) /* highest byte is NUL => return pointer */ - xorl %ecx, %edx /* (word+magic)^word */ - orl $0xfefefeff, %edx /* set all non-carry bits */ - incl %edx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(6) /* found NUL => return pointer */ - - movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edx /* magic value */ - addl %ecx, %edx /* add the magic value to the word. 
We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(7) /* highest byte is NUL => return pointer */ - xorl %ecx, %edx /* (word+magic)^word */ - orl $0xfefefeff, %edx /* set all non-carry bits */ - incl %edx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz L(4) /* no NUL found => continue loop */ - -L(7): addl $4, %eax /* adjust pointer */ -L(6): addl $4, %eax -L(5): addl $4, %eax - -L(3): testb %cl, %cl /* is first byte NUL? */ - jz L(2) /* yes => return */ - incl %eax /* increment pointer */ - - testb %ch, %ch /* is second byte NUL? */ - jz L(2) /* yes => return */ - incl %eax /* increment pointer */ - - testl $0xff0000, %ecx /* is third byte NUL? */ - jz L(2) /* yes => return pointer */ - incl %eax /* increment pointer */ - -L(2): subl STR(%esp), %eax /* compute difference to string start */ - - ret -END (strlen) -libc_hidden_builtin_def (strlen) diff --git a/sysdeps/i386/strlen.c b/sysdeps/i386/strlen.c deleted file mode 100644 index 0b69957392..0000000000 --- a/sysdeps/i386/strlen.c +++ /dev/null @@ -1,35 +0,0 @@ -/* Determine the length of a string. For Intel 80x86, x>=3. - Copyright (C) 1991-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <string.h> - -size_t -strlen (const char *str) -{ - int cnt; - - asm("cld\n" /* Search forward. */ - /* Some old versions of gas need `repne' instead of `repnz'. */ - "repnz\n" /* Look for a zero byte. */ - "scasb" /* %0, %1, %3 */ : - "=c" (cnt) : "D" (str), "0" (-1), "a" (0)); - - return -2 - cnt; -} -libc_hidden_builtin_def (strlen) diff --git a/sysdeps/i386/strpbrk.S b/sysdeps/i386/strpbrk.S deleted file mode 100644 index 1109b233da..0000000000 --- a/sysdeps/i386/strpbrk.S +++ /dev/null @@ -1,243 +0,0 @@ -/* strpbrk (str, ss) -- Return a pointer to the first character in STR - which is contained in SS, or NULL if there is none. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define STR RTN -#define STOP STR+4 - - .text -ENTRY (strpbrk) - - movl STR(%esp), %edx - movl STOP(%esp), %eax - - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters.
- Before inserting marks for the stop characters we clear the whole - table. The unrolled form is much faster than a loop. */ - xorl %ecx, %ecx /* %ecx = 0 !!! */ - - pushl %ecx /* make a 256 bytes long block filled with 0 */ - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - 
cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl $0 /* These immediate values make the label 2 */ - cfi_adjust_cfa_offset (4) - pushl $0 /* to be aligned on a 16 byte boundary to */ - cfi_adjust_cfa_offset (4) - pushl $0 /* get a better performance of the loop. */ - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - -/* For understanding the following code remember that %ecx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 32-bit value in %ecx. */ - -/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want - longer instructions so that the next loop aligns without adding nops. */ - -L(2): movb (%eax), %cl /* get byte from stopset */ - testb %cl, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 1(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 2(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? 
*/ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 3(%eax), %cl /* get byte from stopset */ - addl $4, %eax /* increment stopset pointer */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - testb $0xff, %cl /* is NUL char? */ - jnz L(2) /* no => process next dword from stopset */ - -L(1): leal -4(%edx), %eax /* prepare loop */ - - /* We use a neat trick for the following loop. Normally we would - have to test for two termination conditions - 1. a character in the stopset was found - and - 2. the end of the string was found - But as a sign that the character is in the stopset we store its - value in the table. But the value of NUL is NUL so the loop - terminates for NUL in every case. */ - -L(3): addl $4, %eax /* adjust pointer for full loop round */ - - movb (%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - je L(4) /* yes => return */ - - movb 1(%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - je L(5) /* yes => return */ - - movb 2(%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - je L(6) /* yes => return */ - - movb 3(%eax), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jne L(3) /* no => continue loop */ - - incl %eax /* adjust pointer */ -L(6): incl %eax -L(5): incl %eax - -L(4): addl $256, %esp /* remove stopset */ - cfi_adjust_cfa_offset (-256) - - orb %cl, %cl /* was last character NUL? */ - jnz L(7) /* no => return pointer */ - xorl %eax, %eax /* yes => nothing found, return NULL */ - -L(7): ret -END (strpbrk) -libc_hidden_builtin_def (strpbrk) diff --git a/sysdeps/i386/strrchr.S b/sysdeps/i386/strrchr.S deleted file mode 100644 index 95b304dc0b..0000000000 --- a/sysdeps/i386/strrchr.S +++ /dev/null @@ -1,334 +0,0 @@ -/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. - For Intel 80x86, x>=3.
- Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+8 /* space for 2 saved regs */ -#define RTN PARMS -#define STR RTN -#define CHR STR+4 - - .text -ENTRY (strrchr) - - pushl %edi /* Save callee-safe registers used here. */ - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - pushl %esi - cfi_adjust_cfa_offset (4) - - xorl %eax, %eax - movl STR(%esp), %esi - cfi_rel_offset (esi, 0) - movl CHR(%esp), %ecx - - /* At the moment %ecx contains C. What we need for the - algorithm is C in all bytes of the dword. Avoid - operations on 16 bit words because these require an - prefix byte (and one more cycle). */ - movb %cl, %ch /* now it is 0|0|c|c */ - movl %ecx, %edx - shll $16, %ecx /* now it is c|c|0|0 */ - movw %dx, %cx /* and finally c|c|c|c */ - - /* Before we start with the main loop we process single bytes - until the source pointer is aligned. This has two reasons: - 1. aligned 32-bit memory access is faster - and (more important) - 2. we process in the main loop 32 bit in one step although - we don't know the end of the string. 
But accessing at - 4-byte alignment guarantees that we never access illegal - memory if this would not also be done by the trivial - implementation (this is because all processor inherent - boundaries are multiples of 4). */ - - testl $3, %esi /* correctly aligned ? */ - jz L(19) /* yes => begin loop */ - movb (%esi), %dl /* load byte in question (we need it twice) */ - cmpb %dl, %cl /* compare byte */ - jne L(11) /* no match => skip */ - movl %esi, %eax /* remember pointer as possible result */ -L(11): orb %dl, %dl /* is NUL? */ - jz L(2) /* yes => return result */ - incl %esi /* increment pointer */ - - testl $3, %esi /* correctly aligned ? */ - jz L(19) /* yes => begin loop */ - movb (%esi), %dl /* load byte in question (we need it twice) */ - cmpb %dl, %cl /* compare byte */ - jne L(12) /* no match => skip */ - movl %esi, %eax /* remember pointer as result */ -L(12): orb %dl, %dl /* is NUL? */ - jz L(2) /* yes => return result */ - incl %esi /* increment pointer */ - - testl $3, %esi /* correctly aligned ? */ - jz L(19) /* yes => begin loop */ - movb (%esi), %dl /* load byte in question (we need it twice) */ - cmpb %dl, %cl /* compare byte */ - jne L(13) /* no match => skip */ - movl %esi, %eax /* remember pointer as result */ -L(13): orb %dl, %dl /* is NUL? */ - jz L(2) /* yes => return result */ - incl %esi /* increment pointer */ - - /* Now we have reached alignment. */ - jmp L(19) /* begin loop */ - - /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to - change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes?
Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-31 is set, there will be a carry - into bit 32 (=carry flag), so all of the hole bits will - be changed. - - 3) But wait! Aren't we looking for C, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - /* Each round the main loop processes 16 bytes. */ - - /* Jump to here when the character is detected. We chose this - way around because the character one is looking for is not - as frequent as the rest and taking a conditional jump is more - expensive than ignoring it. - - Some more words to the code below: it might not be obvious why - we decrement the source pointer here. In the loop the pointer - is not pre-incremented and so it still points before the word - we are looking at. But you should take a look at the instruction - which gets executed before we get into the loop: `addl $16, %esi'. - This makes the following subs into adds. */ - - /* These fill bytes make the main loop be correctly aligned. - We cannot use align because it is not the following instruction - which should be aligned. */ - .byte 0, 0 -#ifndef PROF - /* Profiling adds some code and so changes the alignment. */ - .byte 0 -#endif - -L(4): subl $4, %esi /* adjust pointer */ -L(41): subl $4, %esi -L(42): subl $4, %esi -L(43): testl $0xff000000, %edx /* is highest byte == C? */ - jnz L(33) /* no => try other bytes */ - leal 15(%esi), %eax /* store address as result */ - jmp L(1) /* and start loop again */ - -L(3): subl $4, %esi /* adjust pointer */ -L(31): subl $4, %esi -L(32): subl $4, %esi -L(33): testl $0xff0000, %edx /* is C in third byte? 
*/ - jnz L(51) /* no => try other bytes */ - leal 14(%esi), %eax /* store address as result */ - jmp L(1) /* and start loop again */ - -L(51): - /* At this point we know that the byte is in one of the lower bytes. - We make a guess and correct it if necessary. This reduces the - number of necessary jumps. */ - leal 12(%esi), %eax /* guess address of lowest byte as result */ - testb %dh, %dh /* is guess correct? */ - jnz L(1) /* yes => start loop */ - leal 13(%esi), %eax /* correct guess to second byte */ - -L(1): addl $16, %esi /* increment pointer for full round */ - -L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - - /* According to the algorithm we had to reverse the effect of the - XOR first and then test the overflow bits. But because the - following XOR would destroy the carry flag and it would (in a - representation with more than 32 bits) not alter then last - overflow, we can now test this condition. If no carry is signaled - no overflow must have occurred in the last byte => it was 0. */ - - jnc L(20) /* found NUL => check last word */ - - /* We are only interested in carry bits that change due to the - previous add, so remove original bits */ - xorl %edx, %edi /* (word+magic)^word */ - - /* Now test for the other three overflow bits. */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - /* If at least one byte of the word is C we don't get 0 in %edi. */ - jnz L(20) /* found NUL => check last word */ - - /* Now we made sure the dword does not contain the character we are - looking for. But because we deal with strings we have to check - for the end of string before testing the next dword. 
*/ - - xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(4) /* highest byte is C => examine dword */ - xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(3) /* C is detected in the word => examine it */ - - movl 4(%esi), %edx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(21) /* found NUL => check last word */ - xorl %edx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(21) /* found NUL => check last word */ - xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(41) /* highest byte is C => examine dword */ - xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(31) /* C is detected in the word => examine it */ - - movl 8(%esi), %edx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. 
We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(22) /* found NUL => check last word */ - xorl %edx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(22) /* found NUL => check last word */ - xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(42) /* highest byte is C => examine dword */ - xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(32) /* C is detected in the word => examine it */ - - movl 12(%esi), %edx /* get word (= 4 bytes) in question */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(23) /* found NUL => check last word */ - xorl %edx, %edi /* (word+magic)^word */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz L(23) /* found NUL => check last word */ - xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c - are now 0 */ - movl $0xfefefeff, %edi /* magic value */ - addl %edx, %edi /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc L(43) /* highest byte is C => examine dword */ - xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */ - orl $0xfefefeff, %edi /* set all non-carry bits */ - incl %edi /* add 1: if one carry bit was *not* set - the addition will not result in 0. 
*/ - jz L(1) /* C is not detected => restart loop */ - jmp L(33) /* examine word */ - -L(23): addl $4, %esi /* adjust pointer */ -L(22): addl $4, %esi -L(21): addl $4, %esi - - /* What remains to do is to test which byte the NUL char is and - whether the searched character appears in one of the bytes - before. A special case is that the searched byte may be NUL. - In this case a pointer to the terminating NUL char has to be - returned. */ - -L(20): cmpb %cl, %dl /* is first byte == C? */ - jne L(24) /* no => skip */ - movl %esi, %eax /* store address as result */ -L(24): testb %dl, %dl /* is first byte == NUL? */ - jz L(2) /* yes => return */ - - cmpb %cl, %dh /* is second byte == C? */ - jne L(25) /* no => skip */ - leal 1(%esi), %eax /* store address as result */ -L(25): testb %dh, %dh /* is second byte == NUL? */ - jz L(2) /* yes => return */ - - shrl $16,%edx /* make upper bytes accessible */ - cmpb %cl, %dl /* is third byte == C */ - jne L(26) /* no => skip */ - leal 2(%esi), %eax /* store address as result */ -L(26): testb %dl, %dl /* is third byte == NUL */ - jz L(2) /* yes => return */ - - cmpb %cl, %dh /* is fourth byte == C */ - jne L(2) /* no => skip */ - leal 3(%esi), %eax /* store address as result */ - -L(2): popl %esi /* restore saved register content */ - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (strrchr) - -weak_alias (strrchr, rindex) -libc_hidden_builtin_def (strrchr) diff --git a/sysdeps/i386/strspn.S b/sysdeps/i386/strspn.S deleted file mode 100644 index d433eb6af5..0000000000 --- a/sysdeps/i386/strspn.S +++ /dev/null @@ -1,240 +0,0 @@ -/* strspn (str, ss) -- Return the length of the initial segment of STR - which contains only characters from SS. - For Intel 80x86, x>=3. - Copyright (C) 1994-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu> - Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define STR PARMS -#define SKIP STR+4 - - .text -ENTRY (strspn) - - movl STR(%esp), %edx - movl SKIP(%esp), %eax - - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters. - Before inserting marks for the stop characters we clear the whole - table. The unrolled form is much faster than a loop. */ - xorl %ecx, %ecx /* %ecx = 0 !!! 
*/ - - pushl %ecx /* make a 256 bytes long block filled with 0 */ - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - 
cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl $0 /* These immediate values make the label 2 */ - cfi_adjust_cfa_offset (4) - pushl $0 /* to be aligned on a 16 byte boundary to */ - cfi_adjust_cfa_offset (4) - pushl $0 /* get a better performance of the loop. */ - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - -/* For understanding the following code remember that %ecx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 32-bit value in %ecx. */ - -/* Don't change the "testb $0xff,%%cl" to "testb %%cl,%%cl". We want - longer instructions so that the next loop aligns without adding nops. */ - -L(2): movb (%eax), %cl /* get byte from stopset */ - testb %cl, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 1(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 2(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 3(%eax), %cl /* get byte from stopset */ - addl $4, %eax /* increment stopset pointer */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - testb $0xff, %cl /* is NUL char? 
*/ - jnz L(2) /* no => process next dword from stopset */ - -L(1): leal -4(%edx), %eax /* prepare loop */ - - /* We use a neat trick for the following loop. Normally we would - have to test for two termination conditions - 1. a character in the stopset was found - and - 2. the end of the string was found - But as a sign that the character is in the stopset we store its - value in the table. But the value of NUL is NUL so the loop - terminates for NUL in every case. */ - -L(3): addl $4, %eax /* adjust pointer for full loop round */ - - movb (%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in skipset? */ - jz L(4) /* no => return */ - - movb 1(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in skipset? */ - jz L(5) /* no => return */ - - movb 2(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in skipset? */ - jz L(6) /* no => return */ - - movb 3(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in skipset? */ - jnz L(3) /* yes => start loop again */ - - incl %eax /* adjust pointer */ -L(6): incl %eax -L(5): incl %eax - -L(4): addl $256, %esp /* remove stopset */ - cfi_adjust_cfa_offset (-256) - subl %edx, %eax /* we have to return the number of valid - characters, so compute distance to first - non-valid character */ - ret -END (strspn) -libc_hidden_builtin_def (strspn) diff --git a/sysdeps/i386/sub_n.S b/sysdeps/i386/sub_n.S deleted file mode 100644 index 3649da29e7..0000000000 --- a/sysdeps/i386/sub_n.S +++ /dev/null @@ -1,111 +0,0 @@ -/* i80386 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store - sum in a third limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. 
- - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. */ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+8 /* space for 2 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define S2 S1+4 -#define SIZE S2+4 - - .text -ENTRY (__mpn_sub_n) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 4) - movl S1(%esp),%esi - cfi_rel_offset (esi, 0) - movl S2(%esp),%edx - movl SIZE(%esp),%ecx - movl %ecx,%eax - shrl $3,%ecx /* compute count for unrolled loop */ - negl %eax - andl $7,%eax /* get index where to start loop */ - jz L(oop) /* necessary special case for 0 */ - incl %ecx /* adjust loop count */ - shll $2,%eax /* adjustment for pointers... */ - subl %eax,%edi /* ... since they are offset ... */ - subl %eax,%esi /* ... by a constant when we ... */ - subl %eax,%edx /* ... enter the loop */ - shrl $2,%eax /* restore previous value */ -#ifdef PIC -/* Calculate start address in loop for PIC. Due to limitations in some - assemblers, Loop-L0-3 cannot be put into the leal */ - call L(0) - cfi_adjust_cfa_offset (4) -L(0): leal (%eax,%eax,8),%eax - addl (%esp),%eax - addl $(L(oop)-L(0)-3),%eax - addl $4,%esp - cfi_adjust_cfa_offset (-4) -#else -/* Calculate start address in loop for non-PIC. 
*/ - leal (L(oop) - 3)(%eax,%eax,8),%eax -#endif - jmp *%eax /* jump into loop */ - ALIGN (3) -L(oop): movl (%esi),%eax - sbbl (%edx),%eax - movl %eax,(%edi) - movl 4(%esi),%eax - sbbl 4(%edx),%eax - movl %eax,4(%edi) - movl 8(%esi),%eax - sbbl 8(%edx),%eax - movl %eax,8(%edi) - movl 12(%esi),%eax - sbbl 12(%edx),%eax - movl %eax,12(%edi) - movl 16(%esi),%eax - sbbl 16(%edx),%eax - movl %eax,16(%edi) - movl 20(%esi),%eax - sbbl 20(%edx),%eax - movl %eax,20(%edi) - movl 24(%esi),%eax - sbbl 24(%edx),%eax - movl %eax,24(%edi) - movl 28(%esi),%eax - sbbl 28(%edx),%eax - movl %eax,28(%edi) - leal 32(%edi),%edi - leal 32(%esi),%esi - leal 32(%edx),%edx - decl %ecx - jnz L(oop) - - sbbl %eax,%eax - negl %eax - - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_sub_n) diff --git a/sysdeps/i386/submul_1.S b/sysdeps/i386/submul_1.S deleted file mode 100644 index c765e8dd79..0000000000 --- a/sysdeps/i386/submul_1.S +++ /dev/null @@ -1,86 +0,0 @@ -/* i80386 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract - the result from a second limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. 
*/ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+16 /* space for 4 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define SIZE S1+4 -#define S2LIMB SIZE+4 - -#define res_ptr edi -#define s1_ptr esi -#define sizeP ecx -#define s2_limb ebx - - .text -ENTRY (__mpn_submul_1) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - pushl %ebp - cfi_adjust_cfa_offset (4) - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 12) - cfi_rel_offset (esi, 8) - cfi_rel_offset (ebp, 4) - cfi_rel_offset (ebx, 0) - - movl RES(%esp), %res_ptr - movl S1(%esp), %s1_ptr - movl SIZE(%esp), %sizeP - movl S2LIMB(%esp), %s2_limb - leal (%res_ptr,%sizeP,4), %res_ptr - leal (%s1_ptr,%sizeP,4), %s1_ptr - negl %sizeP - xorl %ebp, %ebp - ALIGN (3) -L(oop): - movl (%s1_ptr,%sizeP,4), %eax - mull %s2_limb - addl %ebp, %eax - adcl $0, %edx - subl %eax, (%res_ptr,%sizeP,4) - adcl $0, %edx - movl %edx, %ebp - - incl %sizeP - jnz L(oop) - movl %ebp, %eax - - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - popl %ebp - cfi_adjust_cfa_offset (-4) - cfi_restore (ebp) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_submul_1) diff --git a/sysdeps/i386/symbol-hacks.h b/sysdeps/i386/symbol-hacks.h deleted file mode 100644 index 36a13c83f7..0000000000 --- a/sysdeps/i386/symbol-hacks.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Hacks needed for symbol manipulation. i386 version. - Copyright (C) 2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdeps/wordsize-32/divdi3-symbol-hacks.h> - -#include_next "symbol-hacks.h" diff --git a/sysdeps/i386/sys/ucontext.h b/sysdeps/i386/sys/ucontext.h deleted file mode 100644 index fb5df11965..0000000000 --- a/sysdeps/i386/sys/ucontext.h +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* System V/i386 ABI compliant context switching support. */ - -#ifndef _SYS_UCONTEXT_H -#define _SYS_UCONTEXT_H 1 - -#include <features.h> - -#include <bits/types/sigset_t.h> -#include <bits/sigcontext.h> -#include <bits/types/stack_t.h> - - -/* Type for general register. */ -typedef int greg_t; - -/* Number of general registers. */ -#define __NGREG 19 -#ifdef __USE_MISC -# define NGREG __NGREG -#endif - -/* Container for all general registers. 
*/ -typedef greg_t gregset_t[__NGREG]; - -#ifdef __USE_MISC -/* Number of each register is the `gregset_t' array. */ -enum -{ - REG_GS = 0, -# define REG_GS REG_GS - REG_FS, -# define REG_FS REG_FS - REG_ES, -# define REG_ES REG_ES - REG_DS, -# define REG_DS REG_DS - REG_EDI, -# define REG_EDI REG_EDI - REG_ESI, -# define REG_ESI REG_ESI - REG_EBP, -# define REG_EBP REG_EBP - REG_ESP, -# define REG_ESP REG_ESP - REG_EBX, -# define REG_EBX REG_EBX - REG_EDX, -# define REG_EDX REG_EDX - REG_ECX, -# define REG_ECX REG_ECX - REG_EAX, -# define REG_EAX REG_EAX - REG_TRAPNO, -# define REG_TRAPNO REG_TRAPNO - REG_ERR, -# define REG_ERR REG_ERR - REG_EIP, -# define REG_EIP REG_EIP - REG_CS, -# define REG_CS REG_CS - REG_EFL, -# define REG_EFL REG_EFL - REG_UESP, -# define REG_UESP REG_UESP - REG_SS -# define REG_SS REG_SS -}; -#endif - -#ifdef __USE_MISC -# define __ctx(fld) fld -# define __ctxt(tag) tag -#else -# define __ctx(fld) __ ## fld -# define __ctxt(tag) /* Empty. */ -#endif - -/* Structure to describe FPU registers. */ -typedef struct fpregset - { - union - { - struct __ctxt(fpchip_state) - { - int __ctx(state)[27]; - int __ctx(status); - } __ctx(fpchip_state); - - struct __ctxt(fp_emul_space) - { - char __ctx(fp_emul)[246]; - char __ctx(fp_epad)[2]; - } __ctx(fp_emul_space); - - int __ctx(f_fpregs)[62]; - } __ctx(fp_reg_set); - - long int __ctx(f_wregs)[33]; - } fpregset_t; - -/* Context to describe whole processor state. */ -typedef struct - { - gregset_t __ctx(gregs); - fpregset_t __ctx(fpregs); - } mcontext_t; - -#undef __ctx -#undef __ctxt - -/* Userlevel context. 
*/ -typedef struct ucontext - { - unsigned long int uc_flags; - struct ucontext *uc_link; - sigset_t uc_sigmask; - stack_t uc_stack; - mcontext_t uc_mcontext; - long int uc_filler[5]; - } ucontext_t; - -#endif /* sys/ucontext.h */ diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h deleted file mode 100644 index d2b0860b99..0000000000 --- a/sysdeps/i386/sysdep.h +++ /dev/null @@ -1,159 +0,0 @@ -/* Assembler macros for i386. - Copyright (C) 1991-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdeps/generic/sysdep.h> - -#include <features.h> /* For __GNUC_PREREQ. */ - -/* It is desirable that the names of PIC thunks match those used by - GCC so that multiple copies are eliminated by the linker. Because - GCC 4.6 and earlier use __i686 in the names, it is necessary to - override that predefined macro. */ -#if defined __i686 && defined __ASSEMBLER__ -#undef __i686 -#define __i686 __i686 -#endif - -#ifdef __ASSEMBLER__ -# define GET_PC_THUNK(reg) __x86.get_pc_thunk.reg -#else -# define GET_PC_THUNK_STR(reg) "__x86.get_pc_thunk." #reg -#endif - -#ifdef __ASSEMBLER__ - -/* Syntactic details of assembler. */ - -/* ELF uses byte-counts for .align, most others use log2 of count of bytes. 
*/ -#define ALIGNARG(log2) 1<<log2 -#define ASM_SIZE_DIRECTIVE(name) .size name,.-name; - - -/* Define an entry point visible from C. - - There is currently a bug in gdb which prevents us from specifying - incomplete stabs information. Fake some entries here which specify - the current source file. */ -#define ENTRY(name) \ - .globl C_SYMBOL_NAME(name); \ - .type C_SYMBOL_NAME(name),@function; \ - .align ALIGNARG(4); \ - C_LABEL(name) \ - cfi_startproc; \ - CALL_MCOUNT - -#undef END -#define END(name) \ - cfi_endproc; \ - ASM_SIZE_DIRECTIVE(name) - -#define ENTRY_CHK(name) ENTRY (name) -#define END_CHK(name) END (name) - -/* If compiled for profiling, call `mcount' at the start of each function. */ -#ifdef PROF -/* The mcount code relies on a normal frame pointer being on the stack - to locate our caller, so push one just for its benefit. */ -#define CALL_MCOUNT \ - pushl %ebp; cfi_adjust_cfa_offset (4); movl %esp, %ebp; \ - cfi_def_cfa_register (ebp); call JUMPTARGET(mcount); \ - popl %ebp; cfi_def_cfa (esp, 4); -#else -#define CALL_MCOUNT /* Do nothing. */ -#endif - -/* Since C identifiers are not normally prefixed with an underscore - on this system, the asm identifier `syscall_error' intrudes on the - C name space. Make sure we use an innocuous name. */ -#define syscall_error __syscall_error -#define mcount _mcount - -#define PSEUDO(name, syscall_name, args) \ - .globl syscall_error; \ -lose: SYSCALL_PIC_SETUP \ - jmp JUMPTARGET(syscall_error); \ - ENTRY (name) \ - DO_CALL (syscall_name, args); \ - jb lose - -#undef PSEUDO_END -#define PSEUDO_END(name) \ - END (name) - -# define SETUP_PIC_REG(reg) \ - .ifndef GET_PC_THUNK(reg); \ - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \ - .globl GET_PC_THUNK(reg); \ - .hidden GET_PC_THUNK(reg); \ - .p2align 4; \ - .type GET_PC_THUNK(reg),@function; \ -GET_PC_THUNK(reg): \ - movl (%esp), %e##reg; \ - ret; \ - .size GET_PC_THUNK(reg), . 
- GET_PC_THUNK(reg); \ - .previous; \ - .endif; \ - call GET_PC_THUNK(reg) - -# define LOAD_PIC_REG(reg) \ - SETUP_PIC_REG(reg); addl $_GLOBAL_OFFSET_TABLE_, %e##reg - -#undef JUMPTARGET -#ifdef PIC -#define JUMPTARGET(name) name##@PLT -#define SYSCALL_PIC_SETUP \ - pushl %ebx; \ - cfi_adjust_cfa_offset (4); \ - call 0f; \ -0: popl %ebx; \ - cfi_adjust_cfa_offset (-4); \ - addl $_GLOBAL_OFFSET_TABLE_+[.-0b], %ebx; - -#else -#define JUMPTARGET(name) name -#define SYSCALL_PIC_SETUP /* Nothing. */ -#endif - -/* Local label name for asm code. */ -#ifndef L -#define L(name) .L##name -#endif - -#define atom_text_section .section ".text.atom", "ax" - -#else /* __ASSEMBLER__ */ - -# define SETUP_PIC_REG_STR(reg) \ - ".ifndef " GET_PC_THUNK_STR (reg) "\n" \ - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \ - ".globl " GET_PC_THUNK_STR (reg) "\n" \ - ".hidden " GET_PC_THUNK_STR (reg) "\n" \ - ".p2align 4\n" \ - ".type " GET_PC_THUNK_STR (reg) ",@function\n" \ -GET_PC_THUNK_STR (reg) ":" \ - "movl (%%esp), %%e" #reg "\n" \ - "ret\n" \ - ".size " GET_PC_THUNK_STR (reg) ", . - " GET_PC_THUNK_STR (reg) "\n" \ - ".previous\n" \ - ".endif\n" \ - "call " GET_PC_THUNK_STR (reg) - -# define LOAD_PIC_REG_STR(reg) \ - SETUP_PIC_REG_STR (reg) "\naddl $_GLOBAL_OFFSET_TABLE_, %%e" #reg - -#endif /* __ASSEMBLER__ */ diff --git a/sysdeps/i386/tls-macros.h b/sysdeps/i386/tls-macros.h deleted file mode 100644 index 053cba05d1..0000000000 --- a/sysdeps/i386/tls-macros.h +++ /dev/null @@ -1,78 +0,0 @@ -#include <features.h> /* For __GNUC_PREREQ. 
*/ - -#define TLS_LE(x) \ - ({ int *__l; \ - asm ("movl %%gs:0,%0\n\t" \ - "subl $" #x "@tpoff,%0" \ - : "=r" (__l)); \ - __l; }) - -#if defined PIC && !__GNUC_PREREQ (5,0) -# define TLS_IE(x) \ - ({ int *__l; \ - asm ("movl %%gs:0,%0\n\t" \ - "subl " #x "@gottpoff(%%ebx),%0" \ - : "=r" (__l)); \ - __l; }) -#else -# define TLS_IE(x) \ - ({ int *__l, __b; \ - asm ("call 1f\n\t" \ - ".subsection 1\n" \ - "1:\tmovl (%%esp), %%ebx\n\t" \ - "ret\n\t" \ - ".previous\n\t" \ - "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" \ - "movl %%gs:0,%0\n\t" \ - "subl " #x "@gottpoff(%%ebx),%0" \ - : "=r" (__l), "=&b" (__b)); \ - __l; }) -#endif - -#if defined PIC && !__GNUC_PREREQ (5,0) -# define TLS_LD(x) \ - ({ int *__l, __c, __d; \ - asm ("leal " #x "@tlsldm(%%ebx),%%eax\n\t" \ - "call ___tls_get_addr@plt\n\t" \ - "leal " #x "@dtpoff(%%eax), %%eax" \ - : "=a" (__l), "=&c" (__c), "=&d" (__d)); \ - __l; }) -#else -# define TLS_LD(x) \ - ({ int *__l, __b, __c, __d; \ - asm ("call 1f\n\t" \ - ".subsection 1\n" \ - "1:\tmovl (%%esp), %%ebx\n\t" \ - "ret\n\t" \ - ".previous\n\t" \ - "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" \ - "leal " #x "@tlsldm(%%ebx),%%eax\n\t" \ - "call ___tls_get_addr@plt\n\t" \ - "leal " #x "@dtpoff(%%eax), %%eax" \ - : "=a" (__l), "=&b" (__b), "=&c" (__c), "=&d" (__d)); \ - __l; }) -#endif - -#if defined PIC && !__GNUC_PREREQ (5,0) -# define TLS_GD(x) \ - ({ int *__l, __c, __d; \ - asm ("leal " #x "@tlsgd(%%ebx),%%eax\n\t" \ - "call ___tls_get_addr@plt\n\t" \ - "nop" \ - : "=a" (__l), "=&c" (__c), "=&d" (__d)); \ - __l; }) -#else -# define TLS_GD(x) \ - ({ int *__l, __b, __c, __d; \ - asm ("call 1f\n\t" \ - ".subsection 1\n" \ - "1:\tmovl (%%esp), %%ebx\n\t" \ - "ret\n\t" \ - ".previous\n\t" \ - "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" \ - "leal " #x "@tlsgd(%%ebx),%%eax\n\t" \ - "call ___tls_get_addr@plt\n\t" \ - "nop" \ - : "=a" (__l), "=&b" (__b), "=&c" (__c), "=&d" (__d)); \ - __l; }) -#endif diff --git a/sysdeps/i386/tlsdesc.c b/sysdeps/i386/tlsdesc.c deleted 
file mode 100644 index 90de2bb05e..0000000000 --- a/sysdeps/i386/tlsdesc.c +++ /dev/null @@ -1,268 +0,0 @@ -/* Manage TLS descriptors. i386 version. - Copyright (C) 2005-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <link.h> -#include <ldsodefs.h> -#include <elf/dynamic-link.h> -#include <tls.h> -#include <dl-tlsdesc.h> -#include <dl-unmap-segments.h> -#include <tlsdeschtab.h> - -/* The following 4 functions take an entry_check_offset argument. - It's computed by the caller as an offset between its entry point - and the call site, such that by adding the built-in return address - that is implicitly passed to the function with this offset, we can - easily obtain the caller's entry point to compare with the entry - point given in the TLS descriptor. If it's changed, we want to - return immediately. */ - -/* This function is used to lazily resolve TLS_DESC REL relocations - that reference the *ABS* segment in their own link maps. The - argument is the addend originally stored there. 
*/ - -void -__attribute__ ((regparm (3))) attribute_hidden -_dl_tlsdesc_resolve_abs_plus_addend_fixup (struct tlsdesc volatile *td, - struct link_map *l, - ptrdiff_t entry_check_offset) -{ - ptrdiff_t addend = (ptrdiff_t) td->arg; - - if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0) - - entry_check_offset)) - return; - -#ifndef SHARED - CHECK_STATIC_TLS (l, l); -#else - if (!TRY_STATIC_TLS (l, l)) - { - td->arg = _dl_make_tlsdesc_dynamic (l, addend); - td->entry = _dl_tlsdesc_dynamic; - } - else -#endif - { - td->arg = (void*) (addend - l->l_tls_offset); - td->entry = _dl_tlsdesc_return; - } - - _dl_tlsdesc_wake_up_held_fixups (); -} - -/* This function is used to lazily resolve TLS_DESC REL relocations - that originally had zero addends. The argument location, that - originally held the addend, is used to hold a pointer to the - relocation, but it has to be restored before we call the function - that applies relocations. */ - -void -__attribute__ ((regparm (3))) attribute_hidden -_dl_tlsdesc_resolve_rel_fixup (struct tlsdesc volatile *td, - struct link_map *l, - ptrdiff_t entry_check_offset) -{ - const ElfW(Rel) *reloc = td->arg; - - if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0) - - entry_check_offset)) - return; - - /* The code below was borrowed from _dl_fixup(), - except for checking for STB_LOCAL. */ - const ElfW(Sym) *const symtab - = (const void *) D_PTR (l, l_info[DT_SYMTAB]); - const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); - const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; - lookup_t result; - - /* Look up the target symbol. If the normal lookup rules are not - used don't look in the global scope. 
*/ - if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL - && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0) - { - const struct r_found_version *version = NULL; - - if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL) - { - const ElfW(Half) *vernum = - (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]); - ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff; - version = &l->l_versions[ndx]; - if (version->hash == 0) - version = NULL; - } - - result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, - l->l_scope, version, ELF_RTYPE_CLASS_PLT, - DL_LOOKUP_ADD_DEPENDENCY, NULL); - } - else - { - /* We already found the symbol. The module (and therefore its load - address) is also known. */ - result = l; - } - - if (!sym) - { - td->arg = 0; - td->entry = _dl_tlsdesc_undefweak; - } - else - { -# ifndef SHARED - CHECK_STATIC_TLS (l, result); -# else - if (!TRY_STATIC_TLS (l, result)) - { - td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value); - td->entry = _dl_tlsdesc_dynamic; - } - else -# endif - { - td->arg = (void*)(sym->st_value - result->l_tls_offset); - td->entry = _dl_tlsdesc_return; - } - } - - _dl_tlsdesc_wake_up_held_fixups (); -} - -/* This function is used to lazily resolve TLS_DESC RELA relocations. - The argument location is used to hold a pointer to the relocation. */ - -void -__attribute__ ((regparm (3))) attribute_hidden -_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td, - struct link_map *l, - ptrdiff_t entry_check_offset) -{ - const ElfW(Rela) *reloc = td->arg; - - if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0) - - entry_check_offset)) - return; - - /* The code below was borrowed from _dl_fixup(), - except for checking for STB_LOCAL. 
*/ - const ElfW(Sym) *const symtab - = (const void *) D_PTR (l, l_info[DT_SYMTAB]); - const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); - const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; - lookup_t result; - - /* Look up the target symbol. If the normal lookup rules are not - used don't look in the global scope. */ - if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL - && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0) - { - const struct r_found_version *version = NULL; - - if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL) - { - const ElfW(Half) *vernum = - (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]); - ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff; - version = &l->l_versions[ndx]; - if (version->hash == 0) - version = NULL; - } - - result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, - l->l_scope, version, ELF_RTYPE_CLASS_PLT, - DL_LOOKUP_ADD_DEPENDENCY, NULL); - } - else - { - /* We already found the symbol. The module (and therefore its load - address) is also known. */ - result = l; - } - - if (!sym) - { - td->arg = (void*) reloc->r_addend; - td->entry = _dl_tlsdesc_undefweak; - } - else - { -# ifndef SHARED - CHECK_STATIC_TLS (l, result); -# else - if (!TRY_STATIC_TLS (l, result)) - { - td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value - + reloc->r_addend); - td->entry = _dl_tlsdesc_dynamic; - } - else -# endif - { - td->arg = (void*) (sym->st_value - result->l_tls_offset - + reloc->r_addend); - td->entry = _dl_tlsdesc_return; - } - } - - _dl_tlsdesc_wake_up_held_fixups (); -} - -/* This function is used to avoid busy waiting for other threads to - complete the lazy relocation. Once another thread wins the race to - relocate a TLS descriptor, it sets the descriptor up such that this - function is called to wait until the resolver releases the - lock. 
*/ - -void -__attribute__ ((regparm (3))) attribute_hidden -_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td, - struct link_map *l __attribute__((__unused__)), - ptrdiff_t entry_check_offset) -{ - /* Maybe we're lucky and can return early. */ - if (__builtin_return_address (0) - entry_check_offset != td->entry) - return; - - /* Locking here will stop execution until the running resolver runs - _dl_tlsdesc_wake_up_held_fixups(), releasing the lock. - - FIXME: We'd be better off waiting on a condition variable, such - that we didn't have to hold the lock throughout the relocation - processing. */ - __rtld_lock_lock_recursive (GL(dl_load_lock)); - __rtld_lock_unlock_recursive (GL(dl_load_lock)); -} - - -/* Unmap the dynamic object, but also release its TLS descriptor table - if there is one. */ - -void -internal_function -_dl_unmap (struct link_map *map) -{ - _dl_unmap_segments (map); - -#ifdef SHARED - if (map->l_mach.tlsdesc_table) - htab_delete (map->l_mach.tlsdesc_table); -#endif -} diff --git a/sysdeps/i386/tlsdesc.sym b/sysdeps/i386/tlsdesc.sym deleted file mode 100644 index 33854975d0..0000000000 --- a/sysdeps/i386/tlsdesc.sym +++ /dev/null @@ -1,17 +0,0 @@ -#include <stddef.h> -#include <sysdep.h> -#include <tls.h> -#include <link.h> -#include <dl-tlsdesc.h> - --- - --- Abuse tls.h macros to derive offsets relative to the thread register. - -DTV_OFFSET offsetof(struct pthread, header.dtv) - -TLSDESC_ARG offsetof(struct tlsdesc, arg) - -TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) -TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) -TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) diff --git a/sysdeps/i386/tst-audit.h b/sysdeps/i386/tst-audit.h deleted file mode 100644 index 87bf199c85..0000000000 --- a/sysdeps/i386/tst-audit.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Definitions for testing PLT entry/exit auditing. i386 version. - - Copyright (C) 2012-2017 Free Software Foundation, Inc. 
- - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <http://www.gnu.org/licenses/>. */ - -#define pltenter la_i86_gnu_pltenter -#define pltexit la_i86_gnu_pltexit -#define La_regs La_i86_regs -#define La_retval La_i86_retval -#define int_retval lrv_eax diff --git a/sysdeps/i386/tst-audit3.c b/sysdeps/i386/tst-audit3.c deleted file mode 100644 index b67a59d733..0000000000 --- a/sysdeps/i386/tst-audit3.c +++ /dev/null @@ -1,37 +0,0 @@ -/* Test case for i386 preserved registers in dynamic linker. - Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <stdlib.h> -#include "tst-audit3.h" - -static int -do_test (void) -{ - long long ll = audit1_test (1, 2, 3); - if (ll != 30) - abort (); - - float f = audit2_test (1, 2, 3); - if (f != 30) - abort (); - - return 0; -} - -#define TEST_FUNCTION do_test () -#include "../../test-skeleton.c" diff --git a/sysdeps/i386/tst-audit3.h b/sysdeps/i386/tst-audit3.h deleted file mode 100644 index f6d3b9181e..0000000000 --- a/sysdeps/i386/tst-audit3.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Test case for i386 preserved registers in dynamic linker. - Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -extern long long audit1_test (int, int, int) __attribute__ ((regparm(3))); -extern float audit2_test (int, int, int) __attribute__ ((regparm(3))); diff --git a/sysdeps/i386/tst-auditmod3a.c b/sysdeps/i386/tst-auditmod3a.c deleted file mode 100644 index a333cdcff9..0000000000 --- a/sysdeps/i386/tst-auditmod3a.c +++ /dev/null @@ -1,38 +0,0 @@ -/* Test case for i386 preserved registers in dynamic linker. - Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <stdlib.h> -#include "tst-audit3.h" - -long long -__attribute__ ((regparm(3))) -audit1_test (int i, int j, int k) -{ - if (i != 1 || j != 2 || k != 3) - abort (); - return 30; -} - -float -__attribute__ ((regparm(3))) -audit2_test (int i, int j, int k) -{ - if (i != 1 || j != 2 || k != 3) - abort (); - return 30; -} diff --git a/sysdeps/i386/tst-auditmod3b.c b/sysdeps/i386/tst-auditmod3b.c deleted file mode 100644 index 523f3cec90..0000000000 --- a/sysdeps/i386/tst-auditmod3b.c +++ /dev/null @@ -1,186 +0,0 @@ -/* Test case for i386 preserved registers in dynamic linker. - Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <dlfcn.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <string.h> -#include <unistd.h> -#include <link.h> -#include <bits/wordsize.h> -#include <gnu/lib-names.h> - -unsigned int -la_version (unsigned int v) -{ - setlinebuf (stdout); - - printf ("version: %u\n", v); - - char buf[20]; - sprintf (buf, "%u", v); - - return v; -} - -void -la_activity (uintptr_t *cookie, unsigned int flag) -{ - const char *flagstr; - switch (flag) - { - case LA_ACT_CONSISTENT: - flagstr = "consistent"; - break; - case LA_ACT_ADD: - flagstr = "add"; - break; - case LA_ACT_DELETE: - flagstr = "delete"; - break; - default: - printf ("activity: unknown activity %u\n", flag); - return; - } - printf ("activity: %s\n", flagstr); -} - -char * -la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) -{ - const char *flagstr; - switch (flag) - { - case LA_SER_ORIG: - flagstr = "LA_SET_ORIG"; - break; - case LA_SER_LIBPATH: - flagstr = "LA_SER_LIBPATH"; - break; - case LA_SER_RUNPATH: - flagstr = "LA_SER_RUNPATH"; - break; - case LA_SER_CONFIG: - flagstr = "LA_SER_CONFIG"; - break; - case LA_SER_DEFAULT: - flagstr = "LA_SER_DEFAULT"; - break; - case LA_SER_SECURE: - flagstr = "LA_SER_SECURE"; - break; - default: - printf ("objsearch: %s, unknown flag %d\n", name, flag); - return (char *) name; - } - - printf ("objsearch: %s, %s\n", name, flagstr); - return (char *) name; -} - -unsigned int -la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie) -{ - printf ("objopen: %ld, %s\n", lmid, l->l_name); - - return 3; -} - -void -la_preinit (uintptr_t *cookie) -{ - printf ("preinit\n"); -} - -unsigned int -la_objclose (uintptr_t *cookie) -{ - printf ("objclose\n"); - return 0; -} - -uintptr_t -la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook, - 
uintptr_t *defcook, unsigned int *flags, const char *symname) -{ - printf ("symbind32: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", - symname, (long int) sym->st_value, ndx, *flags); - - return sym->st_value; -} - -#include "tst-audit.h" - -ElfW(Addr) -pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, - uintptr_t *defcook, La_regs *regs, unsigned int *flags, - const char *symname, long int *framesizep) -{ - printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", - symname, (long int) sym->st_value, ndx, *flags); - - if (strcmp (symname, "audit1_test") == 0 - || strcmp (symname, "audit2_test") == 0) - { - if (regs->lr_eax != 1 - || regs->lr_edx != 2 - || regs->lr_ecx != 3) - abort (); - - *framesizep = 200; - } - - return sym->st_value; -} - -unsigned int -pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, - uintptr_t *defcook, const La_regs *inregs, La_retval *outregs, - const char *symname) -{ - printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n", - symname, (long int) sym->st_value, ndx, - (ptrdiff_t) outregs->int_retval); - - if (strcmp (symname, "audit1_test") == 0 - || strcmp (symname, "audit2_test") == 0) - { - if (inregs->lr_eax != 1 - || inregs->lr_edx != 2 - || inregs->lr_ecx != 3) - abort (); - - if (strcmp (symname, "audit1_test") == 0) - { - long long x = ((unsigned long long) outregs->lrv_eax - | (unsigned long long) outregs->lrv_edx << 32); - - if (x != 30) - abort (); - } - else if (strcmp (symname, "audit2_test") == 0) - { - if (outregs->lrv_st0 != 30) - abort (); - } - } - - return 0; -} diff --git a/sysdeps/i386/tst-ld-sse-use.sh b/sysdeps/i386/tst-ld-sse-use.sh deleted file mode 100755 index 83a1dc59fb..0000000000 --- a/sysdeps/i386/tst-ld-sse-use.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash -# Make sure no code in ld.so uses xmm/ymm/zmm registers on i386. -# Copyright (C) 2009-2017 Free Software Foundation, Inc. -# This file is part of the GNU C Library. 
- -# The GNU C Library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. - -# The GNU C Library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with the GNU C Library; if not, see -# <http://www.gnu.org/licenses/>. - -set -e - -objpfx="$1" -NM="$2" -OBJDUMP="$3" -READELF="$4" - -tmp=$(mktemp ${objpfx}tst-ld-sse-use.XXXXXX) -trap 'rm -f "$tmp"' 1 2 3 15 - -# List of object files we have to test -rtldobjs=$($READELF -W -wi ${objpfx}dl-allobjs.os | - awk '/^ </ { if ($5 == "(DW_TAG_compile_unit)") c=1; else c=0 } $2 == "DW_AT_name" { if (c == 1) print $NF }' | - sed 's,\(.*/\|\)\([_[:alnum:]-]*[.]\).$,\2os,') -rtldobjs="$rtldobjs $(ar t ${objpfx}rtld-libc.a)" - -# OBJECT symbols can be ignored. -$READELF -sW ${objpfx}dl-allobjs.os ${objpfx}rtld-libc.a | -egrep " OBJECT *GLOBAL " | -awk '{if ($7 != "ABS") print $8 }' | -sort -u > "$tmp" -declare -a objects -objects=($(cat "$tmp")) - -objs="dl-runtime.os" -tocheck="dl-runtime.os" - -while test -n "$objs"; do - this="$objs" - objs="" - - for f in $this; do - undef=$($NM -u "$objpfx"../*/"$f" | awk '{print $2}') - if test -n "$undef"; then - for s in $undef; do - for obj in ${objects[*]} "_GLOBAL_OFFSET_TABLE_"; do - if test "$obj" = "$s"; then - continue 2 - fi - done - for o in $rtldobjs; do - ro=$(echo "$objpfx"../*/"$o") - if $NM -g --defined-only "$ro" | egrep -qs " $s\$"; then - if ! 
(echo "$tocheck $objs" | fgrep -qs "$o"); then - echo "$o needed for $s" - objs="$objs $o" - fi - break; - fi - done - done - fi - done - tocheck="$tocheck$objs" -done - -echo -echo -echo "object files needed: $tocheck" - -cp /dev/null "$tmp" -for f in $tocheck; do - $OBJDUMP -d "$objpfx"../*/"$f" | - awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xyz]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | - while read fct; do - if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then - continue; - fi - echo "function $fct in $f modifies xmm/ymm/zmm" >> "$tmp" - result=1 - done -done - -if test -s "$tmp"; then - echo - echo - cat "$tmp" - result=1 -else - result=0 -fi - -rm "$tmp" -exit $result diff --git a/sysdeps/i386/tst-stack-align.h b/sysdeps/i386/tst-stack-align.h deleted file mode 100644 index 76276d4a28..0000000000 --- a/sysdeps/i386/tst-stack-align.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (C) 2004-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <stdio.h> -#include <stdint.h> - -typedef struct { int i[4]; } int_al16 __attribute__((aligned (16))); - -#define TEST_STACK_ALIGN() \ - ({ \ - int_al16 _m; \ - double _d = 12.0; \ - long double _ld = 15.0; \ - int _ret = 0; \ - printf ("int_al16: %p %zu\n", &_m, __alignof (int_al16)); \ - if ((((uintptr_t) &_m) & (__alignof (int_al16) - 1)) != 0) \ - _ret = 1; \ - \ - printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \ - if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \ - _ret = 1; \ - \ - printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \ - if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \ - _ret = 1; \ - _ret; \ - }) |