diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2010-11-08 03:41:34 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2010-11-08 03:41:34 -0500 |
commit | ff02d5280bf252e86d325ff4348feaf531ede831 (patch) | |
tree | 243484af328916c3945588aab649615521ceebc6 /sysdeps/x86_64/multiarch | |
parent | 344d0b545d0a0a0ab737ff333d807969721ce381 (diff) | |
download | glibc-ff02d5280bf252e86d325ff4348feaf531ede831.tar glibc-ff02d5280bf252e86d325ff4348feaf531ede831.tar.gz glibc-ff02d5280bf252e86d325ff4348feaf531ede831.tar.bz2 glibc-ff02d5280bf252e86d325ff4348feaf531ede831.zip |
Use IFUNC on x86-64 memset
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r-- | sysdeps/x86_64/multiarch/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/bzero.S | 56 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/cacheinfo.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.c | 5 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.h | 12 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset-x86-64.S | 18 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset.S | 74 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset_chk.S | 44 |
8 files changed, 210 insertions, 4 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 5d2e34ebc8..19aa4be4cf 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -8,7 +8,8 @@ sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ - strncase_l-ssse3 strlen-sse4 strlen-no-bsf + strncase_l-ssse3 strlen-sse4 strlen-no-bsf \ + memset-x86-64 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift CFLAGS-varshift.c += -msse4 diff --git a/sysdeps/x86_64/multiarch/bzero.S b/sysdeps/x86_64/multiarch/bzero.S new file mode 100644 index 0000000000..9c9eebd5ef --- /dev/null +++ b/sysdeps/x86_64/multiarch/bzero.S @@ -0,0 +1,56 @@ +/* Multiple versions of bzero + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
 */ + +#include <sysdep.h> +#include <init-arch.h> + + .text +ENTRY(__bzero) + .type __bzero, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: leaq __bzero_x86_64(%rip), %rax + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + jz 2f + leaq __bzero_sse2(%rip), %rax +2: ret +END(__bzero) + + .type __bzero_sse2, @function +__bzero_sse2: + cfi_startproc + CALL_MCOUNT + mov %rsi,%rdx /* Adjust parameter. */ + xorl %esi,%esi /* Fill with 0s. */ + jmp __memset_sse2 + cfi_endproc + .size __bzero_sse2, .-__bzero_sse2 + + .type __bzero_x86_64, @function +__bzero_x86_64: + cfi_startproc + CALL_MCOUNT + mov %rsi,%rdx /* Adjust parameter. */ + xorl %esi,%esi /* Fill with 0s. */ + jmp __memset_x86_64 + cfi_endproc + .size __bzero_x86_64, .-__bzero_x86_64 + +weak_alias (__bzero, bzero) diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c new file mode 100644 index 0000000000..f87b8dce6b --- /dev/null +++ b/sysdeps/x86_64/multiarch/cacheinfo.c @@ -0,0 +1,2 @@ +#define DISABLE_PREFERRED_MEMORY_INSTRUCTION +#include "../cacheinfo.c" diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 786466d5fd..15bc9046e3 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -59,6 +59,11 @@ __init_cpu_features (void) get_common_indeces (&family, &model); + /* Intel processors prefer SSE instruction for memory/string + routines if they are available. 
*/ + __cpu_features.feature[index_Prefer_SSE_for_memop] + |= bit_Prefer_SSE_for_memop; + unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; unsigned int extended_family = (eax >> 20) & 0xff; unsigned int extended_model = (eax >> 12) & 0xf0; diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 783b02015e..6e409b8f17 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -16,9 +16,10 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -#define bit_Fast_Rep_String (1 << 0) -#define bit_Fast_Copy_Backward (1 << 1) -#define bit_Slow_BSF (1 << 2) +#define bit_Fast_Rep_String (1 << 0) +#define bit_Fast_Copy_Backward (1 << 1) +#define bit_Slow_BSF (1 << 2) +#define bit_Prefer_SSE_for_memop (1 << 3) #ifdef __ASSEMBLER__ @@ -37,6 +38,7 @@ # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -109,6 +111,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 # define index_Slow_BSF FEATURE_INDEX_1 +# define index_Prefer_SSE_for_memop FEATURE_INDEX_1 #define HAS_ARCH_FEATURE(idx, bit) \ ((__get_cpu_features ()->feature[idx] & (bit)) != 0) @@ -122,4 +125,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define HAS_SLOW_BSF \ HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) +#define HAS_PREFER_SSE_FOR_MEMOP \ + HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) + #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/x86_64/multiarch/memset-x86-64.S b/sysdeps/x86_64/multiarch/memset-x86-64.S new file mode 100644 index 0000000000..5e8cfb3e9b --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-x86-64.S 
@@ -0,0 +1,18 @@ +#include <sysdep.h> + +#ifndef NOT_IN_libc +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ + .type __memset_chk_x86_64, @function; \ + .globl __memset_chk_x86_64; \ + .p2align 4; \ + __memset_chk_x86_64: cfi_startproc; \ + CALL_MCOUNT +# undef END_CHK +# define END_CHK(name) \ + cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64 + +# define libc_hidden_builtin_def(name) +# define memset __memset_x86_64 +# include "../memset.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S new file mode 100644 index 0000000000..a8d0e9ea22 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset.S @@ -0,0 +1,74 @@ +/* Multiple versions of memset + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +/* Define multiple versions only for the definition in lib. 
*/ +#ifndef NOT_IN_libc +ENTRY(memset) + .type memset, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: leaq __memset_x86_64(%rip), %rax + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + jz 2f + leaq __memset_sse2(%rip), %rax +2: ret +END(memset) + +# define USE_SSE2 1 + +# undef ENTRY +# define ENTRY(name) \ + .type __memset_sse2, @function; \ + .globl __memset_sse2; \ + .p2align 4; \ + __memset_sse2: cfi_startproc; \ + CALL_MCOUNT +# undef END +# define END(name) \ + cfi_endproc; .size __memset_sse2, .-__memset_sse2 + +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ + .type __memset_chk_sse2, @function; \ + .globl __memset_chk_sse2; \ + .p2align 4; \ + __memset_chk_sse2: cfi_startproc; \ + CALL_MCOUNT +# undef END_CHK +# define END_CHK(name) \ + cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2 + +# ifdef SHARED +# undef libc_hidden_builtin_def +/* It doesn't make sense to send libc-internal memset calls through a PLT. + The speedup we get from using GPR instruction is likely eaten away + by the indirect call in the PLT. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_sse2 +# endif + +# undef strong_alias +# define strong_alias(original, alias) +#endif + +#include "../memset.S" diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S new file mode 100644 index 0000000000..16afe60c66 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset_chk.S @@ -0,0 +1,44 @@ +/* Multiple versions of __memset_chk + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <init-arch.h> + +/* Define multiple versions only for the definition in lib. */ +#ifndef NOT_IN_libc +# ifdef SHARED +ENTRY(__memset_chk) + .type __memset_chk, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: leaq __memset_chk_x86_64(%rip), %rax + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + jz 2f + leaq __memset_chk_sse2(%rip), %rax +2: ret +END(__memset_chk) + +strong_alias (__memset_chk, __memset_zero_constant_len_parameter) + .section .gnu.warning.__memset_zero_constant_len_parameter + .string "memset used with constant zero length parameter; this could be due to transposed parameters" +# else +# include "../memset_chk.S" +# endif +#endif |