author     H.J. Lu <hongjiu.lu@intel.com>       2011-06-24 14:15:32 -0400
committer  Ulrich Drepper <drepper@gmail.com>   2011-06-24 14:15:32 -0400
commit     0b1cbaaef5ccc21baf2c35d4698fb28e82eab385 (patch)
tree       c1f6ad8a49ef79510355c765ad3e385067e7ade0 /sysdeps/x86_64
parent     07f494a027b3adea1f3cd0cd4ca7c10949cdc476 (diff)
Optimized st{r,p}{,n}cpy for SSE2/SSSE3 on x86-32
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r--  sysdeps/x86_64/multiarch/init-arch.c  11
-rw-r--r--  sysdeps/x86_64/multiarch/init-arch.h   6
2 files changed, 14 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 809d105c77..81b2378467 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -97,13 +97,18 @@ __init_cpu_features (void)
case 0x2c:
case 0x2e:
case 0x2f:
- /* Rep string instructions and copy backward are fast on
- Intel Core i3, i5 and i7. */
+ /* Rep string instructions, copy backward and unaligned loads
+ are fast on Intel Core i3, i5 and i7. */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
# error index_Fast_Rep_String != index_Fast_Copy_Backward
#endif
+#if index_Fast_Rep_String != index_Fast_Unaligned_Load
+# error index_Fast_Rep_String != index_Fast_Unaligned_Load
+#endif
__cpu_features.feature[index_Fast_Rep_String]
- |= bit_Fast_Rep_String | bit_Fast_Copy_Backward;
+ |= (bit_Fast_Rep_String
+ | bit_Fast_Copy_Backward
+ | bit_Fast_Unaligned_Load);
break;
}
}
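
For reference, a minimal, self-contained sketch (not glibc source; the macro names and bit values are copied from this diff) of what the feature-word scheme above amounts to: the compile-time #if checks guarantee that all three flags live in the same feature word, so __init_cpu_features can set them with a single read-modify-write.

#include <stdio.h>

#define FEATURE_INDEX_1   0
#define FEATURE_INDEX_MAX 1

#define bit_Fast_Rep_String     (1 << 0)
#define bit_Fast_Copy_Backward  (1 << 1)
#define bit_Fast_Unaligned_Load (1 << 4)

static unsigned int feature[FEATURE_INDEX_MAX];

int
main (void)
{
  /* Mirrors the init-arch.c hunk above: one OR sets all three flags,
     which is only valid because the #if checks prove they share the
     same feature word.  */
  feature[FEATURE_INDEX_1] |= (bit_Fast_Rep_String
                               | bit_Fast_Copy_Backward
                               | bit_Fast_Unaligned_Load);

  if (feature[FEATURE_INDEX_1] & bit_Fast_Unaligned_Load)
    puts ("Fast_Unaligned_Load is set");
  return 0;
}
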
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 6e409b8f17..addf5f3dde 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -20,6 +20,7 @@
#define bit_Fast_Copy_Backward (1 << 1)
#define bit_Slow_BSF (1 << 2)
#define bit_Prefer_SSE_for_memop (1 << 3)
+#define bit_Fast_Unaligned_Load (1 << 4)
#ifdef __ASSEMBLER__
@@ -39,6 +40,7 @@
# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -112,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Fast_Copy_Backward FEATURE_INDEX_1
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
+# define index_Fast_Unaligned_Load FEATURE_INDEX_1
#define HAS_ARCH_FEATURE(idx, bit) \
((__get_cpu_features ()->feature[idx] & (bit)) != 0)
@@ -128,4 +131,7 @@ extern const struct cpu_features *__get_cpu_features (void)
#define HAS_PREFER_SSE_FOR_MEMOP \
HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+#define HAS_FAST_UNALIGNED_LOAD \
+ HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
+
#endif /* __ASSEMBLER__ */
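
Below is a hedged sketch of how a caller could consume the new HAS_FAST_UNALIGNED_LOAD macro when picking a string-routine variant. The names __strcpy_sse2 and __strcpy_sse2_unaligned are purely illustrative and not part of this commit; the st{r,p}{,n}cpy selectors this change supports live outside the sysdeps/x86_64 subtree shown in this diffstat.

/* Illustrative only: assumes init-arch.h (as patched above) is on the
   include path and that two implementations exist elsewhere.  */
#include "init-arch.h"

extern char *__strcpy_sse2 (char *, const char *);
extern char *__strcpy_sse2_unaligned (char *, const char *);

typedef char *(*strcpy_fn) (char *, const char *);

static strcpy_fn
select_strcpy (void)
{
  /* Prefer the unaligned-load variant when the CPU reports fast
     unaligned loads, i.e. the bit set in __init_cpu_features above.  */
  if (HAS_FAST_UNALIGNED_LOAD)
    return __strcpy_sse2_unaligned;
  return __strcpy_sse2;
}
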