aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
author Amit Pawar <Amit.Pawar@amd.com> 2018-07-06 09:55:36 -0400
committer Carlos O'Donell <carlos@redhat.com> 2018-07-06 09:55:36 -0400
commit bce5911b67392717b44f857a97bb348747be50e7 (patch)
tree 030eb3ba338dec69e3d2d700c1ad885daacc1713 /sysdeps
parent 3a885c1f51b18852869a91cf59a1b39da1595c7a (diff)
download glibc-bce5911b67392717b44f857a97bb348747be50e7.tar
glibc-bce5911b67392717b44f857a97bb348747be50e7.tar.gz
glibc-bce5911b67392717b44f857a97bb348747be50e7.tar.bz2
glibc-bce5911b67392717b44f857a97bb348747be50e7.zip
Use AVX_Fast_Unaligned_Load from Zen onwards.
From Zen onwards AVX_Fast_Unaligned_Load will be enabled. It was disabled for the Excavator case and will remain disabled there.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/x86/cpu-features.c 18
1 files changed, 13 insertions, 5 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 0fc3674c4b..d41ebde823 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -78,8 +78,15 @@ get_common_indeces (struct cpu_features *cpu_features,
/* The following features depend on AVX being usable. */
/* Determine if AVX2 is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+ {
cpu_features->feature[index_arch_AVX2_Usable]
|= bit_arch_AVX2_Usable;
+
+ /* Unaligned load with 256-bit AVX registers are faster on
+ Intel/AMD processors with AVX2. */
+ cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+ |= bit_arch_AVX_Fast_Unaligned_Load;
+ }
/* Determine if FMA is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, FMA))
cpu_features->feature[index_arch_FMA_Usable]
@@ -298,11 +305,6 @@ init_cpu_features (struct cpu_features *cpu_features)
}
}
- /* Unaligned load with 256-bit AVX registers are faster on
- Intel processors with AVX2. */
- if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
- cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
- |= bit_arch_AVX_Fast_Unaligned_Load;
/* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
if AVX512ER is available. Don't use AVX512 to avoid lower CPU
@@ -351,9 +353,15 @@ init_cpu_features (struct cpu_features *cpu_features)
#endif
/* "Excavator" */
if (model >= 0x60 && model <= 0x7f)
+ {
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= (bit_arch_Fast_Unaligned_Load
| bit_arch_Fast_Copy_Backward);
+
+ /* Unaligned AVX loads are slower.*/
+ cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
+ }
}
}
else