diff options
Diffstat (limited to 'sysdeps/x86_64/fpu/Makefile')
-rw-r--r-- | sysdeps/x86_64/fpu/Makefile | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index 2b7d69bb50..74b14ba096 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -237,3 +237,15 @@ CFLAGS-test-float-libmvec-sincosf-avx512.c = -DREQUIRE_AVX512F
 CFLAGS-test-float-libmvec-sincosf-avx512-main.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags)
 endif
 endif
+
+ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
+# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops
+# in branred.c with 256-bit vector instructions, which leads to store
+# forward stall:
+#
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
+#
+# Limit vector width to 128 bits to work around this issue. It improves
+# performance of sin and cos by more than 40% on Skylake.
+CFLAGS-branred.c = -mprefer-vector-width=128
+endif