aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2011-10-20 22:43:15 -0400
committer Ulrich Drepper <drepper@gmail.com> 2011-10-20 22:43:15 -0400
commit ed72b6545f6d20f2d29ed71d65394d4a75ad358e (patch)
tree e47730c47098dfbf1d41d95210009fd4a5fc0e5c
parent 8d4f46c613c4397c5531b959744541862cf09ad0 (diff)
downloadglibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar.gz
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.tar.bz2
glibc-ed72b6545f6d20f2d29ed71d65394d4a75ad358e.zip
Check for FMA4 support and generate appropriate fma functions
-rw-r--r-- ChangeLog 9
-rw-r--r-- config.h.in 3
-rw-r--r-- sysdeps/i386/configure 63
-rw-r--r-- sysdeps/i386/configure.in 11
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_fma.c 22
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_fmaf.c 22
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.c 10
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.h 2
8 files changed, 115 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index 6cf990699a..b2d629b953 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2011-10-20 Ulrich Drepper <drepper@gmail.com>
+ * sysdeps/i386/configure.in: Test for -mfma4 option.
+ * config.h.in: Add HAVE_FMA4_SUPPORT entry.
+ * sysdeps/x86_64/multiarch/init-arch.h: Define HAS_FMA4 and
+ COMMON_CPUID_INDEX_80000001.
+ * sysdeps/x86_64/multiarch/init-arch.c: Read 80000001 leaf for AMD.
+ * sysdeps/x86_64/fpu/multiarch/s_fma.c: Test for FMA4 support and
+ use it if FMA3 is not supported.
+ * sysdeps/x86_64/fpu/multiarch/s_fmaf.c: Likewise.
+
* sysdeps/x86_64/multiarch/s_fma.c: Moved to ../fpu/multiarch.
* sysdeps/x86_64/multiarch/s_fmaf.c: Likewise.
diff --git a/config.h.in b/config.h.in
index 7b5095691d..7db663025a 100644
--- a/config.h.in
+++ b/config.h.in
@@ -118,6 +118,9 @@
/* Define if gcc supports AVX. */
#undef HAVE_AVX_SUPPORT
+/* Define if gcc supports FMA4. */
+#undef HAVE_FMA4_SUPPORT
+
/* Define if the compiler's exception support is based on libunwind. */
#undef HAVE_CC_WITH_LIBUNWIND
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure
index adffe3f9a9..ae494e2712 100644
--- a/sysdeps/i386/configure
+++ b/sysdeps/i386/configure
@@ -167,7 +167,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=1
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_compile
@@ -193,7 +193,7 @@ $as_echo "$ac_try_echo"; } >&5
mv -f conftest.er1 conftest.err
fi
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } >/dev/null && {
+ test $ac_status = 0; } > conftest.i && {
test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
test ! -s conftest.err
}; then :
@@ -204,7 +204,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=1
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_cpp
@@ -217,10 +217,10 @@ fi
ac_fn_c_check_header_mongrel ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if eval "test \"\${$3+set}\"" = set; then :
+ if eval \${$3+:} false; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
fi
eval ac_res=\$$3
@@ -256,7 +256,7 @@ if ac_fn_c_try_cpp "$LINENO"; then :
else
ac_header_preproc=no
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
$as_echo "$ac_header_preproc" >&6; }
@@ -283,7 +283,7 @@ $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
eval "$3=\$ac_header_compiler"
@@ -292,7 +292,7 @@ eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_header_mongrel
@@ -333,7 +333,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_retval=$ac_status
fi
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_run
@@ -347,7 +347,7 @@ ac_fn_c_check_header_compile ()
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if eval "test \"\${$3+set}\"" = set; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -365,7 +365,7 @@ fi
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_header_compile
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
@@ -375,7 +375,7 @@ $as_echo "$ac_res" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if ${ac_cv_path_GREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -z "$GREP"; then
@@ -438,7 +438,7 @@ $as_echo "$ac_cv_path_GREP" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
$as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if ${ac_cv_path_EGREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@@ -505,7 +505,7 @@ $as_echo "$ac_cv_path_EGREP" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
$as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if ${ac_cv_header_stdc+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -633,7 +633,7 @@ done
ac_fn_c_check_header_mongrel "$LINENO" "cpuid.h" "ac_cv_header_cpuid_h" "$ac_includes_default"
-if test "x$ac_cv_header_cpuid_h" = x""yes; then :
+if test "x$ac_cv_header_cpuid_h" = xyes; then :
else
as_fn_error $? "gcc must provide the <cpuid.h> header" "$LINENO" 5
@@ -643,7 +643,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if -g produces usable source locations for assembler-with-cpp" >&5
$as_echo_n "checking if -g produces usable source locations for assembler-with-cpp... " >&6; }
-if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then :
+if ${libc_cv_cpp_asm_debuginfo+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.S <<EOF
@@ -693,7 +693,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE4 support" >&5
$as_echo_n "checking for SSE4 support... " >&6; }
-if test "${libc_cv_cc_sse4+set}" = set; then :
+if ${libc_cv_cc_sse4+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -msse4 -xc /dev/null -S -o /dev/null'
@@ -716,7 +716,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler -mtune=i686 support" >&5
$as_echo_n "checking for assembler -mtune=i686 support... " >&6; }
-if test "${libc_cv_as_i686+set}" = set; then :
+if ${libc_cv_as_i686+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null'
@@ -735,7 +735,7 @@ $as_echo "$libc_cv_as_i686" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX support" >&5
$as_echo_n "checking for AVX support... " >&6; }
-if test "${libc_cv_cc_avx+set}" = set; then :
+if ${libc_cv_cc_avx+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -mavx -xc /dev/null -S -o /dev/null'
@@ -756,9 +756,32 @@ if test $libc_cv_cc_avx = yes; then
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
+$as_echo_n "checking for FMA4 support... " >&6; }
+if ${libc_cv_cc_fma4+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_cc_fma4=yes
+else
+ libc_cv_cc_fma4=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_fma4" >&5
+$as_echo "$libc_cv_cc_fma4" >&6; }
+if test $libc_cv_cc_fma4 = yes; then
+ $as_echo "#define HAVE_FMA4_SUPPORT 1" >>confdefs.h
+
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -mno-vzeroupper support" >&5
$as_echo_n "checking for -mno-vzeroupper support... " >&6; }
-if test "${libc_cv_cc_novzeroupper+set}" = set; then :
+if ${libc_cv_cc_novzeroupper+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null'
diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in
index 67fd1d7df1..5a9840e16c 100644
--- a/sysdeps/i386/configure.in
+++ b/sysdeps/i386/configure.in
@@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
AC_DEFINE(HAVE_AVX_SUPPORT)
fi
+dnl Check if -mfma4 works.
+AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
+if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
+ libc_cv_cc_fma4=yes
+else
+ libc_cv_cc_fma4=no
+fi])
+if test $libc_cv_cc_fma4 = yes; then
+ AC_DEFINE(HAVE_FMA4_SUPPORT)
+fi
+
dnl Check if -mno-vzeroupper works.
AC_CACHE_CHECK(for -mno-vzeroupper support, libc_cv_cc_novzeroupper, [dnl
if AC_TRY_COMMAND([${CC-cc} -mno-vzeroupper -xc /dev/null -S -o /dev/null]); then
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 9a680c68fc..06f2d001d9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -1,5 +1,5 @@
/* FMA version of fma.
- Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -28,13 +28,29 @@ extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
static double
-__fma_fma (double x, double y, double z)
+__fma_fma3 (double x, double y, double z)
{
asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
return x;
}
-libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
+
+# ifdef HAVE_FMA4_SUPPORT
+/* Compute X * Y + Z with the AMD FMA4 instruction vfmaddsd and return
+   the result.  The instruction can encode at most one memory source
+   operand, so only Z may be a memory operand ("xm"); X and Y are forced
+   into SSE registers ("x").  Allowing "xm" for both Y and Z would let
+   the compiler pick two memory operands, which cannot be assembled.  */
+static double
+__fma_fma4 (double x, double y, double z)
+{
+  asm ("vfmaddsd %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z));
+  return x;
+}
+# else
+# undef HAS_FMA4
+# define HAS_FMA4 0
+# define __fma_fma4 NULL
+# endif
+
+
+libm_ifunc (__fma, HAS_FMA
+ ? __fma_fma3 : (HAS_FMA4 ? __fma_fma4 : __fma_sse2));
weak_alias (__fma, fma)
# define __fma __fma_sse2
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 85ef65a50e..53c08de47c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -1,5 +1,5 @@
/* FMA version of fmaf.
- Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -27,13 +27,29 @@ extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
static float
-__fmaf_fma (float x, float y, float z)
+__fmaf_fma3 (float x, float y, float z)
{
asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
return x;
}
-libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
+
+# ifdef HAVE_FMA4_SUPPORT
+/* Compute X * Y + Z with the AMD FMA4 instruction vfmaddss and return
+   the result.  The instruction can encode at most one memory source
+   operand, so only Z may be a memory operand ("xm"); X and Y are forced
+   into SSE registers ("x").  Allowing "xm" for both Y and Z would let
+   the compiler pick two memory operands, which cannot be assembled.  */
+static float
+__fmaf_fma4 (float x, float y, float z)
+{
+  asm ("vfmaddss %3, %2, %1, %0" : "=x" (x) : "x" (x), "x" (y), "xm" (z));
+  return x;
+}
+# else
+# undef HAS_FMA4
+# define HAS_FMA4 0
+# define __fmaf_fma4 NULL
+# endif
+
+
+libm_ifunc (__fmaf, HAS_FMA
+ ? __fmaf_fma3 : (HAS_FMA4 ? __fmaf_fma4 : __fmaf_sse2));
weak_alias (__fmaf, fmaf)
# define __fmaf __fmaf_sse2
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 0a145ca259..3fde5d94ce 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -86,7 +86,7 @@ __init_cpu_features (void)
default:
/* Unknown family 0x06 processors. Assuming this is one
- of Core i3/i5/i7 processors if AVX is available. */
+ of Core i3/i5/i7 processors if AVX is available. */
if ((ecx & bit_AVX) == 0)
break;
@@ -131,6 +131,14 @@ __init_cpu_features (void)
if ((ecx & 0x200))
__cpu_features.feature[index_Prefer_SSE_for_memop]
|= bit_Prefer_SSE_for_memop;
+
+ __cpuid (0x80000000, eax, ebx, ecx, edx);
+ if (eax >= 0x80000001)
+ __cpuid (0x80000001,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
+ __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
}
else
kind = arch_kind_other;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index e8d48c2456..2fb6f75b66 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -53,6 +53,7 @@
enum
{
COMMON_CPUID_INDEX_1 = 0,
+ COMMON_CPUID_INDEX_80000001, /* for AMD */
/* Keep the following line at the end. */
COMMON_CPUID_INDEX_MAX
};
@@ -113,6 +114,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
+# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, 16)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1