aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarlos O'Donell <carlos_odonell@mentor.com>2012-05-16 20:14:24 -0700
committerCarlos O'Donell <carlos_odonell@mentor.com>2012-05-18 16:22:54 -0400
commit8d93c8972c66eac8044ae1a43845de56b12f78c8 (patch)
tree9c487430ce470594d47d8dde41d4bfca7d5d107a
parentb934126eb207511033369153eff9199d974c9a33 (diff)
downloadglibc-8d93c8972c66eac8044ae1a43845de56b12f78c8.tar
glibc-8d93c8972c66eac8044ae1a43845de56b12f78c8.tar.gz
glibc-8d93c8972c66eac8044ae1a43845de56b12f78c8.tar.bz2
glibc-8d93c8972c66eac8044ae1a43845de56b12f78c8.zip
BZ#14059: Fix AVX and FMA4 detection.
Fix AVX and FMA4 detection by following the guidelines set out by Intel and AMD for detecting these features. (cherry picked from commit 1a0994f5356214e8af8a1c1cc33fbf74a7ac8993)
-rw-r--r--ChangeLog30
-rw-r--r--NEWS2
-rw-r--r--sysdeps/i386/i686/multiarch/Makefile1
-rw-r--r--sysdeps/i386/i686/multiarch/test-multiarch.c1
-rw-r--r--sysdeps/x86_64/multiarch/Makefile1
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.c27
-rw-r--r--sysdeps/x86_64/multiarch/init-arch.h51
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.S9
-rw-r--r--sysdeps/x86_64/multiarch/test-multiarch.c90
9 files changed, 181 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 94cd02a2bb..88280958dd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,33 @@
+2012-05-17 Andreas Jaeger <aj@suse.de>
+ Carlos O'Donell <carlos_odonell@mentor.com>
+
+ [BZ #14059]
+ * sysdeps/x86_64/multiarch/init-arch.h
+ (bit_YMM_Usable): Rename to...
+ (bit_AVX_Usable): ... this.
+ (bit_FMA4_Usable): New macro.
+ (bit_XMM_state): New macro.
+ (bit_YMM_state): New macro.
+ [__ASSEMBLER__] (index_YMM_Usable): Rename to...
+ [__ASSEMBLER__] (index_AVX_Usable): ... this.
+ [__ASSEMBLER__] (index_FMA4_Usable): New macro.
+ (CPUID_OSXSAVE): New macro.
+ (CPUID_AVX): New macro.
+ (CPUID_FMA4): New macro.
+ (index_YMM_Usable): Rename to...
+ (index_AVX_Usable): ... this.
+ (HAS_AVX): Use HAS_ARCH_FEATURE.
+ (HAS_FMA4): Likewise.
+ (HAS_YMM_USABLE): Remove.
+ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+ Enable AVX or FMA4 IFF YMM and XMM states are usable and the features
+ are present.
+ * sysdeps/x86_64/multiarch/strcmp.S: Use bit_AVX_Usable.
+ * sysdeps/i386/i686/multiarch/Makefile: Add test-multiarch to tests.
+ * sysdeps/x86_64/multiarch/Makefile: Likewise.
+ * sysdeps/i386/i686/multiarch/test-multiarch.c: New file.
+ * sysdeps/x86_64/multiarch/test-multiarch.c: New file.
+
2012-02-27 David S. Miller <davem@davemloft.net>
* sysdeps/ieee754/ldbl-128/s_nearbyintl.c (__nearbyintl): Do not
diff --git a/NEWS b/NEWS
index f16430eea7..eb1dbe8ab8 100644
--- a/NEWS
+++ b/NEWS
@@ -10,7 +10,7 @@ Version 2.15.1
* The following bugs are resolved with this release:
411, 2547, 2548, 11365, 11494, 13583, 13731, 13732, 13733, 13747, 13748,
- 13749, 13753
+ 13749, 13753, 14059
Version 2.15
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index b764e5b825..8946bfa586 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -1,5 +1,6 @@
ifeq ($(subdir),csu)
aux += init-arch
+tests += test-multiarch
gen-as-const-headers += ifunc-defines.sym
endif
diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c
new file mode 100644
index 0000000000..593cfec273
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/test-multiarch.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/test-multiarch.c>
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9a183f068e..dd6c27d0b4 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -1,5 +1,6 @@
ifeq ($(subdir),csu)
aux += init-arch
+tests += test-multiarch
gen-as-const-headers += ifunc-defines.sym
endif
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 76d146c1f0..df0fe55cad 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
/* Initialize CPU feature data.
This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@redhat.com>.
The GNU C Library is free software; you can redistribute it and/or
@@ -144,16 +144,23 @@ __init_cpu_features (void)
else
kind = arch_kind_other;
- if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+ /* Can we call xgetbv? */
+ if (CPUID_OSXSAVE)
{
- /* Reset the AVX bit in case OSXSAVE is disabled. */
- if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
- && ({ unsigned int xcrlow;
- unsigned int xcrhigh;
- asm ("xgetbv"
- : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- (xcrlow & 6) == 6; }))
- __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+ unsigned int xcrlow;
+ unsigned int xcrhigh;
+ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ /* Is YMM and XMM state usable? */
+ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+ (bit_YMM_state | bit_XMM_state))
+ {
+ /* Determine if AVX is usable. */
+ if (CPUID_AVX)
+ __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+ /* Determine if FMA4 is usable. */
+ if (CPUID_FMA4)
+ __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
+ }
}
__cpu_features.family = family;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 2dc75ab37b..37566c88df 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -1,5 +1,5 @@
/* This file is part of the GNU C Library.
- Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2008-2012 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -22,8 +22,10 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_YMM_Usable (1 << 6)
+#define bit_AVX_Usable (1 << 6)
+#define bit_FMA4_Usable (1 << 7)
+/* CPUID Feature flags. */
#define bit_SSE2 (1 << 26)
#define bit_SSSE3 (1 << 9)
#define bit_SSE4_1 (1 << 19)
@@ -34,6 +36,10 @@
#define bit_FMA (1 << 12)
#define bit_FMA4 (1 << 16)
+/* XCR0 Feature flags. */
+#define bit_XMM_state (1 << 1)
+#define bit_YMM_state (2 << 1)
+
#ifdef __ASSEMBLER__
# include <ifunc-defines.h>
@@ -50,7 +56,8 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -114,35 +121,45 @@ extern const struct cpu_features *__get_cpu_features (void)
/* Following are the feature tests used throughout libc. */
+/* CPUID_* evaluates to true if the feature flag is enabled.
+ We always use &__cpu_features because the HAS_CPUID_* macros
+ are called only within __init_cpu_features, where we can't
+ call __get_cpu_features without infinite recursion. */
+# define HAS_CPUID_FLAG(idx, reg, bit) \
+ (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
+
+# define CPUID_OSXSAVE \
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
+# define CPUID_AVX \
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
+# define CPUID_FMA4 \
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
+
+/* HAS_* evaluates to true if we may use the feature at runtime. */
# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_YMM_Usable FEATURE_INDEX_1
+# define index_AVX_Usable FEATURE_INDEX_1
+# define index_FMA4_Usable FEATURE_INDEX_1
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
-
-# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
-
-# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
-
-# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
-
-# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-
-# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
+# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
+# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
+# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
+# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
+# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
#endif /* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f93c83d7d4..d0361608d2 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -1,5 +1,5 @@
/* strcmp with SSE4.2
- Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2009-2012 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -84,6 +84,7 @@
.text
ENTRY(STRCMP)
.type STRCMP, @gnu_indirect_function
+ /* Manually inlined call to __get_cpu_features. */
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
@@ -101,13 +102,14 @@ END(STRCMP)
# ifdef USE_AS_STRCASECMP_L
ENTRY(__strcasecmp)
.type __strcasecmp, @gnu_indirect_function
+ /* Manually inlined call to __get_cpu_features. */
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1:
# ifdef HAVE_AVX_SUPPORT
leaq __strcasecmp_avx(%rip), %rax
- testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
+ testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
jnz 2f
# endif
leaq __strcasecmp_sse42(%rip), %rax
@@ -124,13 +126,14 @@ weak_alias (__strcasecmp, strcasecmp)
# ifdef USE_AS_STRNCASECMP_L
ENTRY(__strncasecmp)
.type __strncasecmp, @gnu_indirect_function
+ /* Manually inlined call to __get_cpu_features. */
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
1:
# ifdef HAVE_AVX_SUPPORT
leaq __strncasecmp_avx(%rip), %rax
- testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
+ testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
jnz 2f
# endif
leaq __strncasecmp_sse42(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
new file mode 100644
index 0000000000..76b1af2f8c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -0,0 +1,90 @@
+/* Test CPU feature data.
+ This file is part of the GNU C Library.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <init-arch.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static char *cpu_flags;
+
+/* Search for flags in /proc/cpuinfo and store line
+ in cpu_flags. */
+void
+get_cpuinfo (void)
+{
+ FILE *f;
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t read;
+
+ f = fopen ("/proc/cpuinfo", "r");
+ if (f == NULL)
+ {
+ printf ("cannot open /proc/cpuinfo");
+ exit (1);
+ }
+
+ while ((read = getline (&line, &len, f)) != -1)
+ {
+ if (strncmp (line, "flags", 5) == 0)
+ {
+ cpu_flags = strdup (line);
+ break;
+ }
+ }
+ fclose (f);
+ free (line);
+}
+
+int
+check_proc (const char *proc_name, int flag, const char *name)
+{
+ int found = 0;
+
+ printf ("Checking %s:\n", name);
+ printf (" init-arch %d\n", flag);
+ if (strstr (cpu_flags, proc_name) != NULL)
+ found = 1;
+ printf (" cpuinfo (%s) %d\n", proc_name, found);
+
+ if (found != flag)
+ printf (" *** failure ***\n");
+
+ return (found != flag);
+}
+
+static int
+do_test (int argc, char **argv)
+{
+ int fails;
+
+ get_cpuinfo ();
+ fails = check_proc ("avx", HAS_AVX, "HAS_AVX");
+ fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4");
+ fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2");
+ fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1");
+ fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3");
+ fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT");
+
+ printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);
+
+ return (fails != 0);
+}
+
+#include "../../../test-skeleton.c"