This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v4] BZ #14059 - Fix AVX and FMA4 detection.


[resent with libc-alpha in the CC this time]

Allan,

This is the final patch. Could you give this a whirl?

Andreas,

Please review.

Passes for me on x86_64 with and without AVX with no regressions.

v1
- Fixed AVX detection.
v2
- Cleaned up AVX detection.
- Fixed FMA4 detection incorrectly.
v3
- Added test-multiarch regression test.
v4
- Fixed FMA4 detection correctly.
- test-multiarch now uses test-skeleton.c

OK to commit?

2012-05-16  Andreas Jaeger  <aj@suse.de>
            Carlos O'Donell  <carlos_odonell@mentor.com>

        [BZ #14059]
        * sysdeps/x86_64/multiarch/init-arch.h
        (bit_YMM_Usable): Rename to...
        (bit_AVX_Usable): ... this.
        (bit_FMA4_Usable): New macro.
        (bit_XMM_state): New macro.
        (bit_YMM_state): New macro.
        [__ASSEMBLER__] (index_YMM_Usable): Rename to...
        [__ASSEMBLER__] (index_AVX_Usable): ... this.
        [__ASSEMBLER__] (index_FMA4_Usable): New macro.
        (CPUID_OSXSAVE): New macro.
        (CPUID_AVX): New macro.
        (CPUID_FMA4): New macro.
        (index_YMM_Usable): Rename to...
        (index_AVX_Usable): ... this.
        (HAS_AVX): Use HAS_ARCH_FEATURE.
        (HAS_FMA4): Likewise.
        (HAS_YMM_USABLE): Remove.
        * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
        Enable AVX or FMA4 IFF YMM and XMM states are usable and the features
        are present.
        * sysdeps/x86_64/multiarch/strcmp.S: Use bit_AVX_Usable.
        * sysdeps/i386/i686/multiarch/Makefile: Add test-multiarch to tests.
        * sysdeps/x86_64/multiarch/Makefile: Likewise.
        * sysdeps/i386/i686/multiarch/test-multiarch.c: New file.
        * sysdeps/x86_64/multiarch/test-multiarch.c: New file.

diff --git a/sysdeps/i386/i686/multiarch/Makefile
b/sysdeps/i386/i686/multiarch/Makefile
index b764e5b..8946bfa 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -1,5 +1,6 @@
?ifeq ($(subdir),csu)
?aux += init-arch
+tests += test-multiarch
?gen-as-const-headers += ifunc-defines.sym
?endif

diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c
b/sysdeps/i386/i686/multiarch/test-multiarch.c
new file mode 100644
index 0000000..593cfec
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/test-multiarch.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/test-multiarch.c>
diff --git a/sysdeps/x86_64/multiarch/Makefile
b/sysdeps/x86_64/multiarch/Makefile
index 9a183f0..dd6c27d 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -1,5 +1,6 @@
?ifeq ($(subdir),csu)
?aux += init-arch
+tests += test-multiarch
?gen-as-const-headers += ifunc-defines.sym
?endif

diff --git a/sysdeps/x86_64/multiarch/init-arch.c
b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..155033d 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -1,6 +1,6 @@
?/* Initialize CPU feature data.
? ?This file is part of the GNU C Library.
- ? Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ ? Copyright (C) 2008-2012 Free Software Foundation, Inc.
? ?Contributed by Ulrich Drepper <drepper@redhat.com>.

? ?The GNU C Library is free software; you can redistribute it and/or
@@ -143,16 +143,23 @@ __init_cpu_features (void)
? else
? ? kind = arch_kind_other;

- ?if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+ ?/* Can we call xgetbv? ?*/
+ ?if (CPUID_OSXSAVE)
? ? {
- ? ? ?/* Reset the AVX bit in case OSXSAVE is disabled. ?*/
- ? ? ?if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
- ? ? ? ? && ({ unsigned int xcrlow;
- ? ? ? ? ? ? ? unsigned int xcrhigh;
- ? ? ? ? ? ? ? asm ("xgetbv"
- ? ? ? ? ? ? ? ? ? ?: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- ? ? ? ? ? ? ? (xcrlow & 6) == 6; }))
- ? ? ? __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+ ? ? ?unsigned int xcrlow;
+ ? ? ?unsigned int xcrhigh;
+ ? ? ?asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+ ? ? ?/* Is YMM and XMM state usable? ?*/
+ ? ? ?if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+ ? ? ? ? (bit_YMM_state | bit_XMM_state))
+ ? ? ? {
+ ? ? ? ? /* Determine if AVX is usable. ?*/
+ ? ? ? ? if (CPUID_AVX)
+ ? ? ? ? ? __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+ ? ? ? ? /* Determine if FMA4 is usable. ?*/
+ ? ? ? ? if (CPUID_FMA4)
+ ? ? ? ? ? __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
+ ? ? ? }
? ? }

? __cpu_features.family = family;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h
b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..45e2651 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -1,5 +1,5 @@
?/* This file is part of the GNU C Library.
- ? Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ ? Copyright (C) 2008-2012 Free Software Foundation, Inc.

? ?The GNU C Library is free software; you can redistribute it and/or
? ?modify it under the terms of the GNU Lesser General Public
@@ -21,8 +21,10 @@
?#define bit_Prefer_SSE_for_memop ? ? ? (1 << 3)
?#define bit_Fast_Unaligned_Load ? ? ? ? ? ? ? ?(1 << 4)
?#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_YMM_Usable ? ? ? ? ? ? ? ? (1 << 6)
+#define bit_AVX_Usable ? ? ? ? ? ? ? ? (1 << 6)
+#define bit_FMA4_Usable ? ? ? ? ? ? ? ? ? ? ? ?(1 << 7)

+/* CPUID Feature flags. ?*/
?#define bit_SSE2 ? ? ? (1 << 26)
?#define bit_SSSE3 ? ? ?(1 << 9)
?#define bit_SSE4_1 ? ? (1 << 19)
@@ -33,6 +35,10 @@
?#define bit_FMA ? ? ? ? ? ? ? ?(1 << 12)
?#define bit_FMA4 ? ? ? (1 << 16)

+/* XCR0 Feature flags. ?*/
+#define bit_XMM_state ?(1 << 1)
+#define bit_YMM_state ?(2 << 1)
+
?#ifdef __ASSEMBLER__

?# include <ifunc-defines.h>
@@ -49,7 +55,8 @@
?# define index_Prefer_SSE_for_memop ? ?FEATURE_INDEX_1*FEATURE_SIZE
?# define index_Fast_Unaligned_Load ? ? FEATURE_INDEX_1*FEATURE_SIZE
?# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable ? ? ? ? ? ? FEATURE_INDEX_1*FEATURE_SIZE

?#else ?/* __ASSEMBLER__ */

@@ -113,35 +120,45 @@ extern const struct cpu_features
*__get_cpu_features (void)

?/* Following are the feature tests used throughout libc. ?*/

+/* CPUID_* evaluates to true if the feature flag is enabled.
+ ? We always use &__cpu_features because the HAS_CPUID_* macros
+ ? are called only within __init_cpu_features, where we can't
+ ? call __get_cpu_features without infinite recursion. ?*/
+# define HAS_CPUID_FLAG(idx, reg, bit) \
+ ?(((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
+
+# define CPUID_OSXSAVE \
+ ?HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
+# define CPUID_AVX \
+ ?HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
+# define CPUID_FMA4 \
+ ?HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
+
+/* HAS_* evaluates to true if we may use the feature at runtime. ?*/
?# define HAS_SSE2 ? ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
?# define HAS_POPCOUNT ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx,
bit_POPCOUNT)
?# define HAS_SSSE3 ? ? HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
?# define HAS_SSE4_1 ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
?# define HAS_SSE4_2 ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
?# define HAS_FMA ? ? ? HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX ? ? ? HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define HAS_FMA4 ? ? ?HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001,
ecx, bit_FMA4)

?# define index_Fast_Rep_String ? ? ? ? FEATURE_INDEX_1
?# define index_Fast_Copy_Backward ? ? ?FEATURE_INDEX_1
?# define index_Slow_BSF ? ? ? ? ? ? ? ? ? ? ? ?FEATURE_INDEX_1
?# define index_Prefer_SSE_for_memop ? ?FEATURE_INDEX_1
?# define index_Fast_Unaligned_Load ? ? FEATURE_INDEX_1
-# define index_YMM_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1
+# define index_AVX_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1
+# define index_FMA4_Usable ? ? ? ? ? ? FEATURE_INDEX_1

?# define HAS_ARCH_FEATURE(name) \
? ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)

-# define HAS_FAST_REP_STRING ? HAS_ARCH_FEATURE (Fast_Rep_String)
-
-# define HAS_FAST_COPY_BACKWARD ? ? ? ?HAS_ARCH_FEATURE (Fast_Copy_Backward)
-
-# define HAS_SLOW_BSF ? ? ? ? ?HAS_ARCH_FEATURE (Slow_BSF)
-
-# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
-
-# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-
-# define HAS_YMM_USABLE ? ? ? ? ? ? ? ?HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_FAST_REP_STRING ? ? ? ? ? HAS_ARCH_FEATURE (Fast_Rep_String)
+# define HAS_FAST_COPY_BACKWARD ? ? ? ? ? ? ? ?HAS_ARCH_FEATURE
(Fast_Copy_Backward)
+# define HAS_SLOW_BSF ? ? ? ? ? ? ? ? ?HAS_ARCH_FEATURE (Slow_BSF)
+# define HAS_PREFER_SSE_FOR_MEMOP ? ? ?HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
+# define HAS_FAST_UNALIGNED_LOAD ? ? ? HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+# define HAS_AVX ? ? ? ? ? ? ? ? ? ? ? HAS_ARCH_FEATURE (AVX_Usable)
+# define HAS_FMA4 ? ? ? ? ? ? ? ? ? ? ?HAS_ARCH_FEATURE (FMA4_Usable)

?#endif /* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/strcmp.S
b/sysdeps/x86_64/multiarch/strcmp.S
index 2b9870b..d366d09 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -1,5 +1,5 @@
?/* strcmp with SSE4.2
- ? Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+ ? Copyright (C) 2009-2012 Free Software Foundation, Inc.
? ?Contributed by Intel Corporation.
? ?This file is part of the GNU C Library.

@@ -83,6 +83,7 @@
? ? ? ?.text
?ENTRY(STRCMP)
? ? ? ?.type ? STRCMP, @gnu_indirect_function
+ ? ? ? /* Manually inlined call to __get_cpu_features. ?*/
? ? ? ?cmpl ? ?$0, __cpu_features+KIND_OFFSET(%rip)
? ? ? ?jne ? ? 1f
? ? ? ?call ? ?__init_cpu_features
@@ -100,13 +101,14 @@ END(STRCMP)
?# ifdef USE_AS_STRCASECMP_L
?ENTRY(__strcasecmp)
? ? ? ?.type ? __strcasecmp, @gnu_indirect_function
+ ? ? ? /* Manually inlined call to __get_cpu_features. ?*/
? ? ? ?cmpl ? ?$0, __cpu_features+KIND_OFFSET(%rip)
? ? ? ?jne ? ? 1f
? ? ? ?call ? ?__init_cpu_features
?1:
?# ?ifdef HAVE_AVX_SUPPORT
? ? ? ?leaq ? ?__strcasecmp_avx(%rip), %rax
- ? ? ? testl ? $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
+ ? ? ? testl ? $bit_AVX_Usable,
__cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
? ? ? ?jnz ? ? 2f
?# ?endif
? ? ? ?leaq ? ?__strcasecmp_sse42(%rip), %rax
@@ -123,13 +125,14 @@ weak_alias (__strcasecmp, strcasecmp)
?# ifdef USE_AS_STRNCASECMP_L
?ENTRY(__strncasecmp)
? ? ? ?.type ? __strncasecmp, @gnu_indirect_function
+ ? ? ? /* Manually inlined call to __get_cpu_features. ?*/
? ? ? ?cmpl ? ?$0, __cpu_features+KIND_OFFSET(%rip)
? ? ? ?jne ? ? 1f
? ? ? ?call ? ?__init_cpu_features
?1:
?# ?ifdef HAVE_AVX_SUPPORT
? ? ? ?leaq ? ?__strncasecmp_avx(%rip), %rax
- ? ? ? testl ? $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
+ ? ? ? testl ? $bit_AVX_Usable,
__cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
? ? ? ?jnz ? ? 2f
?# ?endif
? ? ? ?leaq ? ?__strncasecmp_sse42(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c
b/sysdeps/x86_64/multiarch/test-multiarch.c
new file mode 100644
index 0000000..76b1af2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -0,0 +1,90 @@
+/* Test CPU feature data.
+ ? This file is part of the GNU C Library.
+ ? Copyright (C) 2012 Free Software Foundation, Inc.
+
+ ? The GNU C Library is free software; you can redistribute it and/or
+ ? modify it under the terms of the GNU Lesser General Public
+ ? License as published by the Free Software Foundation; either
+ ? version 2.1 of the License, or (at your option) any later version.
+
+ ? The GNU C Library is distributed in the hope that it will be useful,
+ ? but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ? MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ?See the GNU
+ ? Lesser General Public License for more details.
+
+ ? You should have received a copy of the GNU Lesser General Public
+ ? License along with the GNU C Library; if not, see
+ ? <http://www.gnu.org/licenses/>. ?*/
+
+#include <init-arch.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static char *cpu_flags;
+
+/* Search for flags in /proc/cpuinfo and store line
+ ? in cpu_flags. ?*/
+void
+get_cpuinfo (void)
+{
+ ?FILE *f;
+ ?char *line = NULL;
+ ?size_t len = 0;
+ ?ssize_t read;
+
+ ?f = fopen ("/proc/cpuinfo", "r");
+ ?if (f == NULL)
+ ? ?{
+ ? ? ?printf ("cannot open /proc/cpuinfo");
+ ? ? ?exit (1);
+ ? ?}
+
+ ?while ((read = getline (&line, &len, f)) != -1)
+ ? ?{
+ ? ? ?if (strncmp (line, "flags", 5) == 0)
+ ? ? ? {
+ ? ? ? ? cpu_flags = strdup (line);
+ ? ? ? ? break;
+ ? ? ? }
+ ? ?}
+ ?fclose (f);
+ ?free (line);
+}
+
+int
+check_proc (const char *proc_name, int flag, const char *name)
+{
+ ?int found = 0;
+
+ ?printf ("Checking %s:\n", name);
+ ?printf (" ?init-arch %d\n", flag);
+ ?if (strstr (cpu_flags, proc_name) != NULL)
+ ? ?found = 1;
+ ?printf (" ?cpuinfo (%s) %d\n", proc_name, found);
+
+ ?if (found != flag)
+ ? ?printf (" *** failure ***\n");
+
+ ?return (found != flag);
+}
+
+static int
+do_test (int argc, char **argv)
+{
+ ?int fails;
+
+ ?get_cpuinfo ();
+ ?fails = check_proc ("avx", HAS_AVX, "HAS_AVX");
+ ?fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4");
+ ?fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2");
+ ?fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1");
+ ?fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3");
+ ?fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT");
+
+ ?printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);
+
+ ?return (fails != 0);
+}
+
+#include "../../../test-skeleton.c"
---

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]