This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX
On 05/11/2012 08:34 AM, Andreas Jaeger wrote:
> On 05/11/2012 12:18 AM, Carlos O'Donell wrote:
>> On Thu, May 10, 2012 at 6:08 PM, Carlos O'Donell
>> <carlos@systemhalted.org> wrote:
>>> On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger<aj@suse.de> wrote:
>>>>
>>>> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
>>>> the multiarch code:
>>>>
>>>> 1. We should disable FAM4 if AVX is not available (due to disabled
>>>> OSXSAVE)
>>>>
>>>> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
>>>> from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
>>>> revert the change for AVX. We really have to disable AVX if it's
>>>>
>>>> The issue is easy reproduceable under Xen for the reporter.
>>>>
>>>> Also, the two commits introduced YMM_Usable and removed its usage but
>>>> did not remove the code from the headers, I'm cleaning this up as well.
>>>>
>>>> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>>>>
>>>> Ok to commit?
>>>
>>> No.
>>>
>>> I actually think the code Ulrich wrote originally was almost correct,
>>> but missing this:
>>
>> If I had to fix it it would look like this:
>
> Thanks, this looks great - but misses the check for FMA4.
>
> Here's a combined patch that compiled fine for me on
> Linux/x86-64. Jeff, could you test it under Xen, please?
>
> Andreas
>
> 2012-05-11 Andreas Jaeger<aj@suse.de>
> Carlos O'Donell<carlos_odonell@mentor.com>
>
> [BZ #14059]
> * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
> Fix check for AVX, enable FMA4 only if it exists and if AVX is
> usable.
>
> * sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Usable): Renamed
> from bit_YMM_usable.
> (bit_FMA4_Usable): New macro.
> (bit_XMM_state): New macro.
> (bit_YMM_state): New macro.
> (index_AVX_Usable): Renamed from index_YMM_Usable.
> (index_FMA4_Usable): New macro.
> (HAS_YMM_USABLE): Delete.
> (HAS_AVX): Use HAS_ARCH_FEATURE.
> (HAS_FMA4): Likewise.
That version above contained a typo. Here's an update
Allan, does this work now?
Andreas
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..6a49552 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -145,16 +145,22 @@ __init_cpu_features (void)
if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
{
- /* Reset the AVX bit in case OSXSAVE is disabled. */
+ /* Determine if AVX is usable. */
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
&& ({ unsigned int xcrlow;
unsigned int xcrhigh;
asm ("xgetbv"
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- (xcrlow & 6) == 6; }))
- __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+ (xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+ (bit_YMM_state | bit_XMM_state); }))
+ __cpu_features.feature[index_AVX_Usable] = 1;
}
+ /* FMA4 depends on AVX support. */
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
+ && HAS_AVX)
+ __cpu_features.feature[index_FMA4_Usable] = 1;
+
__cpu_features.family = family;
__cpu_features.model = model;
atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..2d02e2b 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,8 +21,10 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_YMM_Usable (1 << 6)
+#define bit_AVX_Usable (1 << 6)
+#define bit_FMA4_Usable (1 << 7)
+/* CPUID Feature flags. */
#define bit_SSE2 (1 << 26)
#define bit_SSSE3 (1 << 9)
#define bit_SSE4_1 (1 << 19)
@@ -33,6 +35,10 @@
#define bit_FMA (1 << 12)
#define bit_FMA4 (1 << 16)
+/* XCR0 Feature flags. */
+#define bit_XMM_state (1 << 1)
+#define bit_YMM_state (2 << 1)
+
#ifdef __ASSEMBLER__
# include <ifunc-defines.h>
@@ -49,7 +55,8 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -119,15 +126,14 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_YMM_Usable FEATURE_INDEX_1
+# define index_AVX_Usable FEATURE_INDEX_1
+# define index_FMA4_Usable FEATURE_INDEX_1
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -142,6 +148,8 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
+
+# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
#endif /* __ASSEMBLER__ */
--
Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
GPG fingerprint = 93A3 365E CE47 B889 DF7F FED1 389A 563C C272 A126