This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX
- From: "Carlos O'Donell" <carlos at systemhalted dot org>
- To: Andreas Jaeger <aj at suse dot de>
- Cc: libc-alpha at sourceware dot org
- Date: Thu, 10 May 2012 18:08:36 -0400
- Subject: Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX
- References: <hopqaf6pz3.fsf@byrd.suse.de>
On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger <aj@suse.de> wrote:
>
> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
> the multiarch code:
>
> 1. We should disable FAM4 if AVX is not available (due to disabled
> ? OSXSAVE)
>
> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
> ? from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
> ? revert the change for AVX. We really have to disable AVX if it's
>
> The issue is easy reproduceable under Xen for the reporter.
>
> Also, the two commits introduced YMM_Usable and removed its usage but
> did not remove the code from the headers, I'm cleaning this up as well.
>
> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>
> Ok to commit?
No.
I actually think the code Ulrich wrote originally was almost correct,
but missing this:
diff --git a/sysdeps/x86_64/multiarch/init-arch.h
b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..4620513 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -119,7 +119,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
+# define HAS_AVX HAS_ARCH_FEATURE (YMM_Usable)
# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001,
ecx, bit_FMA4)
# define index_Fast_Rep_String FEATURE_INDEX_1
That way we end up following exactly what the Intel manual says to do:
* Check for AVX = 1 in cpuid return.
* Check for OSXSAVE = 1
* Check XCR0 has YMM and XMM usable.
* Set YMM_usable
* Allow glibc to use AVX instructions by having HAS_AVX return 1 based
on YMM_usable.
> Andreas
>
> 2012-05-08 ?Andreas Jaeger ?<aj@suse.de>
>
> ? ? ? ?* sysdeps/x86_64/multiarch/init-arch.h (bit_YMM_Usable): Remove,
> ? ? ? ?it's unused.
> ? ? ? ?(index_YMM_Usable): Likewise.
> ? ? ? ?(index_YMM_Usable): Likewise.
> ? ? ? ?(HAS_YMM_USABLE): Likewise.
>
> 2012-05-08 ?Jim Westfall ?<jwestfall@surrealistic.net>
>
> ? ? ? ?[BZ #14059]
> ? ? ? ?* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
> ? ? ? ?Disable FMA4 if AVX is disabled.
> ? ? ? ?(__init_cpu_features): Revert broken check for bit_YMM_USABLE.
>
> diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
> index 80527ec..dbb091f 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.c
> +++ b/sysdeps/x86_64/multiarch/init-arch.c
> @@ -146,15 +146,20 @@ __init_cpu_features (void)
> ? if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
> ? ? {
> ? ? ? /* Reset the AVX bit in case OSXSAVE is disabled. ?*/
> - ? ? ?if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
> - ? ? ? ? && ({ unsigned int xcrlow;
> - ? ? ? ? ? ? ? unsigned int xcrhigh;
> - ? ? ? ? ? ? ? asm ("xgetbv"
> - ? ? ? ? ? ? ? ? ? ?: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
> - ? ? ? ? ? ? ? (xcrlow & 6) == 6; }))
> - ? ? ? __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
> + ? ? ?if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
> + ? ? ? ? || ({ unsigned int xcrlow;
> + ? ? ? ? ? ? unsigned int xcrhigh;
> + ? ? ? ? ? ? asm ("xgetbv"
> + ? ? ? ? ? ? ? ? ?: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
> + ? ? ? ? ? ? (xcrlow & 6) != 6; }))
This is black magic. Please use macros like "(xcrlow & (bit_YMM_state
| bit_XMM_state)) != (bit_YMM_state | bit_XMM_state)", thus it's clear
that your checking the state of YMM and XMM in XCR0.
> + ? ? ? __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
I don't like this.
It rewrites the values returned by cpuid and we should not do that.
We should instead use a new feature and HAS_AVX check the new feature.
> ? ? }
>
> + ?/* FMA4 depends on AVX support. ?*/
> + ?if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
> + ? ? ?&& (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) == 0)
> + ? ?__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx &= ~bit_FMA4;
With the new feature check you would change this to check for the new
feature instead of writing what *appears* to be invalid code per the
Intel manual.
> +
> ? __cpu_features.family = family;
> ? __cpu_features.model = model;
> ? atomic_write_barrier ();
> diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
> index 5054e46..2bddea3 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.h
> +++ b/sysdeps/x86_64/multiarch/init-arch.h
> @@ -21,7 +21,6 @@
> ?#define bit_Prefer_SSE_for_memop ? ? ? (1 << 3)
> ?#define bit_Fast_Unaligned_Load ? ? ? ? ? ? ? ?(1 << 4)
> ?#define bit_Prefer_PMINUB_for_stringop (1 << 5)
> -#define bit_YMM_Usable ? ? ? ? ? ? ? ? (1 << 6)
> ?#define bit_SSE2 ? ? ? (1 << 26)
> ?#define bit_SSSE3 ? ? ?(1 << 9)
> @@ -49,7 +48,6 @@
> ?# define index_Prefer_SSE_for_memop ? ?FEATURE_INDEX_1*FEATURE_SIZE
> ?# define index_Fast_Unaligned_Load ? ? FEATURE_INDEX_1*FEATURE_SIZE
> ?# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
> -# define index_YMM_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1*FEATURE_SIZE
>
> ?#else ?/* __ASSEMBLER__ */
>
> @@ -127,7 +125,6 @@ extern const struct cpu_features *__get_cpu_features (void)
> ?# define index_Slow_BSF ? ? ? ? ? ? ? ? ? ? ? ?FEATURE_INDEX_1
> ?# define index_Prefer_SSE_for_memop ? ?FEATURE_INDEX_1
> ?# define index_Fast_Unaligned_Load ? ? FEATURE_INDEX_1
> -# define index_YMM_Usable ? ? ? ? ? ? ?FEATURE_INDEX_1
>
> ?# define HAS_ARCH_FEATURE(name) \
> ? ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
> @@ -142,6 +139,4 @@ extern const struct cpu_features *__get_cpu_features (void)
>
> ?# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
>
> -# define HAS_YMM_USABLE ? ? ? ? ? ? ? ?HAS_ARCH_FEATURE (YMM_Usable)
> -
> ?#endif /* __ASSEMBLER__ */
>
> --
> ?Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
> ?SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
> ? GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
> ? ?GPG fingerprint = 93A3 365E CE47 B889 DF7F ?FED1 389A 563C C272 A126