This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX
On Thu, May 10, 2012 at 6:08 PM, Carlos O'Donell
<carlos@systemhalted.org> wrote:
> On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger <aj@suse.de> wrote:
>>
>> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
>> the multiarch code:
>>
>> 1. We should disable FAM4 if AVX is not available (due to disabled
>> ? OSXSAVE)
>>
>> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
>> ? from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
>> ? revert the change for AVX. We really have to disable AVX if it's
>>
>> The issue is easy reproduceable under Xen for the reporter.
>>
>> Also, the two commits introduced YMM_Usable and removed its usage but
>> did not remove the code from the headers, I'm cleaning this up as well.
>>
>> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>>
>> Ok to commit?
>
> No.
>
> I actually think the code Ulrich wrote originally was almost correct,
> but missing this:
If I had to fix it it would look like this:
diff --git a/sysdeps/x86_64/multiarch/init-arch.c
b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..0f0efb7 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -145,14 +145,15 @@ __init_cpu_features (void)
if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
{
- /* Reset the AVX bit in case OSXSAVE is disabled. */
+ /* Determine if AVX is usable. */
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
&& ({ unsigned int xcrlow;
unsigned int xcrhigh;
asm ("xgetbv"
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
- (xcrlow & 6) == 6; }))
- __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+ (xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+ (bit_YMM_state | bit_XMM_state); }))
+ __cpu_features.feature[index_AVX_Usable] = 1;
}
__cpu_features.family = family;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h
b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..e70c4fe 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,8 +21,9 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_YMM_Usable (1 << 6)
+#define bit_AVX_Usable (1 << 6)
+/* CPUID Feature flags. */
#define bit_SSE2 (1 << 26)
#define bit_SSSE3 (1 << 9)
#define bit_SSE4_1 (1 << 19)
@@ -33,6 +34,10 @@
#define bit_FMA (1 << 12)
#define bit_FMA4 (1 << 16)
+/* XCR0 Feature flags. */
+#define bit_XMM_state (1 << 1)
+#define bit_YMM_state (2 << 1)
+
#ifdef __ASSEMBLER__
# include <ifunc-defines.h>
@@ -49,7 +54,7 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -119,7 +124,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
+# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001,
ecx, bit_FMA4)
# define index_Fast_Rep_String FEATURE_INDEX_1
@@ -127,7 +132,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_YMM_Usable FEATURE_INDEX_1
+# define index_AVX_Usable FEATURE_INDEX_1
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -142,6 +147,6 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_AVX_USABLE HAS_ARCH_FEATURE (AVX_Usable)
#endif /* __ASSEMBLER__ */
===
Cheers,
Carlos.