This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX


On 05/11/2012 08:34 AM, Andreas Jaeger wrote:
> On 05/11/2012 12:18 AM, Carlos O'Donell wrote:
>> On Thu, May 10, 2012 at 6:08 PM, Carlos O'Donell
>> <carlos@systemhalted.org>   wrote:
>>> On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger<aj@suse.de>   wrote:
>>>>
>>>> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
>>>> the multiarch code:
>>>>
>>>> 1. We should disable FAM4 if AVX is not available (due to disabled
>>>>     OSXSAVE)
>>>>
>>>> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
>>>>     from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
>>>>     revert the change for AVX. We really have to disable AVX if it's
>>>>
>>>> The issue is easy reproduceable under Xen for the reporter.
>>>>
>>>> Also, the two commits introduced YMM_Usable and removed its usage but
>>>> did not remove the code from the headers, I'm cleaning this up as well.
>>>>
>>>> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>>>>
>>>> Ok to commit?
>>>
>>> No.
>>>
>>> I actually think the code Ulrich wrote originally was almost correct,
>>> but missing this:
>>
>> If I had to fix it it would look like this:
> 
> Thanks, this looks great - but misses the check for FMA4.
> 
> Here's a combined patch that compiled fine for me on
> Linux/x86-64. Jeff, could you test it under Xen, please?
> 
> Andreas
> 
> 2012-05-11  Andreas Jaeger<aj@suse.de>
> 	Carlos O'Donell<carlos_odonell@mentor.com>
> 
> 	[BZ #14059]
> 	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
> 	Fix check for AVX, enable FMA4 only if it exists and if AVX is
> 	usable.
> 
> 	* sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Usable): Renamed
> 	from bit_YMM_usable.
> 	(bit_FMA4_Usable): New macro.
> 	(bit_XMM_state): New macro.
> 	(bit_YMM_state): New macro.
> 	(index_AVX_Usable): Renamed from index_YMM_Usable.
> 	(index_FMA4_Usable): New macro.
> 	(HAS_YMM_USABLE): Delete.
> 	(HAS_AVX): Use HAS_ARCH_FEATURE.
> 	(HAS_FMA4): Likewise.

That version above contained a typo. Here's an update

Allan, does this work now?

Andreas

diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..6a49552 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -145,16 +145,22 @@ __init_cpu_features (void)
 
   if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
     {
-      /* Reset the AVX bit in case OSXSAVE is disabled.  */
+      /* Determine if AVX is usable.  */
       if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
 	  && ({ unsigned int xcrlow;
 		unsigned int xcrhigh;
 		asm ("xgetbv"
 		     : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
-		(xcrlow & 6) == 6; }))
-	__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+		(xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+		 (bit_YMM_state | bit_XMM_state); }))
+	__cpu_features.feature[index_AVX_Usable] = 1;
     }
 
+  /* FMA4 depends on AVX support.  */
+  if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
+      && HAS_AVX)
+	__cpu_features.feature[index_FMA4_Usable] = 1;
+
   __cpu_features.family = family;
   __cpu_features.model = model;
   atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..2d02e2b 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,8 +21,10 @@
 #define bit_Prefer_SSE_for_memop	(1 << 3)
 #define bit_Fast_Unaligned_Load		(1 << 4)
 #define bit_Prefer_PMINUB_for_stringop	(1 << 5)
-#define bit_YMM_Usable			(1 << 6)
+#define bit_AVX_Usable			(1 << 6)
+#define bit_FMA4_Usable			(1 << 7)
 
+/* CPUID Feature flags.  */
 #define bit_SSE2	(1 << 26)
 #define bit_SSSE3	(1 << 9)
 #define bit_SSE4_1	(1 << 19)
@@ -33,6 +35,10 @@
 #define bit_FMA		(1 << 12)
 #define bit_FMA4	(1 << 16)
 
+/* XCR0 Feature flags.  */
+#define bit_XMM_state  (1 << 1)
+#define bit_YMM_state  (2 << 1)
+
 #ifdef	__ASSEMBLER__
 
 # include <ifunc-defines.h>
@@ -49,7 +55,8 @@
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -119,15 +126,14 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define HAS_FMA4	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
-# define index_YMM_Usable		FEATURE_INDEX_1
+# define index_AVX_Usable		FEATURE_INDEX_1
+# define index_FMA4_Usable		FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -142,6 +148,8 @@ extern const struct cpu_features *__get_cpu_features (void)
 
 # define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 
-# define HAS_YMM_USABLE		HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_AVX		HAS_ARCH_FEATURE (AVX_Usable)
+
+# define HAS_FMA4		HAS_ARCH_FEATURE (FMA4_Usable)
 
 #endif	/* __ASSEMBLER__ */

-- 
 Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
  SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
   GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
    GPG fingerprint = 93A3 365E CE47 B889 DF7F  FED1 389A 563C C272 A126


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]