This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH RFC] X86_64 Avx2 Detection


From: Sihai Yao <sihai.ysh@alibaba-inc.com>

This patch sets bit_AVX2_Usable in __cpu_features.feature by checking the
AVX2 bit of COMMON_CPUID_INDEX_7 (e.g. on Haswell). Architecture-specific
assembly files can use this bit to select the code path.

---
This version removes the unrelated CPU model branch code and FEATURE_INDEX_7,
which are not needed for AVX2 detection.

 ChangeLog                                  | 8 ++++++++
 sysdeps/x86_64/multiarch/ifunc-defines.sym | 1 +
 sysdeps/x86_64/multiarch/init-arch.c       | 3 +++
 sysdeps/x86_64/multiarch/init-arch.h       | 8 ++++++++
 4 files changed, 20 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index fb0177d..ba8980c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2014-04-04  Sihai Yao  <sihai.ysh@alibaba-inc.com>
+	* sysdeps/x86_64/multiarch/ifunc-defines.sym: Add
+	COMMON_CPUID_INDEX_7.
+	* sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu
+	features word of COMMON_CPUID_INDEX_7.
+	* sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable and
+	index_AVX2_Usable for future assembly code to determine the calling path.
+
 2014-04-10 Torvald Riegel  <triegel@redhat.com>
 
 	* benchtests/pthread_once-inputs: New file.
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
index eb1538a..a410d88 100644
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
@@ -17,4 +17,5 @@ FEATURE_OFFSET		offsetof (struct cpu_features, feature)
 FEATURE_SIZE		sizeof (unsigned int)
 
 COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
 FEATURE_INDEX_1
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index db74d97..2a6dcb7 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -167,6 +167,9 @@ __init_cpu_features (void)
 	  /* Determine if AVX is usable.  */
 	  if (CPUID_AVX)
 	    __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+	  /* Determine if AVX2 is usable.  */
+	  if (CPUID_AVX2)
+	    __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
 	  /* Determine if FMA is usable.  */
 	  if (CPUID_FMA)
 	    __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 793707a..813b6de 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -24,6 +24,7 @@
 #define bit_FMA_Usable			(1 << 7)
 #define bit_FMA4_Usable			(1 << 8)
 #define bit_Slow_SSE4_2			(1 << 9)
+#define bit_AVX2_Usable			(1 << 10)
 
 /* CPUID Feature flags.  */
 
@@ -40,6 +41,7 @@
 
 /* COMMON_CPUID_INDEX_7.  */
 #define bit_RTM		(1 << 11)
+#define bit_AVX2	(1 << 5)
 
 /* XCR0 Feature flags.  */
 #define bit_XMM_state  (1 << 1)
@@ -54,6 +56,7 @@
 # define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
@@ -64,6 +67,7 @@
 # define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX2_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -145,6 +149,8 @@ extern const struct cpu_features *__get_cpu_features (void)
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
 # define CPUID_RTM \
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
+# define CPUID_AVX2 \
+  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
 
 /* HAS_* evaluates to true if we may use the feature at runtime.  */
 # define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
@@ -153,6 +159,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
 # define HAS_RTM	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
+# define HAS_AVX2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
@@ -163,6 +170,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_FMA_Usable		FEATURE_INDEX_1
 # define index_FMA4_Usable		FEATURE_INDEX_1
 # define index_Slow_SSE4_2		FEATURE_INDEX_1
+# define index_AVX2_Usable		FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
-- 
1.8.1.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]