FMA4 detection is failing in GLIBC (from 2.23 release onwards) due to the fix for defect [BZ #19214]. Cause: To detect for FMA4 support, CPUID needs to be called by setting the value 0x80000001 in $eax register. FMA4 support only found on AMD cpu's so this requires special handling and can't be detected through generic logic. Earlier detection logic was executed after the CPUID execution but now detection logic is executed before the CPUID execution. GLIBC-2.23 release also fails to detect so I am filing this defect by selecting the version 2.23 and if this is not OK then will select version 2.24 for this defect. The code diff is given below. ---------------------- DIFF ------------------------ diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index c8f81ef..1787716 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -19,79 +19,23 @@ #include <cpuid.h> #include <cpu-features.h> -static void +static inline void get_common_indeces (struct cpu_features *cpu_features, unsigned int *family, unsigned int *model, unsigned int *extended_model) { - if (family) + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; + *extended_model = (eax >> 12) & 0xf0; + if (*family == 0x0f) { - unsigned int eax; - __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); - cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; - *extended_model = (eax >> 12) & 0xf0; - if (*family == 0x0f) - { - *family += (eax >> 20) & 0xff; - *model += *extended_model; - } - } - - if (cpu_features->max_cpuid >= 7) - __cpuid_count (7, 0, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, - cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); - - /* Can we call xgetbv? */ - if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) - { - unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - /* Is YMM and XMM state usable? */ - if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == - (bit_YMM_state | bit_XMM_state)) - { - /* Determine if AVX is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX)) - cpu_features->feature[index_arch_AVX_Usable] - |= bit_arch_AVX_Usable; - /* Determine if AVX2 is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX2)) - cpu_features->feature[index_arch_AVX2_Usable] - |= bit_arch_AVX2_Usable; - /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and - ZMM16-ZMM31 state are enabled. */ - if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state - | bit_ZMM16_31_state)) == - (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) - { - /* Determine if AVX512F is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) - { - cpu_features->feature[index_arch_AVX512F_Usable] - |= bit_arch_AVX512F_Usable; - /* Determine if AVX512DQ is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)) - cpu_features->feature[index_arch_AVX512DQ_Usable] - |= bit_arch_AVX512DQ_Usable; - } - } - /* Determine if FMA is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, FMA)) - cpu_features->feature[index_arch_FMA_Usable] - |= bit_arch_FMA_Usable; - /* Determine if FMA4 is usable. */ - if (CPU_FEATURES_CPU_P (cpu_features, FMA4)) - cpu_features->feature[index_arch_FMA4_Usable] - |= bit_arch_FMA4_Usable; - } + *family += (eax >> 20) & 0xff; + *model += *extended_model; } } @@ -227,19 +165,73 @@ init_cpu_features (struct cpu_features *cpu_features) } } else - { - kind = arch_kind_other; - get_common_indeces (cpu_features, NULL, NULL, NULL); - } + kind = arch_kind_other; /* Support i586 if CX8 is available. */ - if (CPU_FEATURES_CPU_P (cpu_features, CX8)) + if (HAS_CPU_FEATURE (CX8)) cpu_features->feature[index_arch_I586] |= bit_arch_I586; /* Support i686 if CMOV is available. */ - if (CPU_FEATURES_CPU_P (cpu_features, CMOV)) + if (HAS_CPU_FEATURE (CMOV)) cpu_features->feature[index_arch_I686] |= bit_arch_I686; + if (cpu_features->max_cpuid >= 7) + __cpuid_count (7, 0, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + + /* Can we call xgetbv? */ + if (HAS_CPU_FEATURE (OSXSAVE)) + { + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? */ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (HAS_CPU_FEATURE (AVX)) + cpu_features->feature[index_arch_AVX_Usable] + |= bit_arch_AVX_Usable; +#if index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load +# error index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load +#endif + /* Determine if AVX2 is usable. Unaligned load with 256-bit + AVX registers are faster on processors with AVX2. */ + if (HAS_CPU_FEATURE (AVX2)) + cpu_features->feature[index_arch_AVX2_Usable] + |= bit_arch_AVX2_Usable | bit_arch_AVX_Fast_Unaligned_Load; + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state + | bit_ZMM16_31_state)) == + (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + { + /* Determine if AVX512F is usable. */ + if (HAS_CPU_FEATURE (AVX512F)) + { + cpu_features->feature[index_arch_AVX512F_Usable] + |= bit_arch_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (HAS_CPU_FEATURE (AVX512DQ)) + cpu_features->feature[index_arch_AVX512DQ_Usable] + |= bit_arch_AVX512DQ_Usable; + } + } + /* Determine if FMA is usable. */ + if (HAS_CPU_FEATURE (FMA)) + cpu_features->feature[index_arch_FMA_Usable] + |= bit_arch_FMA_Usable; + /* Determine if FMA4 is usable. */ + if (HAS_CPU_FEATURE (FMA4)) + cpu_features->feature[index_arch_FMA4_Usable] + |= bit_arch_FMA4_Usable; + } + } + #if !HAS_CPUID no_cpuid: #endif
Created attachment 9325 [details] A patch Try this.
This works fine. Can you please commit to the trunk.
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU C Library master sources". The branch, master has been updated via 91655fc307fec2d5a8d60446b4de11cc986b47fa (commit) from c9bd40daaee18cf1d9824e4a7ebaebe321e0a5a8 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=91655fc307fec2d5a8d60446b4de11cc986b47fa commit 91655fc307fec2d5a8d60446b4de11cc986b47fa Author: H.J. Lu <hjl.tools@gmail.com> Date: Tue Jun 7 08:00:21 2016 -0700 Check FMA after COMMON_CPUID_INDEX_80000001 Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and FMA4 requires AVX, determine if FMA4 is usable after COMMON_CPUID_INDEX_80000001 is available and if AVX is usable. [BZ #20195] * sysdeps/x86/cpu-features.c (get_common_indeces): Move FMA4 check to ... (init_cpu_features): Here. ----------------------------------------------------------------------- Summary of changes: ChangeLog | 7 +++++++ sysdeps/x86/cpu-features.c | 13 +++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-)
Fixed.