[PATCH] x86: Detect Intel Advanced Matrix Extensions
H.J. Lu
hjl.tools@gmail.com
Fri Jun 26 12:40:40 GMT 2020
On Thu, Jun 25, 2020 at 3:38 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Intel Advanced Matrix Extensions (Intel AMX) is a new programming
> paradigm consisting of two components: a set of 2-dimensional registers
> (tiles) representing sub-arrays from a larger 2-dimensional memory image,
> and accelerators able to operate on tiles. Intel AMX is an extensible
> architecture. New accelerators can be added and the existing accelerator
> may be enhanced to provide higher performance. The initial features are
> AMX-BF16, AMX-TILE and AMX-INT8, which are usable only if the operating
> system supports both XTILECFG state and XTILEDATA state.
>
> Add AMX-BF16, AMX-TILE and AMX-INT8 support to HAS_CPU_FEATURE and
> CPU_FEATURE_USABLE.
> ---
> sysdeps/x86/cpu-features.c | 18 ++++++++++++++++++
> sysdeps/x86/cpu-features.h | 20 ++++++++++++++++++++
> sysdeps/x86/tst-get-cpu-features.c | 6 ++++++
> 3 files changed, 44 insertions(+)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 79bc0d7216..c351bdd54a 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -239,6 +239,24 @@ get_common_indices (struct cpu_features *cpu_features,
> }
> }
>
> + /* Are XTILECFG and XTILEDATA states usable? */
> + if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
> + == (bit_XTILECFG_state | bit_XTILEDATA_state))
> + {
> + /* Determine if AMX_BF16 is usable. */
> + if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16))
> + cpu_features->usable[index_arch_AMX_BF16_Usable]
> + |= bit_arch_AMX_BF16_Usable;
> + /* Determine if AMX_TILE is usable. */
> + if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE))
> + cpu_features->usable[index_arch_AMX_TILE_Usable]
> + |= bit_arch_AMX_TILE_Usable;
> + /* Determine if AMX_INT8 is usable. */
> + if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8))
> + cpu_features->usable[index_arch_AMX_INT8_Usable]
> + |= bit_arch_AMX_INT8_Usable;
> + }
> +
> /* For _dl_runtime_resolve, set xsave_state_size to xsave area
> size + integer register save size and align it to 64 bytes. */
> if (cpu_features->basic.max_cpuid >= 0xd)
> diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
> index 574f055e0c..78d0692fab 100644
> --- a/sysdeps/x86/cpu-features.h
> +++ b/sysdeps/x86/cpu-features.h
> @@ -156,6 +156,9 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define bit_arch_AVX512_VP2INTERSECT_Usable (1u << 24)
> #define bit_arch_AVX512_BF16_Usable (1u << 25)
> #define bit_arch_PKU_Usable (1u << 26)
> +#define bit_arch_AMX_BF16_Usable (1u << 27)
> +#define bit_arch_AMX_TILE_Usable (1u << 28)
> +#define bit_arch_AMX_INT8_Usable (1u << 29)
>
> #define index_arch_AVX_Usable USABLE_FEATURE_INDEX_1
> #define index_arch_AVX2_Usable USABLE_FEATURE_INDEX_1
> @@ -184,6 +187,9 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define index_arch_AVX512_VP2INTERSECT_Usable USABLE_FEATURE_INDEX_1
> #define index_arch_AVX512_BF16_Usable USABLE_FEATURE_INDEX_1
> #define index_arch_PKU_Usable USABLE_FEATURE_INDEX_1
> +#define index_arch_AMX_BF16_Usable USABLE_FEATURE_INDEX_1
> +#define index_arch_AMX_TILE_Usable USABLE_FEATURE_INDEX_1
> +#define index_arch_AMX_INT8_Usable USABLE_FEATURE_INDEX_1
>
> #define feature_AVX_Usable usable
> #define feature_AVX2_Usable usable
> @@ -212,6 +218,9 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define feature_AVX512_VP2INTERSECT_Usable usable
> #define feature_AVX512_BF16_Usable usable
> #define feature_PKU_Usable usable
> +#define feature_AMX_BF16_Usable usable
> +#define feature_AMX_TILE_Usable usable
> +#define feature_AMX_INT8_Usable usable
>
> /* CPU features. */
>
> @@ -347,6 +356,9 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define bit_cpu_TSXLDTRK (1u << 16)
> #define bit_cpu_PCONFIG (1u << 18)
> #define bit_cpu_IBT (1u << 20)
> +#define bit_cpu_AMX_BF16 (1u << 22)
> +#define bit_cpu_AMX_TILE (1u << 24)
> +#define bit_cpu_AMX_INT8 (1u << 25)
> #define bit_cpu_IBRS_IBPB (1u << 26)
> #define bit_cpu_STIBP (1u << 27)
> #define bit_cpu_L1D_FLUSH (1u << 28)
> @@ -527,6 +539,9 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define index_cpu_SERIALIZE COMMON_CPUID_INDEX_7
> #define index_cpu_HYBRID COMMON_CPUID_INDEX_7
> #define index_cpu_TSXLDTRK COMMON_CPUID_INDEX_7
> +#define index_cpu_AMX_BF16 COMMON_CPUID_INDEX_7
> +#define index_cpu_AMX_TILE COMMON_CPUID_INDEX_7
> +#define index_cpu_AMX_INT8 COMMON_CPUID_INDEX_7
> #define index_cpu_PCONFIG COMMON_CPUID_INDEX_7
> #define index_cpu_IBT COMMON_CPUID_INDEX_7
> #define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7
> @@ -709,6 +724,9 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define reg_SERIALIZE edx
> #define reg_HYBRID edx
> #define reg_TSXLDTRK edx
> +#define reg_AMX_BF16 edx
> +#define reg_AMX_TILE edx
> +#define reg_AMX_INT8 edx
> #define reg_PCONFIG edx
> #define reg_IBT edx
> #define reg_IBRS_IBPB edx
> @@ -819,6 +837,8 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define bit_Opmask_state (1u << 5)
> #define bit_ZMM0_15_state (1u << 6)
> #define bit_ZMM16_31_state (1u << 7)
> +#define bit_XTILECFG_state (1u << 17)
> +#define bit_XTILEDATA_state (1u << 18)
>
> # if defined (_LIBC) && !IS_IN (nonlib)
> /* Unused for x86. */
> diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
> index c60918cf00..3d44af202e 100644
> --- a/sysdeps/x86/tst-get-cpu-features.c
> +++ b/sysdeps/x86/tst-get-cpu-features.c
> @@ -185,6 +185,9 @@ do_test (void)
> CHECK_CPU_FEATURE (SERIALIZE);
> CHECK_CPU_FEATURE (HYBRID);
> CHECK_CPU_FEATURE (TSXLDTRK);
> + CHECK_CPU_FEATURE (AMX_BF16);
> + CHECK_CPU_FEATURE (AMX_TILE);
> + CHECK_CPU_FEATURE (AMX_INT8);
> CHECK_CPU_FEATURE (PCONFIG);
> CHECK_CPU_FEATURE (IBT);
> CHECK_CPU_FEATURE (IBRS_IBPB);
> @@ -239,6 +242,9 @@ do_test (void)
> CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
> CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
> CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
> + CHECK_CPU_FEATURE_USABLE (AMX_BF16);
> + CHECK_CPU_FEATURE_USABLE (AMX_TILE);
> + CHECK_CPU_FEATURE_USABLE (AMX_INT8);
> CHECK_CPU_FEATURE_USABLE (XOP);
> CHECK_CPU_FEATURE_USABLE (FMA4);
> CHECK_CPU_FEATURE_USABLE (XSAVEC);
> --
> 2.26.2
>
I am checking it in.
--
H.J.
More information about the Libc-alpha mailing list