[PATCH 2/3] Support Intel AMX-TF32
Haochen Jiang
haochen.jiang@intel.com
Wed Dec 18 06:32:03 GMT 2024
Changes in v2 have been mentioned in the previous AMX-TRANSPOSE patch.
---
This patch adds support for AMX-TF32. It is a simple ISA
compared to the previous ones, so no special handling is needed.
gas/ChangeLog:
* config/tc-i386.c: Add amx_tf32.
* doc/c-i386.texi: Document .amx_tf32.
* testsuite/gas/i386/x86-64.exp: Run AMX-TF32 tests.
* testsuite/gas/i386/x86-64-amx-tf32-bad.d: New test.
* testsuite/gas/i386/x86-64-amx-tf32-bad.s: Ditto.
* testsuite/gas/i386/x86-64-amx-tf32-intel.d: Ditto.
* testsuite/gas/i386/x86-64-amx-tf32-inval.l: Ditto.
* testsuite/gas/i386/x86-64-amx-tf32-inval.s: Ditto.
* testsuite/gas/i386/x86-64-amx-tf32.d: Ditto.
* testsuite/gas/i386/x86-64-amx-tf32.s: Ditto.
opcodes/ChangeLog:
* i386-dis.c (PREFIX_VEX_0F3848_X86_64_L_0_W_0): New.
(X86_64_VEX_0F3848): Ditto.
(VEX_LEN_0F3848_X86_64): Ditto.
(VEX_W_0F3848_X86_64_L_0): Ditto.
(prefix_table): Add PREFIX_VEX_0F3848_X86_64_L_0_W_0.
(x86_64_table): Add X86_64_VEX_0F3848.
(vex_len_table): Add VEX_LEN_0F3848_X86_64.
(vex_w_table): Add VEX_W_0F3848_X86_64_L_0.
* i386-gen.c (isa_dependencies): Add AMX_TF32.
(cpu_flags): Ditto.
* i386-init.h: Regenerated.
* i386-mnem.h: Ditto.
* i386-opc.h (CpuAMX_TF32): New.
(i386_cpu_flags): Add cpuamx_tf32.
* i386-opc.tbl: Add AMX-TF32 instructions.
* i386-tbl.h: Regenerated.
---
gas/config/tc-i386.c | 1 +
gas/doc/c-i386.texi | 4 +-
gas/testsuite/gas/i386/x86-64-amx-tf32-bad.d | 16 +
gas/testsuite/gas/i386/x86-64-amx-tf32-bad.s | 18 +
.../gas/i386/x86-64-amx-tf32-intel.d | 15 +
.../gas/i386/x86-64-amx-tf32-inval.l | 7 +
.../gas/i386/x86-64-amx-tf32-inval.s | 10 +
gas/testsuite/gas/i386/x86-64-amx-tf32.d | 13 +
gas/testsuite/gas/i386/x86-64-amx-tf32.s | 15 +
gas/testsuite/gas/i386/x86-64.exp | 4 +
opcodes/i386-dis.c | 28 +-
opcodes/i386-gen.c | 3 +
opcodes/i386-init.h | 740 +++++-----
opcodes/i386-mnem.h | 1246 +++++++++--------
opcodes/i386-opc.h | 3 +
opcodes/i386-opc.tbl | 3 +
opcodes/i386-tbl.h | 239 ++--
17 files changed, 1274 insertions(+), 1091 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-bad.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-bad.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32.s
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 3d74963f447..54382392a98 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1183,6 +1183,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
SUBARCH (amx_transpose, AMX_TRANSPOSE, ANY_AMX_TRANSPOSE, false),
+ SUBARCH (amx_tf32, AMX_TF32, ANY_AMX_TF32, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 8c0b154d584..45ef8566837 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -229,6 +229,7 @@ accept various extension mnemonics. For example,
@code{amx_fp16},
@code{amx_complex},
@code{amx_transpose},
+@code{amx_tf32},
@code{amx_tile},
@code{vmx},
@code{vmfunc},
@@ -1701,7 +1702,8 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16}
-@item @samp{.amx_complex} @tab @samp{.amx_transpose} @tab @samp{.amx_tile}
+@item @samp{.amx_complex} @tab @samp{.amx_transpose} @tab @samp{.amx_tf32}
+@item @samp{.amx_tile}
@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-bad.d b/gas/testsuite/gas/i386/x86-64-amx-tf32-bad.d
new file mode 100644
index 00000000000..bcf8674b06c
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-bad.d
@@ -0,0 +1,16 @@
+#objdump: -drw
+#name: x86_64 AMX_TF32 bad insns
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+\s*[a-f0-9]+:\s*c4 e2 71 48 d1\s+tmmultf32ps %tmm1/\(bad\),%tmm1/\(bad\),%tmm2
+\s*[a-f0-9]+:\s*c4 e2 69 48 c9\s+tmmultf32ps %tmm2,%tmm1/\(bad\),%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e2 71 48 ca\s+tmmultf32ps %tmm1/\(bad\),%tmm2,%tmm1\/\(bad\)
+\s*[a-f0-9]+:\s*c4 e2 70 48 d1\s+ttmmultf32ps %tmm1/\(bad\),%tmm1/\(bad\),%tmm2
+\s*[a-f0-9]+:\s*c4 e2 68 48 c9\s+ttmmultf32ps %tmm2,%tmm1/\(bad\),%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e2 70 48 ca\s+ttmmultf32ps %tmm1/\(bad\),%tmm2,%tmm1/\(bad\)
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-bad.s b/gas/testsuite/gas/i386/x86-64-amx-tf32-bad.s
new file mode 100644
index 00000000000..3d4ea3dce77
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-bad.s
@@ -0,0 +1,18 @@
+.text
+ #tmmultf32ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
+ .insn VEX.128.66.0F38.W0 0x48, %tmm1, %tmm1, %tmm2
+
+ #tmmultf32ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
+ .insn VEX.128.66.0F38.W0 0x48, %tmm1, %tmm2, %tmm1
+
+ #tmmultf32ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
+ .insn VEX.128.66.0F38.W0 0x48, %tmm2, %tmm1, %tmm1
+
+ #ttmmultf32ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
+ .insn VEX.128.NP.0F38.W0 0x48, %tmm1, %tmm1, %tmm2
+
+ #ttmmultf32ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
+ .insn VEX.128.NP.0F38.W0 0x48, %tmm1, %tmm2, %tmm1
+
+ #ttmmultf32ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
+ .insn VEX.128.NP.0F38.W0 0x48, %tmm2, %tmm1, %tmm1
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d b/gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d
new file mode 100644
index 00000000000..cc9a1d34061
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d
@@ -0,0 +1,15 @@
+#objdump: -dw -Mintel
+#name: x86_64 AMX-TF32 insns (Intel disassembly)
+#source: x86-64-amx-tf32.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*c4 e2 59 48 f5\s+tmmultf32ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 71 48 da\s+tmmultf32ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e2 58 48 f5\s+ttmmultf32ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 70 48 da\s+ttmmultf32ps tmm3,tmm2,tmm1
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
new file mode 100644
index 00000000000..ea6004936e6
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
@@ -0,0 +1,7 @@
+.* Assembler messages:
+.*:5: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:6: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:7: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:8: Error: all tmm registers must be distinct for `ttmmultf32ps'
+.*:9: Error: all tmm registers must be distinct for `ttmmultf32ps'
+.*:10: Error: all tmm registers must be distinct for `ttmmultf32ps'
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s
new file mode 100644
index 00000000000..56a3b46e6ea
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s
@@ -0,0 +1,10 @@
+# Check Illegal 64bit AMX-TF32 instructions
+
+ .text
+_start:
+ tmmultf32ps %tmm1, %tmm1, %tmm2
+ tmmultf32ps %tmm1, %tmm2, %tmm1
+ tmmultf32ps %tmm2, %tmm1, %tmm1
+ ttmmultf32ps %tmm1, %tmm1, %tmm2
+ ttmmultf32ps %tmm1, %tmm2, %tmm1
+ ttmmultf32ps %tmm2, %tmm1, %tmm1
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32.d b/gas/testsuite/gas/i386/x86-64-amx-tf32.d
new file mode 100644
index 00000000000..4fa91cbc040
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32.d
@@ -0,0 +1,13 @@
+#objdump: -dw
+#name: x86_64 AMX-TF32 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 59 48 f5\s+tmmultf32ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 71 48 da\s+tmmultf32ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e2 58 48 f5\s+ttmmultf32ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 70 48 da\s+ttmmultf32ps %tmm1,%tmm2,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32.s b/gas/testsuite/gas/i386/x86-64-amx-tf32.s
new file mode 100644
index 00000000000..9c1433ed49b
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32.s
@@ -0,0 +1,15 @@
+# Check 64bit AMX-TF32 instructions
+
+ .text
+_start:
+ tmmultf32ps %tmm4, %tmm5, %tmm6
+ tmmultf32ps %tmm1, %tmm2, %tmm3
+ ttmmultf32ps %tmm4, %tmm5, %tmm6
+ ttmmultf32ps %tmm1, %tmm2, %tmm3
+
+_intel:
+ .intel_syntax noprefix
+ tmmultf32ps tmm6, tmm5, tmm4
+ tmmultf32ps tmm3, tmm2, tmm1
+ ttmmultf32ps tmm6, tmm5, tmm4
+ ttmmultf32ps tmm3, tmm2, tmm1
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index ca3733b0dde..15f8e289675 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -527,6 +527,10 @@ run_dump_test "x86-64-amx-transpose"
run_dump_test "x86-64-amx-transpose-intel"
run_list_test "x86-64-amx-transpose-inval"
run_dump_test "x86-64-amx-transpose-bad"
+run_dump_test "x86-64-amx-tf32"
+run_dump_test "x86-64-amx-tf32-intel"
+run_list_test "x86-64-amx-tf32-inval"
+run_dump_test "x86-64-amx-tf32-bad"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index f9767edb314..55a4f978475 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -1131,6 +1131,7 @@ enum
PREFIX_VEX_0F98_L_0_W_1,
PREFIX_VEX_0F99_L_0_W_0,
PREFIX_VEX_0F99_L_0_W_1,
+ PREFIX_VEX_0F3848_X86_64_L_0_W_0,
PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0,
PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_1,
PREFIX_VEX_0F384B_X86_64_L_0_W_0,
@@ -1347,6 +1348,7 @@ enum
X86_64_0F38F8_M_1,
X86_64_0FC7_REG_6_MOD_3_PREFIX_1,
+ X86_64_VEX_0F3848,
X86_64_VEX_0F3849,
X86_64_VEX_0F384B,
X86_64_VEX_0F385C,
@@ -1429,6 +1431,7 @@ enum
VEX_LEN_0F381A,
VEX_LEN_0F3836,
VEX_LEN_0F3841,
+ VEX_LEN_0F3848_X86_64,
VEX_LEN_0F3849_X86_64,
VEX_LEN_0F384B_X86_64,
VEX_LEN_0F385A,
@@ -1600,6 +1603,7 @@ enum
VEX_W_0F382F,
VEX_W_0F3836,
VEX_W_0F3846,
+ VEX_W_0F3848_X86_64_L_0,
VEX_W_0F3849_X86_64_L_0,
VEX_W_0F384B_X86_64_L_0,
VEX_W_0F3850,
@@ -4068,6 +4072,13 @@ static const struct dis386 prefix_table[][4] = {
{ "ktestd", { MaskG, MaskR }, 0 },
},
+ /* PREFIX_VEX_0F3848_X86_64_L_0_W_0 */
+ {
+ { "ttmmultf32ps", { TMM, Rtmm, VexTmm }, 0 },
+ { Bad_Opcode },
+ { "tmmultf32ps", { TMM, Rtmm, VexTmm }, 0 },
+ },
+
/* PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0 */
{
{ "ldtilecfg", { M }, 0 },
@@ -4603,6 +4614,12 @@ static const struct dis386 x86_64_table[][2] = {
{ "senduipi", { Eq }, 0 },
},
+ /* X86_64_VEX_0F3848 */
+ {
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F3848_X86_64) },
+ },
+
/* X86_64_VEX_0F3849 */
{
{ Bad_Opcode },
@@ -6514,7 +6531,7 @@ static const struct dis386 vex_table[][256] = {
{ VEX_W_TABLE (VEX_W_0F3846) },
{ "vpsllv%DQ", { XM, Vex, EXx }, PREFIX_DATA },
/* 48 */
- { Bad_Opcode },
+ { X86_64_TABLE (X86_64_VEX_0F3848) },
{ X86_64_TABLE (X86_64_VEX_0F3849) },
{ Bad_Opcode },
{ X86_64_TABLE (X86_64_VEX_0F384B) },
@@ -7194,6 +7211,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "vphminposuw", { XM, EXx }, PREFIX_DATA },
},
+ /* VEX_LEN_0F3848_X86_64 */
+ {
+ { VEX_W_TABLE (VEX_W_0F3848_X86_64_L_0) },
+ },
+
/* VEX_LEN_0F3849_X86_64 */
{
{ VEX_W_TABLE (VEX_W_0F3849_X86_64_L_0) },
@@ -7880,6 +7902,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F3846 */
{ "vpsravd", { XM, Vex, EXx }, PREFIX_DATA },
},
+ {
+ /* VEX_W_0F3848_X86_64_L_0 */
+ { PREFIX_TABLE (PREFIX_VEX_0F3848_X86_64_L_0_W_0) },
+ },
{
/* VEX_W_0F3849_X86_64_L_0 */
{ MOD_TABLE (MOD_VEX_0F3849_X86_64_L_0_W_0) },
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 4521a6639d3..ad5ab897b28 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -267,6 +267,8 @@ static const dependency isa_dependencies[] =
"AMX_TILE" },
{ "AMX_TRANSPOSE",
"AMX_TILE" },
+ { "AMX_TF32",
+ "AMX_TILE" },
{ "KL",
"SSE2" },
{ "WIDEKL",
@@ -434,6 +436,7 @@ static bitfield cpu_flags[] =
BITFIELD (AMX_FP16),
BITFIELD (AMX_COMPLEX),
BITFIELD (AMX_TRANSPOSE),
+ BITFIELD (AMX_TF32),
BITFIELD (AMX_TILE),
BITFIELD (MOVDIRI),
BITFIELD (MOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 223a53a96fb..5173c053494 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -250,6 +250,8 @@ enum i386_cpu
CpuAMX_FP16,
/* AMX-COMPLEX instructions required. */
CpuAMX_COMPLEX,
+ /* AMX-TF32 instructions required. */
+ CpuAMX_TF32,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -500,6 +502,7 @@ typedef union i386_cpu_flags
unsigned int cpuamx_bf16:1;
unsigned int cpuamx_fp16:1;
unsigned int cpuamx_complex:1;
+ unsigned int cpuamx_tf32:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 7b018adf35b..6697fe9d4f0 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3226,6 +3226,9 @@ ttdpfp16ps, 0xf26c, AMX_FP16&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW
ttransposed, 0xf35f, AMX_TRANSPOSE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, RegTMM }
+tmmultf32ps, 0x6648, AMX_TF32, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+ttmmultf32ps, 0x48, AMX_TF32&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+
// AMX instructions end.
// KEYLOCKER instructions.
--
2.31.1
More information about the Binutils
mailing list