[PATCH v2 4/4] x86: fold F16C VEX and EVEX templates
Jan Beulich
jbeulich@suse.com
Tue Sep 19 15:45:59 GMT 2023
Following the folding of some generic AVX/AVX2 templates with their
AVX512F counterpart ones, do this for F16C ones as well, requiring one
further adjustment to cpu_flags_match(). Note that there is a slight
asymmetry with the FMA checks, resulting from the various vector lengths
having separate insn templates here, but being a single combined one
(each) for FMA.
---
TBD: Unlike for FMA the gains aren't as big yet the code changes are
slightly bigger. The change may therefore be deemed to not be worth
it.
---
v2: Eliminate unwanted side effect.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1926,9 +1926,12 @@ cpu_flags_match (const insn_template *t)
{
x.bitfield.cpuavx512f = 0;
x.bitfield.cpuavx512vl = 0;
- if (x.bitfield.cpufma && !cpu.bitfield.cpufma)
+ if ((x.bitfield.cpufma && !cpu.bitfield.cpufma)
+ || (x.bitfield.cpuf16c && !cpu.bitfield.cpuf16c))
x.bitfield.cpuavx = 0;
}
+ else if (cpu.bitfield.cpuf16c)
+ x.bitfield.cpuavx512vl = 0;
}
}
@@ -1953,6 +1956,8 @@ cpu_flags_match (const insn_template *t)
: cpu.bitfield.cpuavx)
&& (!x.bitfield.cpufma || cpu.bitfield.cpufma
|| cpu_arch_flags.bitfield.cpuavx512f)
+ && (!x.bitfield.cpuf16c || cpu.bitfield.cpuf16c
+ || cpu_arch_flags.bitfield.cpuavx512f)
&& (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
&& (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
&& (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1793,10 +1793,10 @@ rdgsbase, 0xf30fae/1, FSGSBase, Modrm|Ig
rdrand, 0xfc7/6, RdRnd, Modrm|NoSuf, { Reg16|Reg32|Reg64 }
wrfsbase, 0xf30fae/2, FSGSBase, Modrm|IgnoreSize|NoSuf, { Reg32|Reg64 }
wrgsbase, 0xf30fae/3, FSGSBase, Modrm|IgnoreSize|NoSuf, { Reg32|Reg64 }
-vcvtph2ps, 0x6613, F16C, Modrm|Vex|Space0F38|VexW0|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vcvtph2ps, 0x6613, F16C, Modrm|Vex=2|Space0F38|VexW=1|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
-vcvtps2ph, 0x661d, F16C, Modrm|Vex|Space0F3A|VexW0|NoSuf, { Imm8, RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
-vcvtps2ph, 0x661d, F16C, Modrm|Vex=2|Space0F3A|VexW=1|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
+vcvtph2ps, 0x6613, F16C|AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexW0|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
+vcvtph2ps, 0x6613, F16C|AVX|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexW0|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
+vcvtps2ph, 0x661D, F16C|AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F3A|VexW0|Disp8MemShift=3|NoSuf, { Imm8, RegXMM, RegXMM|Qword|Unspecified|BaseIndex }
+vcvtps2ph, 0x661D, F16C|AVX|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, RegXMM|Unspecified|BaseIndex }
// FMA instructions
@@ -2525,15 +2525,9 @@ vcvtdq2pd, 0xF3E6, AVX512F|AVX512VL, Mod
vcvtudq2pd, 0xF37A, AVX512F|AVX512VL, Modrm|EVex128|Masking|Space0F|VexW0|Broadcast|Disp8MemShift=3|NoSuf, { RegXMM|Dword|Qword|Unspecified|BaseIndex, RegXMM }
vcvtudq2pd, 0xF37A, AVX512F|AVX512VL, Modrm|EVex256|Masking|Space0F|VexW0|Broadcast|Disp8MemShift=4|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegYMM }
-vcvtph2ps, 0x6613, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexW0|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
-vcvtph2ps, 0x6613, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
-
vcvtps2pd, 0x5A, AVX512F|AVX512VL, Modrm|EVex128|Masking|Space0F|VexW0|Broadcast|Disp8MemShift=3|NoSuf, { RegXMM|Dword|Qword|Unspecified|BaseIndex, RegXMM }
vcvtps2pd, 0x5A, AVX512F|AVX512VL, Modrm|EVex256|Masking|Space0F|VexW0|Broadcast|Disp8MemShift=4|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegYMM }
-vcvtps2ph, 0x661D, AVX512F|AVX512VL, Modrm|EVex128|Masking|Space0F3A|VexW0|Disp8MemShift=3|NoSuf, { Imm8, RegXMM, RegXMM|Qword|Unspecified|BaseIndex }
-vcvtps2ph, 0x661D, AVX512F|AVX512VL, Modrm|EVex256|Masking|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, RegXMM|Unspecified|BaseIndex }
-
vmovddup, 0xF212, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F|VexW1|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
vpmovdb, 0xF331, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexW0|Disp8MemShift=2|NoSuf, { RegXMM, RegXMM|Dword|Unspecified|BaseIndex }
More information about the Binutils
mailing list