[PATCH v2 1/4] x86: fold certain VEX and EVEX templates
Jan Beulich
jbeulich@suse.com
Tue Sep 19 15:44:26 GMT 2023
In anticipation of APX introduce logic to reduce the number of templates
we have now, allowing to limit some the number of ones we then need to
gain.
The fundamental requirements are that
- attributes be compatible, which specifically means VexW needs to be
the same in the templates (which often isn't the case, for VEX
encodings having far more WIG tha, EVEX ones),
- the EVEX form being AVX512F (with or without AVX512VL), not any of its
extensions (the same will then be required for APX - it'll need to be
APX_F).
Note that in check_register() there's now a redundant zmm check. Since
this logic will need revisiting for APX anyway, I'd like to keep it that
way for now. (Similarly a couple of if()-s which could be folded are
kept separate, to reduce code churn when adding APX support.)
---
RFC: Of course there are quite a few code changes, so there is the
question of the savings being worth it.
The AVX512F constraint may be possible to relax, but the change is big
enough already.
---
v2: Deal with need_evex_encoding() being unreliable ahead of operand
parsing (the difference really is largely benign here, but becomes
relevant in subsequent changes).
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -436,6 +436,7 @@ struct _i386_insn
vex_encoding_vex,
vex_encoding_vex3,
vex_encoding_evex,
+ vex_encoding_evex512,
vex_encoding_error
} vec_encoding;
@@ -1872,6 +1873,13 @@ cpu_flags_and_not (i386_cpu_flags x, i38
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
+static INLINE bool need_evex_encoding (void)
+{
+ return i.vec_encoding == vex_encoding_evex
+ || i.vec_encoding == vex_encoding_evex512
+ || i.mask.reg;
+}
+
#define CPU_FLAGS_ARCH_MATCH 0x1
#define CPU_FLAGS_64BIT_MATCH 0x2
@@ -1899,6 +1907,29 @@ cpu_flags_match (const insn_template *t)
/* This instruction is available only on some archs. */
i386_cpu_flags cpu = cpu_arch_flags;
+ /* Dual VEX/EVEX templates may need stripping of one of the flags. */
+ if (t->opcode_modifier.vex && t->opcode_modifier.evex)
+ {
+ /* Dual AVX/AVX512F templates need to retain AVX512F only if we already
+ know that EVEX encoding will be needed. */
+ if ((x.bitfield.cpuavx || x.bitfield.cpuavx2)
+ && x.bitfield.cpuavx512f)
+ {
+ if (need_evex_encoding ())
+ {
+ x.bitfield.cpuavx = 0;
+ x.bitfield.cpuavx2 = 0;
+ }
+ /* need_evex_encoding() isn't reliable before operands were
+ parsed. */
+ else if (i.operands)
+ {
+ x.bitfield.cpuavx512f = 0;
+ x.bitfield.cpuavx512vl = 0;
+ }
+ }
+ }
+
/* AVX512VL is no standalone feature - match it and then strip it. */
if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
return match;
@@ -1924,6 +1955,8 @@ cpu_flags_match (const insn_template *t)
&& (!x.bitfield.cpupclmulqdq || cpu.bitfield.cpupclmulqdq))
match |= CPU_FLAGS_ARCH_MATCH;
}
+ else if (x.bitfield.cpuavx2 && cpu.bitfield.cpuavx2)
+ match |= CPU_FLAGS_ARCH_MATCH;
else if (x.bitfield.cpuavx512f)
{
/* We need to check a few extra flags with AVX512F. */
@@ -3646,6 +3679,27 @@ install_template (const insn_template *t
i.tm = *t;
+ /* Dual VEX/EVEX templates need stripping one of the possible variants. */
+ if (t->opcode_modifier.vex && t->opcode_modifier.evex)
+ {
+ if ((is_cpu (t, CpuAVX) || is_cpu (t, CpuAVX2))
+ && is_cpu (t, CpuAVX512F))
+ {
+ if (need_evex_encoding ())
+ {
+ i.tm.opcode_modifier.vex = 0;
+ i.tm.cpu.bitfield.cpuavx = 0;
+ if (is_cpu (&i.tm, CpuAVX2))
+ i.tm.cpu.bitfield.isa = 0;
+ }
+ else
+ {
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.cpu.bitfield.cpuavx512f = 0;
+ }
+ }
+ }
+
/* Note that for pseudo prefixes this produces a length of 1. But for them
the length isn't interesting at all. */
for (l = 1; l < 4; ++l)
@@ -4553,6 +4607,8 @@ optimize_encoding (void)
i.tm.opcode_modifier.vex = VEX128;
i.tm.opcode_modifier.vexw = VEXW0;
i.tm.opcode_modifier.evex = 0;
+ i.vec_encoding = vex_encoding_vex;
+ i.mask.reg = NULL;
}
else if (optimize > 1)
i.tm.opcode_modifier.evex = EVEX128;
@@ -5438,6 +5494,11 @@ md_assemble (char *line)
if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
optimize_encoding ();
+ /* Past optimization there's no need to distinguish vex_encoding_evex and
+ vex_encoding_evex512 anymore. */
+ if (i.vec_encoding == vex_encoding_evex512)
+ i.vec_encoding = vex_encoding_evex;
+
if (use_unaligned_vector_move)
encode_with_unaligned_vector_move ();
@@ -5467,6 +5528,7 @@ md_assemble (char *line)
if (i.tm.operand_types[j].bitfield.tmmword)
i.xstate |= xstate_tmm;
else if (i.tm.operand_types[j].bitfield.zmmword
+ && !i.tm.opcode_modifier.vex
&& vector_size >= VSZ512)
i.xstate |= xstate_zmm;
else if (i.tm.operand_types[j].bitfield.ymmword
@@ -6468,7 +6530,8 @@ check_VecOperands (const insn_template *
cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
if (!cpu_flags_all_zero (&cpu)
&& !is_cpu (t, CpuAVX512VL)
- && !cpu_arch_flags.bitfield.cpuavx512vl)
+ && !cpu_arch_flags.bitfield.cpuavx512vl
+ && (!t->opcode_modifier.vex || need_evex_encoding ()))
{
for (op = 0; op < t->operands; ++op)
{
@@ -6779,6 +6842,8 @@ check_VecOperands (const insn_template *
/* Check vector Disp8 operand. */
if (t->opcode_modifier.disp8memshift
+ && (!t->opcode_modifier.vex
+ || need_evex_encoding ())
&& i.disp_encoding <= disp_encoding_8bit)
{
if (i.broadcast.type || i.broadcast.bytes)
@@ -6874,7 +6939,8 @@ VEX_check_encoding (const insn_template
return 1;
}
- if (i.vec_encoding == vex_encoding_evex)
+ if (i.vec_encoding == vex_encoding_evex
+ || i.vec_encoding == vex_encoding_evex512)
{
/* This instruction must be encoded with EVEX prefix. */
if (!is_evex_encoding (t))
@@ -11211,6 +11277,10 @@ s_insn (int dummy ATTRIBUTE_UNUSED)
goto done;
}
+ /* No need to distinguish vex_encoding_evex and vex_encoding_evex512. */
+ if (i.vec_encoding == vex_encoding_evex512)
+ i.vec_encoding = vex_encoding_evex;
+
/* Are we to emit ModR/M encoding? */
if (!i.short_form
&& (i.mem_operands
@@ -11633,6 +11703,12 @@ RC_SAE_specifier (const char *pstr)
return NULL;
}
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex512;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ return NULL;
+
i.rounding.type = RC_NamesTable[j].type;
return (char *)(pstr + RC_NamesTable[j].len);
@@ -11692,6 +11768,12 @@ check_VecOperations (char *op_string)
}
op_string++;
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ goto unknown_vec_op;
+
i.broadcast.type = bcst_type;
i.broadcast.operand = this_operand;
@@ -13953,8 +14035,17 @@ static bool check_register (const reg_en
}
}
- if (vector_size < VSZ512 && r->reg_type.bitfield.zmmword)
- return false;
+ if (r->reg_type.bitfield.zmmword)
+ {
+ if (vector_size < VSZ512)
+ return false;
+
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex512;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ i.vec_encoding = vex_encoding_error;
+ }
if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
return false;
@@ -13979,7 +14070,8 @@ static bool check_register (const reg_en
|| flag_code != CODE_64BIT)
return false;
- if (i.vec_encoding == vex_encoding_default)
+ if (i.vec_encoding == vex_encoding_default
+ || i.vec_encoding == vex_encoding_evex512)
i.vec_encoding = vex_encoding_evex;
else if (i.vec_encoding != vex_encoding_evex)
i.vec_encoding = vex_encoding_error;
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -209,6 +209,11 @@ operatorT i386_operator (const char *nam
|| i386_types[j].sz[0] > 8
|| (i386_types[j].sz[0] & (i386_types[j].sz[0] - 1)))
return O_illegal;
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ return O_illegal;
if (!i.broadcast.bytes && !i.broadcast.type)
{
i.broadcast.bytes = i386_types[j].sz[0];
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -131,6 +131,8 @@
#define EVexLIG EVex=EVEXLIG
#define EVexDYN EVex=EVEXDYN
+#define Disp8ShiftVL Disp8MemShift=DISP8_SHIFT_VL
+
#define Vsz256 Vsz=VSZ256
#define Vsz512 Vsz=VSZ512
@@ -1518,8 +1520,8 @@ vdivs<sd>, 0x<sd:spfx>5e, AVX, Modrm|Vex
vdppd, 0x6641, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vextractf128, 0x6619, AVX, Modrm|Vex=2|Space0F3A|VexW=1|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
-vextractps, 0x6617, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Dword|Unspecified|BaseIndex }
-vextractps, 0x6617, AVX|x64, RegMem|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg64 }
+vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf, { Imm8, RegXMM, Reg32|Dword|Unspecified|BaseIndex }
+vextractps, 0x6617, AVX|AVX512F|x64, RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg64 }
vhaddpd, 0x667c, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
@@ -1541,7 +1543,7 @@ vmovap<sd>, 0x<sd:ppfx>28, AVX, D|Modrm|
// by Intel AVX spec). To avoid extra template in gcc x86 backend and
// support assembler for AMD64, we accept 64bit operand on vmovd so
// that we can use one template for both SSE and AVX instructions.
-vmovd, 0x666e, AVX, D|Modrm|Vex=1|Space0F|NoSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
+vmovd, 0x666e, AVX|AVX512F, D|Modrm|Vex128|EVex128|Space0F|Disp8MemShift=2|NoSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
vmovd, 0x667e, AVX|x64, D|RegMem|Vex=1|Space0F|VexW=2|NoSuf|Size64, { RegXMM, Reg64 }
vmovddup, 0xf212, AVX, Modrm|Vex|Space0F|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
vmovddup, 0xf212, AVX, Modrm|Vex=2|Space0F|VexWIG|NoSuf, { Unspecified|BaseIndex|RegYMM, RegYMM }
@@ -1559,7 +1561,7 @@ vmovntdqa, 0x662a, AVX|AVX2, Modrm|Vex|S
vmovntp<sd>, 0x<sd:ppfx>2b, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
vmovq, 0xf37e, AVX, Load|Modrm|Vex=1|Space0F|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
vmovq, 0x66d6, AVX, Modrm|Vex=1|Space0F|VexWIG|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
-vmovq, 0x666e, AVX|x64, D|Modrm|Vex=1|Space0F|VexW=2|NoSuf, { Reg64|Unspecified|BaseIndex, RegXMM }
+vmovq, 0x666e, AVX|AVX512F|x64, D|Modrm|Vex128|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf, { Reg64|Unspecified|BaseIndex, RegXMM }
vmovs<sd>, 0x<sd:spfx>10, AVX, D|Modrm|VexLIG|Space0F|VexWIG|NoSuf, { <sd:elem>|Unspecified|BaseIndex, RegXMM }
vmovs<sd>, 0x<sd:spfx>10, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexWIG|NoSuf, { RegXMM, RegXMM, RegXMM }
vmovshdup, 0xf316, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
@@ -1599,8 +1601,10 @@ vpcmpgtq, 0x6637, AVX|AVX2, Modrm|Vex|Sp
vpcmpistri, 0x6663, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
vpcmpistrm, 0x6662, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegXMM }
vperm2f128, 0x6606, AVX, Modrm|Vex256|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpermilp<sd>, 0x660c | <sd:opc>, AVX, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpermilp<sd>, 0x6604 | <sd:opc>, AVX, Modrm|Vex|Space0F3A|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
+vpermilps, 0x660c, AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermilps, 0x6604, AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F3A|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpermilpd, 0x660d, AVX, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpermilpd, 0x6605, AVX, Modrm|Vex|Space0F3A|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
vpextr<dq>, 0x6616, AVX|<dq:cpu64>, Modrm|Vex|Space0F3A|<dq:vexw64>|NoSuf, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
vpextrw, 0x66c5, AVX, Load|Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_sSuf, { Imm8, RegXMM, Reg32|Reg64 }
vpextr<bw>, 0x6614 | <bw:opc>, AVX, RegMem|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
@@ -1632,18 +1636,18 @@ vpminub, 0x66da, AVX|AVX2, Modrm|C|Vex|S
vpminud, 0x663b, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpminuw, 0x663a, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpmovmskb, 0x66d7, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_sSuf, { RegXMM|RegYMM, Reg32|Reg64 }
-vpmovsxbd, 0x6621, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovsxbq, 0x6622, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
+vpmovsxbd, 0x6621, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
+vpmovsxbq, 0x6622, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
vpmovsxbw, 0x6620, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
vpmovsxdq, 0x6625, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovsxwd, 0x6623, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovsxwq, 0x6624, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovzxbd, 0x6631, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovzxbq, 0x6632, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
+vpmovsxwd, 0x6623, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
+vpmovsxwq, 0x6624, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
+vpmovzxbd, 0x6631, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
+vpmovzxbq, 0x6632, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
vpmovzxbw, 0x6630, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
vpmovzxdq, 0x6635, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovzxwd, 0x6633, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-vpmovzxwq, 0x6634, AVX, Modrm|Vex|Space0F38|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+vpmovzxwd, 0x6633, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
+vpmovzxwq, 0x6634, AVX|AVX512F|AVX512VL, Modrm|Vex128|EVex128|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
vpmuldq, 0x6628, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpmulhrsw, 0x660b, AVX|AVX2, Modrm|Vex|Space0F38|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpmulhuw, 0x66e4, AVX|AVX2, Modrm|C|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
@@ -1710,39 +1714,40 @@ vzeroupper, 0x77, AVX, Vex|Space0F|VexWI
// 256bit integer AVX2 instructions.
-vpmovsxbd, 0x6621, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovsxbq, 0x6622, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegYMM }
+vpmovsxbd, 0x6621, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
+vpmovsxbq, 0x6622, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegYMM }
vpmovsxbw, 0x6620, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
vpmovsxdq, 0x6625, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovsxwd, 0x6623, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovsxwq, 0x6624, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovzxbd, 0x6631, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovzxbq, 0x6632, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegYMM }
+vpmovsxwd, 0x6623, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
+vpmovsxwq, 0x6624, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
+vpmovzxbd, 0x6631, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
+vpmovzxbq, 0x6632, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegYMM }
vpmovzxbw, 0x6630, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
vpmovzxdq, 0x6635, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovzxwd, 0x6633, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Unspecified|BaseIndex|RegXMM, RegYMM }
-vpmovzxwq, 0x6634, AVX2, Modrm|Vex=2|Space0F38|VexWIG|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegYMM }
+vpmovzxwd, 0x6633, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
+vpmovzxwq, 0x6634, AVX2|AVX512F|AVX512VL, Modrm|Vex256|EVex256|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
// New AVX2 instructions.
vbroadcasti128, 0x665A, AVX2, Modrm|Vex=2|Space0F38|VexW=1|NoSuf, { Xmmword|Unspecified|BaseIndex, RegYMM }
vbroadcastsd, 0x6619, AVX2, Modrm|Vex=2|Space0F38|VexW=1|NoSuf, { RegXMM, RegYMM }
-vbroadcastss, 0x6618, AVX2, Modrm|Vex|Space0F38|VexW=1|NoSuf, { RegXMM, RegXMM|RegYMM }
+vbroadcastss, 0x6618, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexW0|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpblendd, 0x6602, AVX2, Modrm|Vex|Space0F3A|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpbroadcast<bw>, 0x6678 | <bw:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf, { <bw:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
-vpbroadcast<dq>, 0x6658 | <dq:opc>, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf|Optimize, { <dq:elem>|Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM }
+vpbroadcastd, 0x6658, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexW0|Disp8MemShift|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpbroadcastq, 0x6659, AVX2, Modrm|Vex|Space0F38|VexW0|NoSuf|Optimize, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
vperm2i128, 0x6646, AVX2, Modrm|Vex=2|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpermd, 0x6636, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|NoSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpermpd, 0x6601, AVX2, Modrm|Vex=2|Space0F3A|VexW1|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM }
-vpermps, 0x6616, AVX2, Modrm|Vex256|Space0F38|VexVVVV|VexW0|NoSuf, { Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }
-vpermq, 0x6600, AVX2, Modrm|Vex=2|Space0F3A|VexW1|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM }
+vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vpermpd, 0x6601, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
+vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
+vpermq, 0x6600, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
vextracti128, 0x6639, AVX2, Modrm|Vex=2|Space0F3A|VexW=1|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|VexVVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
vpmaskmov<dq>, 0x668e, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { RegXMM|RegYMM, RegXMM|RegYMM, Xmmword|Ymmword|Unspecified|BaseIndex }
vpmaskmov<dq>, 0x668c, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsllv<dq>, 0x6647, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsravd, 0x6646, AVX2, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsrlv<dq>, 0x6645, AVX2, Modrm|Vex|Space0F38|VexVVVV|<dq:vexw>|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsllv<dq>, 0x6647, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsravd, 0x6646, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsrlv<dq>, 0x6645, AVX2|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
// AVX gather instructions
vgatherdpd, 0x6692, AVX2, Modrm|Vex|Space0F38|VexVVVV|VexW1|SwapSources|CheckOperandSize|NoSuf|VecSIB128, { RegXMM|RegYMM, Qword|Unspecified|BaseIndex, RegXMM|RegYMM }
@@ -1779,7 +1784,7 @@ vpclmulhqhqdq, 0x6644/0x11, AVX|PCLMULQD
vgf2p8affineinvqb, 0x66cf, AVX|GFNI, Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vgf2p8affineqb, 0x66ce, AVX|GFNI, Modrm|Vex|Space0F3A|VexVVVV|VexW1|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vgf2p8mulb, 0x66cf, AVX|GFNI, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
+vgf2p8mulb, 0x66cf, GFNI|AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F38|VexVVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
// FSGSBASE, RDRND and F16C
@@ -2082,8 +2087,6 @@ vpclmulhqhqdq, 0x6644/0x11, VPCLMULQDQ,
// AVX512F instructions.
-#define Disp8ShiftVL Disp8MemShift=DISP8_SHIFT_VL
-
<sdh:cpu:cpudq:ppfx:spfx:pfx:spc1:spc2:opc:vexw:elem, +
s:AVX512F:AVX512DQ::f3:66:Space0F:Space0F38:0:VexW0:Dword, +
d:AVX512F:AVX512DQ:66:f2:66:Space0F:Space0F38:1:VexW1:Qword, +
@@ -2142,9 +2145,7 @@ vpmuldq, 0x6628, AVX512F, Modrm|Masking|
vpmulld, 0x6640, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW=1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vprolv<dq>, 0x6615, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vprorv<dq>, 0x6614, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsllv<dq>, 0x6647, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrav<dq>, 0x6646, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsrlv<dq>, 0x6645, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpsravq, 0x6646, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vpternlog<dq>, 0x6625, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vbroadcastf32x4, 0x661A, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { XMMword|Unspecified|BaseIndex, RegYMM|RegZMM }
@@ -2153,10 +2154,9 @@ vbroadcasti32x4, 0x665A, AVX512F, Modrm|
vbroadcastf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexW=2|Disp8MemShift=5|NoSuf, { YMMword|Unspecified|BaseIndex, RegZMM }
vbroadcasti64x4, 0x665B, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexW=2|Disp8MemShift=5|NoSuf, { YMMword|Unspecified|BaseIndex, RegZMM }
-vbroadcastss, 0x6618, AVX512F, Modrm|Masking|Space0F38|VexW0|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vbroadcastsd, 0x6619, AVX512F, Modrm|Masking|Space0F38|VexW1|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
-vpbroadcast<dq>, 0x6658 | <dq:opc>, AVX512F, Modrm|Masking|Space0F38|<dq:vexw>|Disp8MemShift|NoSuf, { RegXMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpbroadcastq, 0x6659, AVX512F, Modrm|Masking|Space0F38|VexW1|Disp8MemShift|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpbroadcast<dq>, 0x667c, AVX512F, Modrm|Masking|Space0F38|<dq:vexw64>|NoSuf, { <dq:gpr>, RegXMM|RegYMM|RegZMM }
vcmp<frel>p<sd>, 0x<sd:ppfx>C2/0x<frel:imm>, AVX512F, Modrm|Masking|Space0F|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|ImmExt|SAE, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
@@ -2246,9 +2246,6 @@ vextracti32x4, 0x6639, AVX512F, Modrm|Ma
vextractf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
vextracti64x4, 0x663B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
-vextractps, 0x6617, AVX512F, Modrm|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf, { Imm8, RegXMM, Reg32|Dword|Unspecified|BaseIndex }
-vextractps, 0x6617, AVX512F|x64, RegMem|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg64 }
-
vfixupimmp<sd>, 0x6654, AVX512F, Modrm|Masking|Space0F3A|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vfixupimms<sd>, 0x6655, AVX512F, Modrm|EVexLIG|Masking|Space0F3A|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8|Imm8S, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
@@ -2304,8 +2301,6 @@ vmovap<sd>, 0x<sd:ppfx>28, AVX512F, D|Mo
vmovntp<sd>, 0x<sd:ppfx>2B, AVX512F, Modrm|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM, XMMword|YMMword|ZMMword|Unspecified|BaseIndex }
vmovup<sd>, 0x<sd:ppfx>10, AVX512F, D|Modrm|Masking|Space0F|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vmovd, 0x666E, AVX512F, D|Modrm|EVex=2|Space0F|Disp8MemShift=2|NoSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-
vmovddup, 0xF212, AVX512F, Modrm|Masking|Space0F|VexW=2|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Unspecified|BaseIndex, RegYMM|RegZMM }
vmovdqa64, 0x666F, AVX512F, D|Modrm|Masking|Space0F|VexW=2|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
@@ -2322,7 +2317,6 @@ vmovhp<sd>, 0x<sd:ppfx>17, AVX512F, Modr
vmovlp<sd>, 0x<sd:ppfx>12, AVX512F, Modrm|EVexLIG|Space0F|VexVVVV|<sd:vexw>|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM, RegXMM }
vmovlp<sd>, 0x<sd:ppfx>13, AVX512F, Modrm|EVexLIG|Space0F|<sd:vexw>|Disp8MemShift=3|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex }
-vmovq, 0x666E, AVX512F|x64, D|Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf, { Reg64|Unspecified|BaseIndex, RegXMM }
vmovq, 0xF37E, AVX512F, Load|Modrm|EVex=2|Space0F|VexW1|Disp8MemShift=3|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
vmovq, 0x66D6, AVX512F, Modrm|EVex=2|Space0F|VexW1|Disp8MemShift=3|NoSuf, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
@@ -2360,15 +2354,10 @@ vpcmp<irel>u<dq>, 0x661e/<irel:imm>, AVX
vptestm<dq>, 0x6627, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
vptestnm<dq>, 0xf327, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
-vpermd, 0x6636, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vpermps, 0x6616, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-
-vpermilp<sd>, 0x6604 | <sd:opc>, AVX512F, Modrm|Masking|Space0F3A|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vpermilp<sd>, 0x660C | <sd:opc>, AVX512F, Modrm|Masking|Space0F38|VexVVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vpermilpd, 0x6605, AVX512F, Modrm|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpermilpd, 0x660d, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpermpd, 0x6601, AVX512F, Modrm|Masking|Space0F3A|VexW=2|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
vpermpd, 0x6616, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
-vpermq, 0x6600, AVX512F, Modrm|Masking|Space0F3A|VexW=2|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
vpermq, 0x6636, AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
vpmovdb, 0xF331, AVX512F, Modrm|EVex=1|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { RegZMM, RegXMM|Unspecified|BaseIndex }
@@ -2593,31 +2582,11 @@ vpmovsqw, 0xF324, AVX512F|AVX512VL, Modr
vpmovusqw, 0xF314, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexW0|Disp8MemShift=2|NoSuf, { RegXMM, RegXMM|Dword|Unspecified|BaseIndex }
vpmovusqw, 0xF314, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexW0|Disp8MemShift=3|NoSuf, { RegYMM, RegXMM|Qword|Unspecified|BaseIndex }
-vpmovsxbd, 0x6621, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
-vpmovsxbd, 0x6621, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
-vpmovzxbd, 0x6631, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
-vpmovzxbd, 0x6631, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
-
-vpmovsxbq, 0x6622, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
-vpmovsxbq, 0x6622, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegYMM }
-vpmovzxbq, 0x6632, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=1|NoSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
-vpmovzxbq, 0x6632, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegYMM }
-
vpmovsxdq, 0x6625, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexW0|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
vpmovsxdq, 0x6625, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
vpmovzxdq, 0x6635, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexW0|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
vpmovzxdq, 0x6635, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexW=1|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
-vpmovsxwd, 0x6623, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
-vpmovsxwd, 0x6623, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
-vpmovzxwd, 0x6633, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
-vpmovzxwd, 0x6633, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=4|NoSuf, { RegXMM|Unspecified|BaseIndex, RegYMM }
-
-vpmovsxwq, 0x6624, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
-vpmovsxwq, 0x6624, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
-vpmovzxwq, 0x6634, AVX512F|AVX512VL, Modrm|EVex=2|Masking|Space0F38|VexWIG|Disp8MemShift=2|NoSuf, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
-vpmovzxwq, 0x6634, AVX512F|AVX512VL, Modrm|EVex=3|Masking|Space0F38|VexWIG|Disp8MemShift=3|NoSuf, { RegXMM|Qword|Unspecified|BaseIndex, RegYMM }
-
// AVX512VL instructions end.
// AVX512BW instructions.
@@ -2960,7 +2929,6 @@ vpshufbitqmb, 0x668f, AVX512_BITALG, Mod
vgf2p8affineinvqb, 0x66cf, GFNI|AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vgf2p8affineqb, 0x66ce, GFNI|AVX512F, Modrm|Masking|Space0F3A|VexVVVV|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vgf2p8mulb, 0x66cf, GFNI|AVX512F, Modrm|Masking|Space0F38|VexVVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
// AVX512 + GFNI instructions end
More information about the Binutils
mailing list