[PATCH v2 2/4] x86/APX: extend SSE2AVX coverage
Jan Beulich
jbeulich@suse.com
Fri Apr 19 09:37:57 GMT 2024
In SSE2AVX mode (-msse2avx), legacy encoded SIMD insns are converted to
AVX ones. When eGPR-s are in use, i.e. with APX, convert them to AVX10
insns instead (where available; there are quite a few which can't be
converted).
Note that LDDQU is represented as VMOVDQU32 (and the prior use of the
sse3 template there needs dropping, to get the order right).
Note further that in a few cases, due to the use of templates, AVX512VL
is used when AVX512F would suffice. Since AVX10 is the main reference,
this shouldn't be too much of a problem.
---
To preempt the question: If we didn't do this (i.e. if we left
legacy-encoded SIMD insns using eGPR-s alone), I'd raise the counter
question of why these insns are supported by APX in the first place.
By using a mask register (which supposedly shouldn't be used by legacy
SIMD code) we could likely convert further insns (by emitting a pair of
replacement ones).
---
v2: Correct MOVSD. Also deal with RCP{P,S}S and RSQRT{P,S}S. Re-work
<gfni>. Re-base.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4133,7 +4133,7 @@ build_evex_prefix (void)
/* Check the REX.W bit and VEXW. */
if (i.tm.opcode_modifier.vexw == VEXWIG)
w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
- else if (i.tm.opcode_modifier.vexw)
+ else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
else
w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
@@ -8278,7 +8278,12 @@ check_VecOperands (const insn_template *
if ((is_cpu (t, CpuXOP) && t->operands == 5)
|| (t->opcode_space == SPACE_0F3A
&& (t->base_opcode | 3) == 0x0b
- && is_cpu (t, CpuAPX_F)))
+ && (is_cpu (t, CpuAPX_F)
+ || (t->opcode_modifier.sse2avx && t->opcode_modifier.evex
+ && (!t->opcode_modifier.vex
+ || (i.encoding != encoding_default
+ && i.encoding != encoding_vex
+ && i.encoding != encoding_vex3))))))
{
if (i.op[0].imms->X_op != O_constant
|| !fits_in_imm4 (i.op[0].imms->X_add_number))
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -312,7 +312,9 @@ when respective checks fail.
@cindex @samp{-msse2avx} option, x86-64
@item -msse2avx
This option specifies that the assembler should encode SSE instructions
-with VEX prefix.
+with VEX prefix, requiring AVX to be available. SSE instructions using
+extended GPRs will be encoded with EVEX prefix, requiring AVX512 or AVX10 to
+be available.
@cindex @samp{-muse-unaligned-vector-move} option, i386
@cindex @samp{-muse-unaligned-vector-move} option, x86-64
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse2avx-apx.d
@@ -0,0 +1,261 @@
+#as: -msse2avx
+#objdump: -dw
+#name: x86-64 SSE+ with APX encoding
+
+.*: file format .*
+
+Disassembly of section .text:
+
+0+ <sse2avx>:
+[ ]*[a-f0-9]+: 62 f9 7c 08 5b 60 01 vcvtdq2ps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 ff 08 e6 60 01 vcvtpd2dqx 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 5a 60 01 vcvtpd2psx 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7d 08 5b 60 01 vcvtps2dq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 e6 60 01 vcvttpd2dqx 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 5b 60 01 vcvttps2dq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 6f 60 01 vmovdqu32 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 28 60 01 vmovapd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 28 60 01 vmovaps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7d 08 6f 60 01 vmovdqa32 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 6f 60 01 vmovdqu32 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 2a 60 01 vmovntdqa 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 16 60 01 vmovshdup 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 12 60 01 vmovsldup 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 10 60 01 vmovupd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 10 60 01 vmovups 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 1c 60 01 vpabsb 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 1d 60 01 vpabsw 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 1e 60 01 vpabsd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 4c 60 01 vrcp14ps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 4e 60 01 vrsqrt14ps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 51 60 01 vsqrtpd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 51 60 01 vsqrtps 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 29 60 01 vmovapd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 29 60 01 vmovaps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 7f 60 01 vmovdqa32 %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7e 08 7f 60 01 vmovdqu32 %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 e7 60 01 vmovntdq %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fd 08 2b 60 01 vmovntpd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 2b 60 01 vmovntps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fd 08 11 60 01 vmovupd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 11 60 01 vmovups %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 cd 08 58 70 01 vaddpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 58 70 01 vaddps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 dc 70 01 vaesenc 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 dd 70 01 vaesenclast 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 de 70 01 vaesdec 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 df 70 01 vaesdeclast 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 55 70 01 vandnpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 55 70 01 vandnps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 54 70 01 vandpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 54 70 01 vandps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5e 70 01 vdivpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5e 70 01 vdivps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 cf 70 01 vgf2p8mulb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5f 70 01 vmaxpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5f 70 01 vmaxps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5d 70 01 vminpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5d 70 01 vminps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 59 70 01 vmulpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 59 70 01 vmulps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 56 70 01 vorpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 56 70 01 vorps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 63 70 01 vpacksswb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 6b 70 01 vpackssdw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 67 70 01 vpackuswb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 2b 70 01 vpackusdw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fc 70 01 vpaddb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fd 70 01 vpaddw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fe 70 01 vpaddd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 d4 70 01 vpaddq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ec 70 01 vpaddsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ed 70 01 vpaddsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 dc 70 01 vpaddusb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 dd 70 01 vpaddusw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 db 70 01 vpandd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 df 70 01 vpandnd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e0 70 01 vpavgb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e3 70 01 vpavgw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 00 vpclmullqlqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 01 vpclmulhqlqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 10 vpclmullqhqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 11 vpclmulhqhqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f5 70 01 vpmaddwd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 04 70 01 vpmaddubsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3c 70 01 vpmaxsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ee 70 01 vpmaxsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3d 70 01 vpmaxsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 de 70 01 vpmaxub 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3e 70 01 vpmaxuw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3f 70 01 vpmaxud 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 38 70 01 vpminsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ea 70 01 vpminsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 39 70 01 vpminsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 da 70 01 vpminub 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3a 70 01 vpminuw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 3b 70 01 vpminud 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa cd 08 28 70 01 vpmuldq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e4 70 01 vpmulhuw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 0b 70 01 vpmulhrsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e5 70 01 vpmulhw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d5 70 01 vpmullw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 40 70 01 vpmulld 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 f4 70 01 vpmuludq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 eb 70 01 vpord 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f6 70 01 vpsadbw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 00 70 01 vpshufb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f1 70 01 vpsllw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f2 70 01 vpslld 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 f3 70 01 vpsllq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e1 70 01 vpsraw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e2 70 01 vpsrad 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d1 70 01 vpsrlw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d2 70 01 vpsrld 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 d3 70 01 vpsrlq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f8 70 01 vpsubb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 f9 70 01 vpsubw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 fa 70 01 vpsubd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 fb 70 01 vpsubq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e8 70 01 vpsubsb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 e9 70 01 vpsubsw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d8 70 01 vpsubusb 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 d9 70 01 vpsubusw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 68 70 01 vpunpckhbw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 69 70 01 vpunpckhwd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 6a 70 01 vpunpckhdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 6d 70 01 vpunpckhqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 60 70 01 vpunpcklbw 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 61 70 01 vpunpcklwd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 62 70 01 vpunpckldq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 6c 70 01 vpunpcklqdq 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4d 08 ef 70 01 vpxord 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 5c 70 01 vsubpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 5c 70 01 vsubps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 15 70 01 vunpckhpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 15 70 01 vunpckhps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 14 70 01 vunpcklpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 14 70 01 vunpcklps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 57 70 01 vxorpd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 57 70 01 vxorps 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 7d 08 70 70 01 64 vpshufd \$0x64,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 f9 7e 08 70 70 01 64 vpshufhw \$0x64,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 f9 7f 08 70 70 01 64 vpshuflw \$0x64,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 fb fd 08 09 70 01 04 vrndscalepd \$(0x)?4,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 fb 7d 08 08 70 01 04 vrndscaleps \$(0x)?4,0x10\(%r16\),%xmm6
+[ ]*[a-f0-9]+: 62 fb cd 08 ce 70 01 64 vgf2p8affineqb \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb cd 08 cf 70 01 64 vgf2p8affineinvqb \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 0f 70 01 64 vpalignr \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 44 70 01 64 vpclmulqdq \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cd 08 c6 70 01 64 vshufpd \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4c 08 c6 70 01 64 vshufps \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 fd 08 2f 60 02 vcomisd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 e6 60 02 vcvtdq2pd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 e6 60 02 vcvtdq2pd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 5a 60 02 vcvtps2pd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 ff 08 12 60 02 vmovddup 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 ff 08 10 60 02 vmovsd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 20 60 02 vpmovsxbw 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 23 60 02 vpmovsxwd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 25 60 02 vpmovsxdq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 30 60 02 vpmovzxbw 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 33 60 02 vpmovzxwd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 35 60 02 vpmovzxdq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 2e 60 02 vucomisd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 13 60 02 vmovlpd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 13 60 02 vmovlps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fd 08 17 60 02 vmovhpd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7c 08 17 60 02 vmovhps %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 ff 08 11 60 02 vmovsd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 dd 08 12 60 02 vmovlpd 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5c 08 12 60 02 vmovlps 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 dd 08 16 60 02 vmovhpd 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5c 08 16 60 02 vmovhps 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 7e e0 vmovq %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fd 08 6e e0 vmovq %r16,%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 7e e0 vmovq %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fd 08 6e e0 vmovq %r16,%xmm4
+[ ]*[a-f0-9]+: 62 f9 fd 08 d6 60 02 vmovq %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 fe 08 7e 60 02 vmovq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 e1 7f 08 2d c4 vcvtsd2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7f 08 2d 48 02 vcvtsd2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 7f 08 2c c4 vcvttsd2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7f 08 2c 48 02 vcvttsd2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 ff 08 2d c4 vcvtsd2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 ff 08 2d 48 02 vcvtsd2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 e1 ff 08 2c c4 vcvttsd2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 ff 08 2c 48 02 vcvttsd2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 f9 df 08 2a e0 vcvtsi2sd %r16,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 df 08 2a 60 02 vcvtsi2sdq 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 de 08 2a e0 vcvtsi2ss %r16,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 de 08 2a 60 02 vcvtsi2ssq 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb dd 08 22 e0 64 vpinsrq \$0x64,%r16,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb dd 08 22 60 02 64 vpinsrq \$0x64,0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb fd 08 16 e0 64 vpextrq \$0x64,%xmm4,%r16
+[ ]*[a-f0-9]+: 62 fb fd 08 16 60 02 64 vpextrq \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb cd 08 0b 70 02 04 vrndscalesd \$(0x)?4,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 58 70 02 vaddsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5a 70 02 vcvtsd2ss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5e 70 02 vdivsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5f 70 02 vmaxsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5d 70 02 vminsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 59 70 02 vmulsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 51 70 02 vsqrtsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 cf 08 5c 70 02 vsubsd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 58 70 04 vaddss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5a 70 04 vcvtss2sd 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5e 70 04 vdivss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5f 70 04 vmaxss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5d 70 04 vminss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 59 70 04 vmulss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 4d 70 04 vrcp14ss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 4d 08 4f 70 04 vrsqrt14ss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 51 70 04 vsqrtss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 4e 08 5c 70 04 vsubss 0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 f9 7c 08 2f 60 04 vcomiss 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 10 60 04 vmovss 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 21 60 04 vpmovsxbd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 24 60 04 vpmovsxwq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 31 60 04 vpmovzxbd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 34 60 04 vpmovzxwq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7c 08 2e 60 04 vucomiss 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 f9 7e 08 11 60 04 vmovss %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e e0 vmovd %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e 60 04 vmovd %xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 7d 08 6e e0 vmovd %r16d,%xmm4
+[ ]*[a-f0-9]+: 62 f9 7d 08 6e 60 04 vmovd 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 e1 7e 08 2d c4 vcvtss2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7e 08 2d 48 04 vcvtss2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 7e 08 2c c4 vcvttss2si %xmm4,%r16d
+[ ]*[a-f0-9]+: 62 f9 7e 08 2c 48 04 vcvttss2si 0x10\(%r16\),%ecx
+[ ]*[a-f0-9]+: 62 e1 fe 08 2d c4 vcvtss2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fe 08 2d 48 04 vcvtss2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 e1 fe 08 2c c4 vcvttss2si %xmm4,%r16
+[ ]*[a-f0-9]+: 62 f9 fe 08 2c 48 04 vcvttss2si 0x10\(%r16\),%rcx
+[ ]*[a-f0-9]+: 62 fb fd 08 17 e0 64 vextractps \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 16 e0 64 vpextrd \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 16 60 04 64 vpextrd \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb 7d 08 17 e0 64 vextractps \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 17 60 04 64 vextractps \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb 5d 08 22 e0 64 vpinsrd \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 5d 08 22 60 04 64 vpinsrd \$0x64,0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5f 08 2a e0 vcvtsi2sd %r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5f 08 2a 60 04 vcvtsi2sdl 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5e 08 2a e0 vcvtsi2ss %r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5e 08 2a 60 04 vcvtsi2ssl 0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 4d 08 21 70 04 64 vinsertps \$0x64,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fb 4d 08 0a 70 04 04 vrndscaless \$(0x)?4,0x10\(%r16\),%xmm6,%xmm6
+[ ]*[a-f0-9]+: 62 fa 7d 08 22 60 08 vpmovsxbq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 fa 7d 08 32 60 08 vpmovzxbq 0x10\(%r16\),%xmm4
+[ ]*[a-f0-9]+: 62 e1 7d 08 c5 c4 64 vpextrw \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 e1 7d 08 c5 c4 64 vpextrw \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 15 60 08 64 vpextrw \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 f9 5d 08 c4 e0 64 vpinsrw \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5d 08 c4 e0 64 vpinsrw \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 f9 5d 08 c4 60 08 64 vpinsrw \$0x64,0x10\(%r16\),%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 7d 08 14 e0 64 vpextrb \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 14 e0 64 vpextrb \$0x64,%xmm4,%r16d
+[ ]*[a-f0-9]+: 62 fb 7d 08 14 60 10 64 vpextrb \$0x64,%xmm4,0x10\(%r16\)
+[ ]*[a-f0-9]+: 62 fb 5d 08 20 e0 64 vpinsrb \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 5d 08 20 e0 64 vpinsrb \$0x64,%r16d,%xmm4,%xmm4
+[ ]*[a-f0-9]+: 62 fb 5d 08 20 60 10 64 vpinsrb \$0x64,0x10\(%r16\),%xmm4,%xmm4
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse2avx-apx.s
@@ -0,0 +1,320 @@
+ .text
+ .sse_check warning
+sse2avx:
+
+# Tests for op mem128, xmm
+ cvtdq2ps 16(%r16),%xmm4
+ cvtpd2dq 16(%r16),%xmm4
+ cvtpd2ps 16(%r16),%xmm4
+ cvtps2dq 16(%r16),%xmm4
+ cvttpd2dq 16(%r16),%xmm4
+ cvttps2dq 16(%r16),%xmm4
+ lddqu 16(%r16),%xmm4
+ movapd 16(%r16),%xmm4
+ movaps 16(%r16),%xmm4
+ movdqa 16(%r16),%xmm4
+ movdqu 16(%r16),%xmm4
+ movntdqa 16(%r16),%xmm4
+ movshdup 16(%r16),%xmm4
+ movsldup 16(%r16),%xmm4
+ movupd 16(%r16),%xmm4
+ movups 16(%r16),%xmm4
+ pabsb 16(%r16),%xmm4
+ pabsw 16(%r16),%xmm4
+ pabsd 16(%r16),%xmm4
+ rcpps 16(%r16),%xmm4
+ rsqrtps 16(%r16),%xmm4
+ sqrtpd 16(%r16),%xmm4
+ sqrtps 16(%r16),%xmm4
+
+# Tests for op xmm, mem128
+ movapd %xmm4,16(%r16)
+ movaps %xmm4,16(%r16)
+ movdqa %xmm4,16(%r16)
+ movdqu %xmm4,16(%r16)
+ movntdq %xmm4,16(%r16)
+ movntpd %xmm4,16(%r16)
+ movntps %xmm4,16(%r16)
+ movupd %xmm4,16(%r16)
+ movups %xmm4,16(%r16)
+
+# Tests for op mem128, xmm[, xmm]
+ addpd 16(%r16),%xmm6
+ addps 16(%r16),%xmm6
+ aesenc 16(%r16),%xmm6
+ aesenclast 16(%r16),%xmm6
+ aesdec 16(%r16),%xmm6
+ aesdeclast 16(%r16),%xmm6
+ andnpd 16(%r16),%xmm6
+ andnps 16(%r16),%xmm6
+ andpd 16(%r16),%xmm6
+ andps 16(%r16),%xmm6
+ divpd 16(%r16),%xmm6
+ divps 16(%r16),%xmm6
+ gf2p8mulb 16(%r16),%xmm6
+ maxpd 16(%r16),%xmm6
+ maxps 16(%r16),%xmm6
+ minpd 16(%r16),%xmm6
+ minps 16(%r16),%xmm6
+ mulpd 16(%r16),%xmm6
+ mulps 16(%r16),%xmm6
+ orpd 16(%r16),%xmm6
+ orps 16(%r16),%xmm6
+ packsswb 16(%r16),%xmm6
+ packssdw 16(%r16),%xmm6
+ packuswb 16(%r16),%xmm6
+ packusdw 16(%r16),%xmm6
+ paddb 16(%r16),%xmm6
+ paddw 16(%r16),%xmm6
+ paddd 16(%r16),%xmm6
+ paddq 16(%r16),%xmm6
+ paddsb 16(%r16),%xmm6
+ paddsw 16(%r16),%xmm6
+ paddusb 16(%r16),%xmm6
+ paddusw 16(%r16),%xmm6
+ pand 16(%r16),%xmm6
+ pandn 16(%r16),%xmm6
+ pavgb 16(%r16),%xmm6
+ pavgw 16(%r16),%xmm6
+ pclmullqlqdq 16(%r16),%xmm6
+ pclmulhqlqdq 16(%r16),%xmm6
+ pclmullqhqdq 16(%r16),%xmm6
+ pclmulhqhqdq 16(%r16),%xmm6
+ pmaddwd 16(%r16),%xmm6
+ pmaddubsw 16(%r16),%xmm6
+ pmaxsb 16(%r16),%xmm6
+ pmaxsw 16(%r16),%xmm6
+ pmaxsd 16(%r16),%xmm6
+ pmaxub 16(%r16),%xmm6
+ pmaxuw 16(%r16),%xmm6
+ pmaxud 16(%r16),%xmm6
+ pminsb 16(%r16),%xmm6
+ pminsw 16(%r16),%xmm6
+ pminsd 16(%r16),%xmm6
+ pminub 16(%r16),%xmm6
+ pminuw 16(%r16),%xmm6
+ pminud 16(%r16),%xmm6
+ pmuldq 16(%r16),%xmm6
+ pmulhuw 16(%r16),%xmm6
+ pmulhrsw 16(%r16),%xmm6
+ pmulhw 16(%r16),%xmm6
+ pmullw 16(%r16),%xmm6
+ pmulld 16(%r16),%xmm6
+ pmuludq 16(%r16),%xmm6
+ por 16(%r16),%xmm6
+ psadbw 16(%r16),%xmm6
+ pshufb 16(%r16),%xmm6
+ psllw 16(%r16),%xmm6
+ pslld 16(%r16),%xmm6
+ psllq 16(%r16),%xmm6
+ psraw 16(%r16),%xmm6
+ psrad 16(%r16),%xmm6
+ psrlw 16(%r16),%xmm6
+ psrld 16(%r16),%xmm6
+ psrlq 16(%r16),%xmm6
+ psubb 16(%r16),%xmm6
+ psubw 16(%r16),%xmm6
+ psubd 16(%r16),%xmm6
+ psubq 16(%r16),%xmm6
+ psubsb 16(%r16),%xmm6
+ psubsw 16(%r16),%xmm6
+ psubusb 16(%r16),%xmm6
+ psubusw 16(%r16),%xmm6
+ punpckhbw 16(%r16),%xmm6
+ punpckhwd 16(%r16),%xmm6
+ punpckhdq 16(%r16),%xmm6
+ punpckhqdq 16(%r16),%xmm6
+ punpcklbw 16(%r16),%xmm6
+ punpcklwd 16(%r16),%xmm6
+ punpckldq 16(%r16),%xmm6
+ punpcklqdq 16(%r16),%xmm6
+ pxor 16(%r16),%xmm6
+ subpd 16(%r16),%xmm6
+ subps 16(%r16),%xmm6
+ unpckhpd 16(%r16),%xmm6
+ unpckhps 16(%r16),%xmm6
+ unpcklpd 16(%r16),%xmm6
+ unpcklps 16(%r16),%xmm6
+ xorpd 16(%r16),%xmm6
+ xorps 16(%r16),%xmm6
+
+# Tests for op imm8, mem128, xmm
+ pshufd $100,16(%r16),%xmm6
+ pshufhw $100,16(%r16),%xmm6
+ pshuflw $100,16(%r16),%xmm6
+ roundpd $4,16(%r16),%xmm6
+ roundps $4,16(%r16),%xmm6
+
+# Tests for op imm8, mem128, xmm[, xmm]
+ gf2p8affineqb $100,16(%r16),%xmm6
+ gf2p8affineinvqb $100,16(%r16),%xmm6
+ palignr $100,16(%r16),%xmm6
+ pclmulqdq $100,16(%r16),%xmm6
+ shufpd $100,16(%r16),%xmm6
+ shufps $100,16(%r16),%xmm6
+
+# Tests for op mem64, xmm
+ comisd 16(%r16),%xmm4
+ cvtdq2pd 16(%r16),%xmm4
+ cvtpi2pd 16(%r16),%xmm4
+ cvtps2pd 16(%r16),%xmm4
+ movddup 16(%r16),%xmm4
+ movsd 16(%r16),%xmm4
+ pmovsxbw 16(%r16),%xmm4
+ pmovsxwd 16(%r16),%xmm4
+ pmovsxdq 16(%r16),%xmm4
+ pmovzxbw 16(%r16),%xmm4
+ pmovzxwd 16(%r16),%xmm4
+ pmovzxdq 16(%r16),%xmm4
+ ucomisd 16(%r16),%xmm4
+
+# Tests for op xmm, mem64
+ movlpd %xmm4,16(%r16)
+ movlps %xmm4,16(%r16)
+ movhpd %xmm4,16(%r16)
+ movhps %xmm4,16(%r16)
+ movsd %xmm4,16(%r16)
+
+# Tests for op mem64, xmm[, xmm]
+ movlpd 16(%r16),%xmm4
+ movlps 16(%r16),%xmm4
+ movhpd 16(%r16),%xmm4
+ movhps 16(%r16),%xmm4
+
+# Tests for op xmm, regq/mem64
+# Tests for op regq/mem64, xmm
+ movd %xmm4,%r16
+ movd %r16,%xmm4
+ movq %xmm4,%r16
+ movq %r16,%xmm4
+ movq %xmm4,16(%r16)
+ movq 16(%r16),%xmm4
+
+# Tests for op xmm/mem64, regl
+ cvtsd2si %xmm4,%r16d
+ cvtsd2si 16(%r16),%ecx
+ cvttsd2si %xmm4,%r16d
+ cvttsd2si 16(%r16),%ecx
+
+# Tests for op xmm/mem64, regq
+ cvtsd2si %xmm4,%r16
+ cvtsd2si 16(%r16),%rcx
+ cvttsd2si %xmm4,%r16
+ cvttsd2si 16(%r16),%rcx
+
+# Tests for op regq/mem64, xmm[, xmm]
+ cvtsi2sdq %r16,%xmm4
+ cvtsi2sdq 16(%r16),%xmm4
+ cvtsi2ssq %r16,%xmm4
+ cvtsi2ssq 16(%r16),%xmm4
+
+# Tests for op imm8, regq/mem64, xmm[, xmm]
+ pinsrq $100,%r16,%xmm4
+ pinsrq $100,16(%r16),%xmm4
+
+# Tests for op imm8, xmm, regq/mem64
+ pextrq $100,%xmm4,%r16
+ pextrq $100,%xmm4,16(%r16)
+
+# Tests for op imm8, mem64, xmm[, xmm]
+ roundsd $4,16(%r16),%xmm6
+
+# Tests for op mem64, xmm[, xmm]
+ addsd 16(%r16),%xmm6
+ cvtsd2ss 16(%r16),%xmm6
+ divsd 16(%r16),%xmm6
+ maxsd 16(%r16),%xmm6
+ minsd 16(%r16),%xmm6
+ mulsd 16(%r16),%xmm6
+ sqrtsd 16(%r16),%xmm6
+ subsd 16(%r16),%xmm6
+
+# Tests for op mem32, xmm[, xmm]
+ addss 16(%r16),%xmm6
+ cvtss2sd 16(%r16),%xmm6
+ divss 16(%r16),%xmm6
+ maxss 16(%r16),%xmm6
+ minss 16(%r16),%xmm6
+ mulss 16(%r16),%xmm6
+ rcpss 16(%r16),%xmm6
+ rsqrtss 16(%r16),%xmm6
+ sqrtss 16(%r16),%xmm6
+ subss 16(%r16),%xmm6
+
+# Tests for op mem32, xmm
+ comiss 16(%r16),%xmm4
+ movss 16(%r16),%xmm4
+ pmovsxbd 16(%r16),%xmm4
+ pmovsxwq 16(%r16),%xmm4
+ pmovzxbd 16(%r16),%xmm4
+ pmovzxwq 16(%r16),%xmm4
+ ucomiss 16(%r16),%xmm4
+
+# Tests for op xmm, mem32
+ movss %xmm4,16(%r16)
+
+# Tests for op xmm, regl/mem32
+# Tests for op regl/mem32, xmm
+ movd %xmm4,%r16d
+ movd %xmm4,16(%r16)
+ movd %r16d,%xmm4
+ movd 16(%r16),%xmm4
+
+# Tests for op xmm/mem32, regl
+ cvtss2si %xmm4,%r16d
+ cvtss2si 16(%r16),%ecx
+ cvttss2si %xmm4,%r16d
+ cvttss2si 16(%r16),%ecx
+
+# Tests for op xmm/mem32, regq
+ cvtss2si %xmm4,%r16
+ cvtss2si 16(%r16),%rcx
+ cvttss2si %xmm4,%r16
+ cvttss2si 16(%r16),%rcx
+
+# Tests for op imm8, xmm, regq/mem32
+ extractps $100,%xmm4,%r16
+
+# Tests for op imm8, xmm, regl/mem32
+ pextrd $100,%xmm4,%r16d
+ pextrd $100,%xmm4,16(%r16)
+ extractps $100,%xmm4,%r16d
+ extractps $100,%xmm4,16(%r16)
+
+# Tests for op imm8, regl/mem32, xmm[, xmm]
+ pinsrd $100,%r16d,%xmm4
+ pinsrd $100,16(%r16),%xmm4
+
+# Tests for op regl/mem32, xmm[, xmm]
+ cvtsi2sd %r16d,%xmm4
+ cvtsi2sd 16(%r16),%xmm4
+ cvtsi2ss %r16d,%xmm4
+ cvtsi2ss 16(%r16),%xmm4
+
+# Tests for op imm8, mem32, xmm[, xmm]
+ insertps $100,16(%r16),%xmm6
+ roundss $4,16(%r16),%xmm6
+
+# Tests for op mem16, xmm
+ pmovsxbq 16(%r16),%xmm4
+ pmovzxbq 16(%r16),%xmm4
+
+# Tests for op imm8, xmm, regl/mem16
+ pextrw $100,%xmm4,%r16d
+ pextrw $100,%xmm4,%r16
+ pextrw $100,%xmm4,16(%r16)
+
+# Tests for op imm8, regl/mem16, xmm[, xmm]
+ pinsrw $100,%r16d,%xmm4
+ pinsrw $100,%r16,%xmm4
+ pinsrw $100,16(%r16),%xmm4
+
+# Tests for op imm8, xmm, regl/mem8
+ pextrb $100,%xmm4,%r16d
+ pextrb $100,%xmm4,%r16
+ pextrb $100,%xmm4,16(%r16)
+
+# Tests for op imm8, regl/mem8, xmm[, xmm]
+ pinsrb $100,%r16d,%xmm4
+ pinsrb $100,%r16,%xmm4
+ pinsrb $100,16(%r16),%xmm4
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -390,6 +390,7 @@ run_dump_test "x86-64-apx-jmpabs-inval"
run_dump_test "x86-64-apx-nf"
run_dump_test "x86-64-apx-nf-intel"
run_dump_test "x86-64-apx_f-evex"
+run_dump_test "sse2avx-apx"
run_dump_test "x86-64-avx512f-rcigrz-intel"
run_dump_test "x86-64-avx512f-rcigrz"
run_dump_test "x86-64-clwb"
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.d
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.d
@@ -746,9 +746,19 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: c5 79 7e c8 vmovd %xmm9,%eax
[ ]*[a-f0-9]+: c4 a1 79 7e c8 vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c4 e1 f9 7e c8 vmovq %xmm1,%rax
+[ ]*[a-f0-9]+: 62 f1 7d 08 7e c8 \{evex\} vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c5 f9 7e c8 vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c5 f9 7e c8 vmovd %xmm1,%eax
[ ]*[a-f0-9]+: c4 e1 79 7e c8 vmovd %xmm1,%eax
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 d9 7d 08 7e c8 vmovd %xmm1,%r24d
+[ ]*[a-f0-9]+: 62 79 7d 08 7e c8 vmovd %xmm9,%r16d
+[ ]*[a-f0-9]+: 62 b9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 fd 08 7e c8 vmovq %xmm1,%r16
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
+[ ]*[a-f0-9]+: 62 f9 7d 08 7e c8 vmovd %xmm1,%r16d
[ ]*[a-f0-9]+: c5 f8 ae 11 vldmxcsr \(%rcx\)
[ ]*[a-f0-9]+: c5 f8 ae 19 vstmxcsr \(%rcx\)
[ ]*[a-f0-9]+: c5 f8 5b f4 vcvtdq2ps %xmm4,%xmm6
--- a/gas/testsuite/gas/i386/x86-64-sse2avx.s
+++ b/gas/testsuite/gas/i386/x86-64-sse2avx.s
@@ -847,10 +847,21 @@ _start:
rex.r movd %xmm1, %eax
rex.x movd %xmm1, %eax
rex.w movd %xmm1, %eax
+ {evex} movd %xmm1, %eax
{rex} movd %xmm1, %eax
{rex2} movd %xmm1, %eax
{vex3} movd %xmm1, %eax
+ movd %xmm1, %r16d
+ rex movd %xmm1, %r16d
+ rex.b movd %xmm1, %r16d
+ rex.r movd %xmm1, %r16d
+ rex.x movd %xmm1, %r16d
+ rex.w movd %xmm1, %r16d
+ {evex} movd %xmm1, %r16d
+ {rex} movd %xmm1, %r16d
+ {rex2} movd %xmm1, %r16d
+
.intel_syntax noprefix
# Tests for op mem64
ldmxcsr DWORD PTR [rcx]
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1004,10 +1004,40 @@ pause, 0xf390, i186, NoSuf, {}
$avx:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
$sse:SSE2:66::RegXMM:Xmmword, +
$mmx:MMX:::RegMMX:Qword>
+// As above, but also allowing AVX512 (EVEX) encoding, to transform
+// in particular insns using eGPR-s.
+<MMX:cpu:pfx:attr:reg:mem, +
+ $avx:AVX|AVX512VL:66:Vex128|EVex128|VexVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ $sse:SSE2:66::RegXMM:Xmmword, +
+ $mmx:MMX:::RegMMX:Qword>
+<MMXdq:opc:cpu:pfx:attr:reg:mem, +
+ d:0:AVX|AVX512VL:66:Vex128|EVex128|VexVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ d:0:SSE2:66::RegXMM:Xmmword, +
+ d:0:MMX:::RegMMX:Qword, +
+ q:1:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
+ q:1:AVX512VL:66:EVex128|VexVVVV|VexW1|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ q:1:SSE2:66::RegXMM:Xmmword, +
+ q:1:MMX:::RegMMX:Qword>
+<MMXBW:cpu:pfx:attr:reg:mem, +
+ $avx:AVX:66:Vex128|VexVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
+ $apx:AVX512BW&AVX512VL:66:EVex128|VexVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ $sse:SSE2:66::RegXMM:Xmmword, +
+ $mmx:MMX:::RegMMX:Qword>
<sse2:cpu:attr:scal:vvvv, +
$avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
$sse:SSE2:::>
+<SSE2BW:cpu:attr:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512BW&AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV, +
+ $sse:SSE2::>
+<SSE2D:cpu:attr:scal:vvvv, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexLIG|EVexLIG|VexW0|Disp8MemShift=2|SSE2AVX:VexVVVV, +
+ $sse:SSE2:::>
+<SSE2Q:cpu:attr:scal:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512VL:EVex128|VexW1|Disp8MemShift=4|SSE2AVX:EVexLIG|VexW1|Disp8MemShift=3|SSE2AVX:VexVVVV, +
+ $sse:SSE2:::>
<bw:opc:vexw:elem:kcpu:kpfx:cpubmi, +
b:0:VexW0:Byte:AVX512DQ:66:AVX512VBMI, +
@@ -1022,8 +1052,8 @@ emms, 0xf77, MMX, NoSuf, {}
// copying between Reg64/Mem64 and RegXMM/RegMMX, as is mandated by Intel's
// spec). AMD's spec, having been in existence for much longer, failed to
// recognize that and specified movd for 32- and 64-bit operations.
-movd, 0x666e, AVX, D|Modrm|Vex128|Space0F|VexW0|NoSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM }
-movd, 0x666e, AVX&x64, D|Modrm|Vex=1|Space0F|VexW1|NoSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM }
+movd, 0x666e, AVX|AVX512F, D|Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM }
+movd, 0x666e, x64&(AVX|AVX512F), D|Modrm|Vex128|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM }
movd, 0x660f6e, SSE2, D|Modrm|IgnoreSize|NoSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
movd, 0x660f6e, SSE2&x64, D|Modrm|NoSuf|Size64, { Reg64|BaseIndex, RegXMM }
// The MMX templates have to remain after at least the SSE2AVX ones.
@@ -1031,247 +1061,280 @@ movd, 0xf6e, MMX, D|Modrm|IgnoreSize|NoS
movd, 0xf6e, MMX&x64, D|Modrm|NoSuf|Size64, { Reg64|BaseIndex, RegMMX }
movq, 0xf37e, AVX, Load|Modrm|Vex128|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
movq, 0x66d6, AVX, Modrm|Vex128|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
-movq, 0x666e, AVX&x64, D|Modrm|Vex=1|Space0F|VexW1|NoSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }
+movq, 0xf37e, AVX512F, Load|Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movq, 0x66d6, AVX512F, Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
+movq, 0x666e, x64&(AVX|AVX512F), D|Modrm|Vex128|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }
movq, 0xf30f7e, SSE2, Load|Modrm|NoSuf, { Unspecified|Qword|BaseIndex|RegXMM, RegXMM }
movq, 0x660fd6, SSE2, Modrm|NoSuf, { RegXMM, Unspecified|Qword|BaseIndex|RegXMM }
movq, 0x660f6e, SSE2&x64, D|Modrm|NoSuf|Size64, { Reg64|Unspecified|BaseIndex, RegXMM }
// The MMX templates have to remain after at least the SSE2AVX ones.
movq, 0xf6f, MMX, D|Modrm|NoSuf, { Unspecified|Qword|BaseIndex|RegMMX, RegMMX }
movq, 0xf6e, MMX&x64, D|Modrm|NoSuf|Size64, { Reg64|Unspecified|BaseIndex, RegMMX }
-packssdw<mmx>, 0x<mmx:pfx>0f6b, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-packsswb<mmx>, 0x<mmx:pfx>0f63, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-packuswb<mmx>, 0x<mmx:pfx>0f67, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-padd<bw><mmx>, 0x<mmx:pfx>0ffc | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-paddd<mmx>, 0x<mmx:pfx>0ffe, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-paddq<sse2>, 0x660fd4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+packssdw<MMXBW>, 0x<MMXBW:pfx>0f6b, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+packsswb<MMXBW>, 0x<MMXBW:pfx>0f63, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+packuswb<MMXBW>, 0x<MMXBW:pfx>0f67, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+padd<bw><MMXBW>, 0x<MMXBW:pfx>0ffc | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+paddd<MMX>, 0x<MMX:pfx>0ffe, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+paddq<SSE2Q>, 0x660fd4, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
paddq, 0xfd4, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-padds<bw><mmx>, 0x<mmx:pfx>0fec | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-paddus<bw><mmx>, 0x<mmx:pfx>0fdc | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pand<mmx>, 0x<mmx:pfx>0fdb, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pandn<mmx>, 0x<mmx:pfx>0fdf, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
+padds<bw><MMXBW>, 0x<MMXBW:pfx>0fec | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+paddus<bw><MMXBW>, 0x<MMXBW:pfx>0fdc | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+pand<MMX>, 0x<MMX:pfx>0fdb, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+pandn<MMX>, 0x<MMX:pfx>0fdf, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
pcmpeq<bw><mmx>, 0x<mmx:pfx>0f74 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
pcmpeqd<mmx>, 0x<mmx:pfx>0f76, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
pcmpgt<bw><mmx>, 0x<mmx:pfx>0f64 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf|Optimize, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
pcmpgtd<mmx>, 0x<mmx:pfx>0f66, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf|Optimize, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pmaddwd<mmx>, 0x<mmx:pfx>0ff5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pmulhw<mmx>, 0x<mmx:pfx>0fe5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-pmullw<mmx>, 0x<mmx:pfx>0fd5, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-por<mmx>, 0x<mmx:pfx>0feb, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psllw<mmx>, 0x<mmx:pfx>0ff1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psllw<mmx>, 0x<mmx:pfx>0f71/6, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psll<dq><mmx>, 0x<mmx:pfx>0ff2 | <dq:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psll<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/6, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psraw<mmx>, 0x<mmx:pfx>0fe1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psraw<mmx>, 0x<mmx:pfx>0f71/4, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psrad<mmx>, 0x<mmx:pfx>0fe2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrad<mmx>, 0x<mmx:pfx>0f72/4, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psrlw<mmx>, 0x<mmx:pfx>0fd1, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrlw<mmx>, 0x<mmx:pfx>0f71/2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psrl<dq><mmx>, 0x<mmx:pfx>0fd2 | <dq:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psrl<dq><mmx>, 0x<mmx:pfx>0f72 | <dq:opc>/2, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { Imm8, <mmx:reg> }
-psub<bw><mmx>, 0x<mmx:pfx>0ff8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psubd<mmx>, 0x<mmx:pfx>0ffa, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psubq<sse2>, 0x660ffb, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaddwd<MMXBW>, 0x<MMXBW:pfx>0ff5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+pmulhw<MMXBW>, 0x<MMXBW:pfx>0fe5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+pmullw<MMXBW>, 0x<MMXBW:pfx>0fd5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+por<MMX>, 0x<MMX:pfx>0feb, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+psllw<MMXBW>, 0x<MMXBW:pfx>0ff1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psllw<MMXBW>, 0x<MMXBW:pfx>0f71/6, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { Imm8, <MMXBW:reg> }
+psll<MMXdq>, 0x<MMXdq:pfx>0ff2 | <MMXdq:opc>, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { <MMXdq:reg>|<MMXdq:mem>|Unspecified|BaseIndex, <MMXdq:reg> }
+psll<MMXdq>, 0x<MMXdq:pfx>0f72 | <MMXdq:opc>/6, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { Imm8, <MMXdq:reg> }
+psraw<MMXBW>, 0x<MMXBW:pfx>0fe1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psraw<MMXBW>, 0x<MMXBW:pfx>0f71/4, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { Imm8, <MMXBW:reg> }
+psrad<MMX>, 0x<MMX:pfx>0fe2, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+psrad<MMX>, 0x<MMX:pfx>0f72/4, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { Imm8, <MMX:reg> }
+psrlw<MMXBW>, 0x<MMXBW:pfx>0fd1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psrlw<MMXBW>, 0x<MMXBW:pfx>0f71/2, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { Imm8, <MMXBW:reg> }
+psrl<MMXdq>, 0x<MMXdq:pfx>0fd2 | <MMXdq:opc>, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { <MMXdq:reg>|<MMXdq:mem>|Unspecified|BaseIndex, <MMXdq:reg> }
+psrl<MMXdq>, 0x<MMXdq:pfx>0f72 | <MMXdq:opc>/2, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { Imm8, <MMXdq:reg> }
+psub<bw><MMXBW>, 0x<MMXBW:pfx>0ff8 | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psubd<MMX>, 0x<MMX:pfx>0ffa, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+psubq<SSE2Q>, 0x660ffb, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
psubq, 0xffb, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-psubs<bw><mmx>, 0x<mmx:pfx>0fe8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-psubus<bw><mmx>, 0x<mmx:pfx>0fd8 | <bw:opc>, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpckhbw<mmx>, 0x<mmx:pfx>0f68, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpckhwd<mmx>, 0x<mmx:pfx>0f69, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpckhdq<mmx>, 0x<mmx:pfx>0f6a, <mmx:cpu>, Modrm|<mmx:attr>|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
-punpcklbw<sse2>, 0x660f60, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+psubs<bw><MMXBW>, 0x<MMXBW:pfx>0fe8 | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+psubus<bw><MMXBW>, 0x<MMXBW:pfx>0fd8 | <bw:opc>, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+punpckhbw<MMXBW>, 0x<MMXBW:pfx>0f68, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+punpckhwd<MMXBW>, 0x<MMXBW:pfx>0f69, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
+punpckhdq<MMX>, 0x<MMX:pfx>0f6a, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
+punpcklbw<SSE2BW>, 0x660f60, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpcklbw, 0xf60, MMX, Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegMMX, RegMMX }
-punpcklwd<sse2>, 0x660f61, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpcklwd<SSE2BW>, 0x660f61, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpcklwd, 0xf61, MMX, Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegMMX, RegMMX }
-punpckldq<sse2>, 0x660f62, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpckldq<SSE2D>, 0x660f62, <SSE2D:cpu>, Modrm|<SSE2D:attr>|<SSE2D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
punpckldq, 0xf62, MMX, Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pxor<mmx>, 0x<mmx:pfx>0fef, <mmx:cpu>, Modrm|<mmx:attr>|C|NoSuf, { <mmx:reg>|<mmx:mem>|Unspecified|BaseIndex, <mmx:reg> }
+pxor<MMX>, 0x<MMX:pfx>0fef, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
// SSE instructions.
<sse:cpu:attr:scal:vvvv, +
$avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
$sse:SSE:::>
+<SSE:cpu:attr:scal:vvvv, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexLIG|EVexLIG|VexW0|Disp8MemShift=2|SSE2AVX:VexVVVV, +
+ $sse:SSE:::>
+<SSEDQ:cpu:attr, +
+ $avx:AVX:Vex128|VexW0|VexVVVV|SSE2AVX, +
+ $apx:AVX512DQ&AVX512VL:EVex128|VexW0|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse:SSE:>
+<SSERP:recip:rsqrt:cpu:attr, +
+ $avx:0x0f53:0x0f52:AVX:Vex128|VexW0|SSE2AVX, +
+ $apx:0x660f384c:0x660f384e:AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX, +
+ $sse:0x0f53:0x0f52:SSE:::>
+<SSERS:recip:rsqrt:cpu:attr, +
+ $avx:0xf30f53:0xf30f52:AVX:VexLIG|VexW0|VexVVVV|SSE2AVX, +
+ $apx:0x660f384d:0x660f384f:AVX512F:EVexLIG|VexW0|VexVVVV|Disp8MemShift=2|SSE2AVX, +
+ $sse:0xf30f53:0xf30f52:SSE:::>
<frel:imm:comm, eq:0:C, lt:1:, le:2:, unord:3:C, neq:4:C, nlt:5:, nle:6:, ord:7:C>
-addps<sse>, 0x0f58, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-addss<sse>, 0xf30f58, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-andnps<sse>, 0x0f55, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-andps<sse>, 0x0f54, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+addps<SSE>, 0x0f58, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+addss<SSE>, 0xf30f58, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+andnps<SSEDQ>, 0x0f55, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+andps<SSEDQ>, 0x0f54, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>ps<sse>, 0x0fc2/<frel:imm>, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>ss<sse>, 0xf30fc2/<frel:imm>, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Dword|Unspecified|BaseIndex, RegXMM }
cmpps<sse>, 0x0fc2, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
cmpss<sse>, 0xf30fc2, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-comiss<sse>, 0x0f2f, <sse:cpu>, Modrm|<sse:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+comiss<SSE>, 0x0f2f, <SSE:cpu>, Modrm|<SSE:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
cvtpi2ps, 0xf2a, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegXMM }
cvtps2pi, 0xf2d, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegMMX }
cvtsi2ss<sse>, 0xf30f2a, <sse:cpu>&No64, Modrm|<sse:scal>|<sse:vvvv>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtsi2ss, 0xf32a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2ss, 0xf32a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|IgnoreSize|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2ss, 0xf32a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2ss, 0xf30f2a, SSE&x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2ss, 0xf30f2a, SSE&x64, Modrm|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtss2si, 0xf32d, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvtss2si, 0xf32d, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvtss2si, 0xf30f2d, SSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvttps2pi, 0xf2c, SSE, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegMMX }
-cvttss2si, 0xf32c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvttss2si, 0xf32c, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=2|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvttss2si, 0xf30f2c, SSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Dword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-divps<sse>, 0x0f5e, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-divss<sse>, 0xf30f5e, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+divps<SSE>, 0x0f5e, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+divss<SSE>, 0xf30f5e, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
ldmxcsr<sse>, 0x0fae/2, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { Dword|Unspecified|BaseIndex }
maskmovq, 0xff7, SSE|3dnowA, Modrm|NoSuf, { RegMMX, RegMMX }
-maxps<sse>, 0x0f5f, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-maxss<sse>, 0xf30f5f, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-minps<sse>, 0x0f5d, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-minss<sse>, 0xf30f5d, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movaps<sse>, 0x0f28, <sse:cpu>, D|Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+maxps<SSE>, 0x0f5f, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+maxss<SSE>, 0xf30f5f, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+minps<SSE>, 0x0f5d, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+minss<SSE>, 0xf30f5d, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movaps<SSE>, 0x0f28, <SSE:cpu>, D|Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
movhlps<sse>, 0x0f12, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
-movhps, 0x16, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
-movhps, 0x17, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movhps, 0x16, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexVVVV|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movhps, 0x17, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movhps, 0xf16, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movlhps<sse>, 0x0f16, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM, RegXMM }
-movlps, 0x12, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
-movlps, 0x13, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movlps, 0x12, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexVVVV|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movlps, 0x13, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movlps, 0xf12, SSE, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movmskps<sse>, 0x0f50, <sse:cpu>, Modrm|<sse:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
-movntps<sse>, 0x0f2b, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
+movntps<SSE>, 0x0f2b, <SSE:cpu>, Modrm|<SSE:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
movntq, 0xfe7, SSE|3dnowA, Modrm|NoSuf, { RegMMX, Qword|Unspecified|BaseIndex }
-movntdq<sse2>, 0x660fe7, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
-movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Dword|Unspecified|BaseIndex, RegXMM }
+movntdq<SSE2D>, 0x660fe7, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
+movss, 0xf310, AVX|AVX512F, D|Modrm|VexLIG|EVexLIG|Space0F|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Dword|Unspecified|BaseIndex, RegXMM }
movss, 0xf310, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
movss, 0xf30f10, SSE, D|Modrm|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movups<sse>, 0x0f10, <sse:cpu>, D|Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulps<sse>, 0x0f59, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulss<sse>, 0xf30f59, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-orps<sse>, 0x0f56, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movups<SSE>, 0x0f10, <SSE:cpu>, D|Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulps<SSE>, 0x0f59, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulss<SSE>, 0xf30f59, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+orps<SSEDQ>, 0x0f56, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pavg<bw>, 0xfe0 | (3 * <bw:opc>), SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pavg<bw><sse2>, 0x660fe0 | (3 * <bw:opc>), <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pextrw<sse2>, 0x660fc5, <sse2:cpu>, Load|Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
+pavg<bw><SSE2BW>, 0x660fe0 | (3 * <bw:opc>), <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pextrw<SSE2D>, 0x660fc5, <SSE2D:cpu>, Load|Modrm|<SSE2D:attr>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
pextrw, 0xfc5, SSE|3dnowA, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Imm8, RegMMX, Reg32|Reg64 }
-pinsrw<sse2>, 0x660fc4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
-pinsrw<sse2>, 0x660fc4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM }
+pinsrw<SSE2D>, 0x660fc4, <SSE2D:cpu>, Modrm|<SSE2D:attr>|<SSE2D:vvvv>|No_bSuf|No_wSuf|No_sSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
+pinsrw<SSE2D>, 0x660fc4, <SSE2D:cpu>, Modrm|<SSE2D:attr>|<SSE2D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegXMM }
pinsrw, 0xfc4, SSE|3dnowA, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Imm8, Reg32|Reg64, RegMMX }
pinsrw, 0xfc4, SSE|3dnowA, Modrm|NoSuf, { Imm8, Word|Unspecified|BaseIndex, RegMMX }
-pmaxsw<sse2>, 0x660fee, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxsw<SSE2BW>, 0x660fee, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmaxsw, 0xfee, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pmaxub<sse2>, 0x660fde, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxub<SSE2BW>, 0x660fde, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmaxub, 0xfde, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pminsw<sse2>, 0x660fea, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminsw<SSE2BW>, 0x660fea, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pminsw, 0xfea, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pminub<sse2>, 0x660fda, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminub<SSE2BW>, 0x660fda, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pminub, 0xfda, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
pmovmskb<sse2>, 0x660fd7, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
pmovmskb, 0xfd7, SSE|3dnowA, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegMMX, Reg32|Reg64 }
-pmulhuw<sse2>, 0x660fe4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmulhuw<SSE2BW>, 0x660fe4, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmulhuw, 0xfe4, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
prefetchnta, 0xf18/0, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
prefetcht0, 0xf18/1, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
prefetcht1, 0xf18/2, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
prefetcht2, 0xf18/3, SSE|3dnowA, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
psadbw, 0xff6, SSE|3dnowA, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-psadbw<sse2>, 0x660ff6, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+psadbw<SSE2BW>, 0x660ff6, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|<SSE2BW:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pshufw, 0xf70, SSE|3dnowA, Modrm|NoSuf, { Imm8|Imm8S, Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-rcpps<sse>, 0x0f53, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-rcpss<sse>, 0xf30f53, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-rsqrtps<sse>, 0x0f52, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-rsqrtss<sse>, 0xf30f52, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+rcpps<SSERP>, <SSERP:recip>, <SSERP:cpu>, Modrm|<SSERP:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+rcpss<SSERS>, <SSERS:recip>, <SSERS:cpu>, Modrm|<SSERS:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+rsqrtps<SSERP>, <SSERP:rsqrt>, <SSERP:cpu>, Modrm|<SSERP:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+rsqrtss<SSERS>, <SSERS:rsqrt>, <SSERS:cpu>, Modrm|<SSERS:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
sfence, 0xfaef8, SSE|3dnowA, NoSuf, {}
-shufps<sse>, 0x0fc6, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtps<sse>, 0x0f51, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtss<sse>, 0xf30f51, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+shufps<SSE>, 0x0fc6, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtps<SSE>, 0x0f51, <SSE:cpu>, Modrm|<SSE:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtss<SSE>, 0xf30f51, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
stmxcsr<sse>, 0x0fae/3, <sse:cpu>, Modrm|<sse:attr>|NoSuf, { Dword|Unspecified|BaseIndex }
-subps<sse>, 0x0f5c, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-subss<sse>, 0xf30f5c, <sse:cpu>, Modrm|<sse:scal>|<sse:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-ucomiss<sse>, 0x0f2e, <sse:cpu>, Modrm|<sse:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-unpckhps<sse>, 0x0f15, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-unpcklps<sse>, 0x0f14, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-xorps<sse>, 0x0f57, <sse:cpu>, Modrm|<sse:attr>|<sse:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+subps<SSE>, 0x0f5c, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+subss<SSE>, 0xf30f5c, <SSE:cpu>, Modrm|<SSE:scal>|<SSE:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+ucomiss<SSE>, 0x0f2e, <SSE:cpu>, Modrm|<SSE:scal>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+unpckhps<SSE>, 0x0f15, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+unpcklps<SSE>, 0x0f14, <SSE:cpu>, Modrm|<SSE:attr>|<SSE:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+xorps<SSEDQ>, 0x0f57, <SSEDQ:cpu>, Modrm|<SSEDQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
// SSE2 instructions.
-addpd<sse2>, 0x660f58, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-addsd<sse2>, 0xf20f58, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-andnpd<sse2>, 0x660f55, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-andpd<sse2>, 0x660f54, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<SSE2DQ:cpu:attr, +
+ $avx:AVX:Vex128|VexW0|VexVVVV|SSE2AVX, +
+ $apx:AVX512DQ&AVX512VL:EVex128|VexW1|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse:SSE2:>
+
+addpd<SSE2Q>, 0x660f58, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+addsd<SSE2Q>, 0xf20f58, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+andnpd<SSE2DQ>, 0x660f55, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+andpd<SSE2DQ>, 0x660f54, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>pd<sse2>, 0x660fc2/<frel:imm>, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
cmp<frel>sd<sse2>, 0xf20fc2/<frel:imm>, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|<frel:comm>|NoSuf|ImmExt, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
cmppd<sse2>, 0x660fc2, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
cmpsd<sse2>, 0xf20fc2, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-comisd<sse2>, 0x660f2f, <sse2:cpu>, Modrm|<sse2:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+comisd<SSE2Q>, 0x660f2f, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
cvtpi2pd, 0x660f2a, SSE2, Modrm|NoSuf, { RegMMX, RegXMM }
-cvtpi2pd, 0xf3e6, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+cvtpi2pd, 0xf3e6, AVX|AVX512VL, Modrm|Vex128|EVex128|Space0F|VexW0|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
cvtpi2pd, 0x660f2a, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
cvtsi2sd<sse2>, 0xf20f2a, <sse2:cpu>&No64, Modrm|IgnoreSize|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
-cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-cvtsi2sd, 0xf22a, AVX&x64, Modrm|Vex=3|Space0F|VexVVVV|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2sd, 0xf22a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
+cvtsi2sd, 0xf22a, x64&(AVX|AVX512F), Modrm|VexLIG|EVexLIG|Space0F|VexVVVV|Disp8ShiftVL|No_bSuf|No_wSuf|No_sSuf|SSE2AVX|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2sd, 0xf20f2a, SSE2&x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|ATTSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
cvtsi2sd, 0xf20f2a, SSE2&x64, Modrm|No_bSuf|No_wSuf|No_sSuf|IntelSyntax, { Reg32|Reg64|Unspecified|BaseIndex, RegXMM }
-divpd<sse2>, 0x660f5e, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-divsd<sse2>, 0xf20f5e, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-maxpd<sse2>, 0x660f5f, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-maxsd<sse2>, 0xf20f5f, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-minpd<sse2>, 0x660f5d, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-minsd<sse2>, 0xf20f5d, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movapd<sse2>, 0x660f28, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+divpd<SSE2Q>, 0x660f5e, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+divsd<SSE2Q>, 0xf20f5e, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+maxpd<SSE2Q>, 0x660f5f, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+maxsd<SSE2Q>, 0xf20f5f, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+minpd<SSE2Q>, 0x660f5d, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+minsd<SSE2Q>, 0xf20f5d, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movapd<SSE2Q>, 0x660f28, <SSE2Q:cpu>, D|Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
movhpd, 0x6616, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movhpd, 0x6616, AVX512F, Modrm|EVex128|Space0F|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movhpd, 0x6617, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movhpd, 0x6617, AVX512F, Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movhpd, 0x660f16, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movlpd, 0x6612, AVX, Modrm|Vex|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movlpd, 0x6612, AVX512F, Modrm|EVex128|Space0F|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movlpd, 0x6613, AVX, Modrm|Vex|Space0F|VexW0|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
+movlpd, 0x6613, AVX512F, Modrm|EVex128|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex }
movlpd, 0x660f12, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex, RegXMM }
movmskpd<sse2>, 0x660f50, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|NoRex64, { RegXMM, Reg32|Reg64 }
-movntpd<sse2>, 0x660f2b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
+movntpd<SSE2Q>, 0x660f2b, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexW0|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
+movsd, 0xf210, AVX512F, D|Modrm|EVexLIG|Space0F|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
movsd, 0xf210, AVX, D|Modrm|VexLIG|Space0F|VexVVVV|VexW0|NoSuf|SSE2AVX, { RegXMM, RegXMM }
movsd, 0xf20f10, SSE2, D|Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movupd<sse2>, 0x660f10, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulpd<sse2>, 0x660f59, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-mulsd<sse2>, 0xf20f59, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-orpd<sse2>, 0x660f56, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-shufpd<sse2>, 0x660fc6, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtpd<sse2>, 0x660f51, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-sqrtsd<sse2>, 0xf20f51, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-subpd<sse2>, 0x660f5c, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-subsd<sse2>, 0xf20f5c, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-ucomisd<sse2>, 0x660f2e, <sse2:cpu>, Modrm|<sse2:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-unpckhpd<sse2>, 0x660f15, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-unpcklpd<sse2>, 0x660f14, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-xorpd<sse2>, 0x660f57, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtdq2pd<sse2>, 0xf30fe6, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-cvtpd2dq<sse2>, 0xf20fe6, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtdq2ps<sse2>, 0x0f5b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movupd<SSE2Q>, 0x660f10, <SSE2Q:cpu>, D|Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulpd<SSE2Q>, 0x660f59, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+mulsd<SSE2Q>, 0xf20f59, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+orpd<SSE2DQ>, 0x660f56, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+shufpd<SSE2Q>, 0x660fc6, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtpd<SSE2Q>, 0x660f51, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sqrtsd<SSE2Q>, 0xf20f51, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+subpd<SSE2Q>, 0x660f5c, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+subsd<SSE2Q>, 0xf20f5c, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+ucomisd<SSE2Q>, 0x660f2e, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+unpckhpd<SSE2Q>, 0x660f15, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+unpcklpd<SSE2Q>, 0x660f14, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+xorpd<SSE2DQ>, 0x660f57, <SSE2DQ:cpu>, Modrm|<SSE2DQ:attr>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtdq2pd<SSE2D>, 0xf30fe6, <SSE2D:cpu>, Modrm|<SSE2D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+cvtpd2dq<SSE2Q>, 0xf20fe6, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtdq2ps<SSE2D>, 0x0f5b, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
cvtpd2pi, 0x660f2d, SSE2, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegMMX }
-cvtpd2ps<sse2>, 0x660f5a, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtps2pd<sse2>, 0x0f5a, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-cvtps2dq<sse2>, 0x660f5b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvtsd2si, 0xf22d, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvtpd2ps<SSE2Q>, 0x660f5a, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtps2pd<SSE2D>, 0x0f5a, <SSE2D:cpu>, Modrm|<SSE2D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+cvtps2dq<SSE2D>, 0x660f5b, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvtsd2si, 0xf22d, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=3|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvtsd2si, 0xf20f2d, SSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-cvtsd2ss<sse2>, 0xf20f5a, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-cvtss2sd<sse2>, 0xf30f5a, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-
+cvtsd2ss<SSE2Q>, 0xf20f5a, <SSE2Q:cpu>, Modrm|<SSE2Q:scal>|<SSE2Q:vvvv>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+cvtss2sd<SSE2D>, 0xf30f5a, <SSE2D:cpu>, Modrm|<SSE2D:scal>|<SSE2D:vvvv>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
cvttpd2pi, 0x660f2c, SSE2, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegMMX }
-cvttsd2si, 0xf22c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
+cvttsd2si, 0xf22c, AVX|AVX512F, Modrm|VexLIG|EVexLIG|Space0F|Disp8MemShift=3|No_bSuf|No_wSuf|No_sSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
cvttsd2si, 0xf20f2c, SSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Qword|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
-cvttpd2dq<sse2>, 0x660fe6, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-cvttps2dq<sse2>, 0xf30f5b, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvttpd2dq<SSE2Q>, 0x660fe6, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+cvttps2dq<SSE2D>, 0xf30f5b, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
maskmovdqu<sse2>, 0x660ff7, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { RegXMM, RegXMM }
-movdqa<sse2>, 0x660f6f, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-movdqu<sse2>, 0xf30f6f, <sse2:cpu>, D|Modrm|<sse2:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movdqa<SSE2D>, 0x660f6f, <SSE2D:cpu>, D|Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movdqu<SSE2D>, 0xf30f6f, <SSE2D:cpu>, D|Modrm|<SSE2D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
movdq2q, 0xf20fd6, SSE2, Modrm|NoSuf, { RegXMM, RegMMX }
movq2dq, 0xf30fd6, SSE2, Modrm|NoSuf, { RegMMX, RegXMM }
-pmuludq<sse2>, 0x660ff4, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmuludq<SSE2Q>, 0x660ff4, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|C|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pmuludq, 0xff4, SSE2, Modrm|NoSuf, { Qword|Unspecified|BaseIndex|RegMMX, RegMMX }
-pshufd<sse2>, 0x660f70, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pshufhw<sse2>, 0xf30f70, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pshuflw<sse2>, 0xf20f70, <sse2:cpu>, Modrm|<sse2:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pshufd<SSE2D>, 0x660f70, <SSE2D:cpu>, Modrm|<SSE2D:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pshufhw<SSE2BW>, 0xf30f70, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pshuflw<SSE2BW>, 0xf20f70, <SSE2BW:cpu>, Modrm|<SSE2BW:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
pslldq<sse2>, 0x660f73/7, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM }
psrldq<sse2>, 0x660f73/3, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { Imm8, RegXMM }
-punpckhqdq<sse2>, 0x660f6d, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-punpcklqdq<sse2>, 0x660f6c, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpckhqdq<SSE2Q>, 0x660f6d, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+punpcklqdq<SSE2Q>, 0x660f6c, <SSE2Q:cpu>, Modrm|<SSE2Q:attr>|<SSE2Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
<frel>
// SSE3 instructions.
<sse3:cpu:attr:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:SSE3::>
+<SSE3D:cpu:attr, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX, +
+ $sse:SSE3:>
+<SSE3Q:cpu:attr, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX, +
+ $apx:AVX512VL:EVex128|VexW1|Disp8MemShift=3|SSE2AVX, +
+ $sse:SSE3:>
addsubpd<sse3>, 0x660fd0, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
addsubps<sse3>, 0xf20fd0, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1279,10 +1342,13 @@ haddpd<sse3>, 0x660f7c, <sse3:cpu>, Modr
haddps<sse3>, 0xf20f7c, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
hsubpd<sse3>, 0x660f7d, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
hsubps<sse3>, 0xf20f7d, <sse3:cpu>, Modrm|<sse3:attr>|<sse3:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-lddqu<sse3>, 0xf20ff0, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
-movddup<sse3>, 0xf20f12, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movshdup<sse3>, 0xf30f16, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-movsldup<sse3>, 0xf30f12, <sse3:cpu>, Modrm|<sse3:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+lddqu, 0xf20ff0, AVX, Modrm|Vex128|VexW0|SSE2AVX|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+// For use with eGPR-s in the memory operand, utilize VMOVDQU32.
+lddqu, 0xf30f6f, AVX512VL, Modrm|EVex128|VexW0|Disp8MemShift=4|SSE2AVX|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+lddqu, 0xf20ff0, SSE3, Modrm|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+movddup<SSE3Q>, 0xf20f12, <SSE3Q:cpu>, Modrm|<SSE3Q:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movshdup<SSE3D>, 0xf30f16, <SSE3D:cpu>, Modrm|<SSE3D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+movsldup<SSE3D>, 0xf30f12, <SSE3D:cpu>, Modrm|<SSE3D:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
// FPU instructions also covered by SSE3 CPUID flag.
@@ -1352,6 +1418,15 @@ invpcid, 0xf3f2, INVPCID&APX_F, Modrm|No
$avx:AVX:66:Vex128|VexW0|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
$sse:SSSE3:66:::RegXMM:Xmmword, +
$mmx:SSSE3::::RegMMX:Qword>
+<SSSE3BW:cpu:pfx:attr:vvvv:reg:mem, +
+ $avx:AVX:66:Vex128|VexW0|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
+ $apx:AVX512BW&AVX512VL:66:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV:RegXMM:Xmmword, +
+ $sse:SSSE3:66:::RegXMM:Xmmword, +
+ $mmx:SSSE3::::RegMMX:Qword>
+<SSSE3D:cpu:pfx:attr:reg:mem, +
+ $avx:AVX|AVX512VL:66:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
+ $sse:SSSE3:66::RegXMM:Xmmword, +
+ $mmx:SSSE3:::RegMMX:Qword>
phaddw<ssse3>, 0x<ssse3:pfx>0f3801, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
phaddd<ssse3>, 0x<ssse3:pfx>0f3802, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
@@ -1359,18 +1434,34 @@ phaddsw<ssse3>, 0x<ssse3:pfx>0f3803, <ss
phsubw<ssse3>, 0x<ssse3:pfx>0f3805, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
phsubd<ssse3>, 0x<ssse3:pfx>0f3806, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
phsubsw<ssse3>, 0x<ssse3:pfx>0f3807, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pmaddubsw<ssse3>, 0x<ssse3:pfx>0f3804, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pmulhrsw<ssse3>, 0x<ssse3:pfx>0f380b, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pshufb<ssse3>, 0x<ssse3:pfx>0f3800, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
+pmaddubsw<SSSE3BW>, 0x<SSSE3BW:pfx>0f3804, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pmulhrsw<SSSE3BW>, 0x<SSSE3BW:pfx>0f380b, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pshufb<SSSE3BW>, 0x<SSSE3BW:pfx>0f3800, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
psign<bw><ssse3>, 0x<ssse3:pfx>0f3808 | <bw:opc>, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
psignd<ssse3>, 0x<ssse3:pfx>0f380a, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-palignr<ssse3>, 0x<ssse3:pfx>0f3a0f, <ssse3:cpu>, Modrm|<ssse3:attr>|<ssse3:vvvv>|NoSuf, { Imm8, <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pabs<bw><ssse3>, 0x<ssse3:pfx>0f381c | <bw:opc>, <ssse3:cpu>, Modrm|<ssse3:attr>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
-pabsd<ssse3>, 0x<ssse3:pfx>0f381e, <ssse3:cpu>, Modrm|<ssse3:attr>|NoSuf, { <ssse3:reg>|<ssse3:mem>|Unspecified|BaseIndex, <ssse3:reg> }
+palignr<SSSE3BW>, 0x<SSSE3BW:pfx>0f3a0f, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|<SSSE3BW:vvvv>|NoSuf, { Imm8, <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pabs<bw><SSSE3BW>, 0x<SSSE3BW:pfx>0f381c | <bw:opc>, <SSSE3BW:cpu>, Modrm|<SSSE3BW:attr>|NoSuf, { <SSSE3BW:reg>|<SSSE3BW:mem>|Unspecified|BaseIndex, <SSSE3BW:reg> }
+pabsd<SSSE3D>, 0x<SSSE3D:pfx>0f381e, <SSSE3D:cpu>, Modrm|<SSSE3D:attr>|NoSuf, { <SSSE3D:reg>|<SSSE3D:mem>|Unspecified|BaseIndex, <SSSE3D:reg> }
// SSE4.1 instructions.
<sse41:cpu:attr:scal:vvvv, $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, $sse:SSE4_1:::>
+<SSE41BW:cpu:attr:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512BW&AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1::>
+<SSE41DQ:cpu:attr:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512DQ&AVX512VL:EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1::>
+<SSE41D:cpu:attr:scal:vvvv, +
+ $avx:AVX|AVX512VL:Vex128|EVex128|VexW0|Disp8MemShift=4|SSE2AVX:VexLIG|EVexLIG|VexW0|Disp8MemShift=2|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1:::>
+<SSE41Q:cpu:attr:scal:vvvv, +
+ $avx:AVX:Vex128|VexW0|SSE2AVX:VexLIG|VexW0|SSE2AVX:VexVVVV, +
+ $apx:AVX512VL:EVex128|VexW1|Disp8MemShift=4|SSE2AVX:EVexLIG|VexW1|Disp8MemShift=3|SSE2AVX:VexVVVV, +
+ $sse:SSE4_1:::>
+
<sd:ppfx:spfx:opc:vexw:elem, s::f3:0:VexW0:Dword, d:66:f2:1:VexW1:Qword>
blendp<sd><sse41>, 0x660f3a0c | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1379,56 +1470,60 @@ blendvp<sd>, 0x664a | <sd:opc>, AVX, Mod
blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
dpp<sd><sse41>, 0x660f3a40 | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-extractps, 0x6617, AVX, Modrm|Vex128|Space0F3A|VexW0|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
-extractps, 0x6617, AVX&x64, RegMem|Vex128|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64 }
+extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64 }
extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|NoRex64, { Imm8, RegXMM, Reg64 }
-insertps<sse41>, 0x660f3a21, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-movntdqa<sse41>, 0x660f382a, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
+insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+movntdqa<SSE41D>, 0x660f382a, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
mpsadbw<sse41>, 0x660f3a42, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-packusdw<sse41>, 0x660f382b, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+packusdw<SSE41BW>, 0x660f382b, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|SSE2AVX, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x664c, AVX, Modrm|Vex128|Space0F3A|VexVVVV|VexW0|NoSuf|Implicit1stXmm0|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
pblendvb, 0x660f3810, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pblendw<sse41>, 0x660f3a0e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, RegMem|<sse41:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
-pextr<bw><sse41>, 0x660f3a14 | <bw:opc>, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
-pextrd<sse41>, 0x660f3a16, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+pextr<bw><SSE41BW>, 0x660f3a14 | <bw:opc>, <SSE41BW:cpu>, RegMem|<SSE41BW:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
+pextr<bw><SSE41BW>, 0x660f3a14 | <bw:opc>, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
+pextrd<SSE41DQ>, 0x660f3a16, <SSE41DQ:cpu>, Modrm|<SSE41DQ:attr>|Disp8MemShift|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
pextrq, 0x6616, AVX&x64, Modrm|Vex|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
+pextrq, 0x6616, AVX512DQ&AVX512VL&x64, Modrm|EVex128|Space0F3A|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
pextrq, 0x660f3a16, SSE4_1&x64, Modrm|Size64|NoSuf, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
phminposuw<sse41>, 0x660f3841, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pinsrb<sse41>, 0x660f3a20, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
-pinsrb<sse41>, 0x660f3a20, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM }
-pinsrd<sse41>, 0x660f3a22, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|IgnoreSize, { Imm8, Reg32|Unspecified|BaseIndex, RegXMM }
+pinsrb<SSE41BW>, 0x660f3a20, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
+pinsrb<SSE41BW>, 0x660f3a20, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|Disp8MemShift|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM }
+pinsrd<SSE41DQ>, 0x660f3a22, <SSE41DQ:cpu>, Modrm|<SSE41DQ:attr>|<SSE41DQ:vvvv>|Disp8MemShift|NoSuf|IgnoreSize, { Imm8, Reg32|Unspecified|BaseIndex, RegXMM }
pinsrq, 0x6622, AVX&x64, Modrm|Vex|Space0F3A|VexVVVV|VexW1|NoSuf|SSE2AVX, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
+pinsrq, 0x6622, AVX512DQ&AVX512VL&x64, Modrm|EVex128|Space0F3A|VexVVVV|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
pinsrq, 0x660f3a22, SSE4_1&x64, Modrm|Size64|NoSuf, { Imm8, Reg64|Unspecified|BaseIndex, RegXMM }
-pmaxsb<sse41>, 0x660f383c, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmaxsd<sse41>, 0x660f383d, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmaxud<sse41>, 0x660f383f, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmaxuw<sse41>, 0x660f383e, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminsb<sse41>, 0x660f3838, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminsd<sse41>, 0x660f3839, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminud<sse41>, 0x660f383b, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pminuw<sse41>, 0x660f383a, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmovsxbw<sse41>, 0x660f3820, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxbd<sse41>, 0x660f3821, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxbq<sse41>, 0x660f3822, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxwd<sse41>, 0x660f3823, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxwq<sse41>, 0x660f3824, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovsxdq<sse41>, 0x660f3825, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxbw<sse41>, 0x660f3830, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxbd<sse41>, 0x660f3831, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxbq<sse41>, 0x660f3832, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxwd<sse41>, 0x660f3833, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxwq<sse41>, 0x660f3834, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmovzxdq<sse41>, 0x660f3835, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
-pmuldq<sse41>, 0x660f3828, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pmulld<sse41>, 0x660f3840, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxsb<SSE41BW>, 0x660f383c, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxsd<SSE41D>, 0x660f383d, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxud<SSE41D>, 0x660f383f, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmaxuw<SSE41BW>, 0x660f383e, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminsb<SSE41BW>, 0x660f3838, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminsd<SSE41D>, 0x660f3839, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminud<SSE41D>, 0x660f383b, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pminuw<SSE41BW>, 0x660f383a, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmovsxbw<SSE41BW>, 0x660f3820, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxbd<SSE41D>, 0x660f3821, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxbq<SSE41D>, 0x660f3822, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxwd<SSE41D>, 0x660f3823, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxwq<SSE41D>, 0x660f3824, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovsxdq<SSE41D>, 0x660f3825, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxbw<SSE41BW>, 0x660f3830, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxbd<SSE41D>, 0x660f3831, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxbq<SSE41D>, 0x660f3832, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Word|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxwd<SSE41D>, 0x660f3833, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxwq<SSE41D>, 0x660f3834, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmovzxdq<SSE41D>, 0x660f3835, <SSE41D:cpu>, Modrm|<SSE41D:attr>|Disp8MemShift|NoSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+pmuldq<SSE41Q>, 0x660f3828, <SSE41Q:cpu>, Modrm|<SSE41Q:attr>|<SSE41Q:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pmulld<SSE41D>, 0x660f3840, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
ptest<sse41>, 0x660f3817, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-roundp<sd><sse41>, 0x660f3a08 | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
-rounds<sd><sse41>, 0x660f3a0a | <sd:opc>, <sse41:cpu>, Modrm|<sse41:scal>|<sse41:vvvv>|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM }
+roundpd<SSE41Q>, 0x660f3a09, <SSE41Q:cpu>, Modrm|<SSE41Q:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+roundps<SSE41D>, 0x660f3a08, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+roundsd<SSE41Q>, 0x660f3a0b, <SSE41Q:cpu>, Modrm|<SSE41Q:scal>|<SSE41Q:vvvv>|NoSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
+roundss<SSE41D>, 0x660f3a0a, <SSE41D:cpu>, Modrm|<SSE41D:scal>|<SSE41D:vvvv>|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
// SSE4.2 instructions.
@@ -1465,31 +1560,38 @@ xsaveopt64, 0xfae/6, Xsaveopt&x64, Modrm
// AES instructions.
<aes:cpu:attr:vvvv, $avx:AVX&:Vex128|VexW0|SSE2AVX:VexVVVV, $sse:::>
-
-aesdec<aes>, 0x660f38de, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-aesdeclast<aes>, 0x660f38df, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-aesenc<aes>, 0x660f38dc, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
-aesenclast<aes>, 0x660f38dd, <aes:cpu>AES, Modrm|<aes:attr>|<aes:vvvv>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<AES:cpu:attr, +
+ $avx:&(AVX|AVX512VL):Vex128|EVex128|VexW0|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse::>
+
+aesdec<AES>, 0x660f38de, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+aesdeclast<AES>, 0x660f38df, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+aesenc<AES>, 0x660f38dc, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+aesenclast<AES>, 0x660f38dd, AES<AES:cpu>, Modrm|<AES:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
aesimc<aes>, 0x660f38db, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
aeskeygenassist<aes>, 0x660f3adf, <aes:cpu>AES, Modrm|<aes:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
// PCLMULQDQ
-<pclmul:cpu:attr, $avx:AVX&:Vex128|VexW0|SSE2AVX|VexVVVV, $sse::>
-
-pclmulqdq<pclmul>, 0x660f3a44, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmullqlqdq<pclmul>, 0x660f3a44/0x00, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmulhqlqdq<pclmul>, 0x660f3a44/0x01, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmullqhqdq<pclmul>, 0x660f3a44/0x10, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
-pclmulhqhqdq<pclmul>, 0x660f3a44/0x11, <pclmul:cpu>PCLMULQDQ, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<pclmul:cpu:attr, +
+ $avx:&(AVX|AVX512VL):Vex128|EVex128|VexW0|VexVVVV|Disp8MemShift=4|SSE2AVX, +
+ $sse::>
+
+pclmulqdq<pclmul>, 0x660f3a44, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmullqlqdq<pclmul>, 0x660f3a44/0x00, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmulhqlqdq<pclmul>, 0x660f3a44/0x01, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmullqhqdq<pclmul>, 0x660f3a44/0x10, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
+pclmulhqhqdq<pclmul>, 0x660f3a44/0x11, PCLMULQDQ<pclmul:cpu>, Modrm|<pclmul:attr>|NoSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
// GFNI
-<gfni:cpu:w0:w1, $avx:AVX&:Vex128|VexW0|SSE2AVX|VexVVVV:Vex128|VexW1|SSE2AVX|VexVVVV, $sse:::>
-
-gf2p8affineqb<gfni>, 0x660f3ace, <gfni:cpu>GFNI, Modrm|<gfni:w1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
-gf2p8affineinvqb<gfni>, 0x660f3acf, <gfni:cpu>GFNI, Modrm|<gfni:w1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
-gf2p8mulb<gfni>, 0x660f38cf, <gfni:cpu>GFNI, Modrm|<gfni:w0>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+<gfni:cpu:attr:vexw0:vexw1, +
+ $avx:&(AVX|AVX512VL):Vex128|EVex128|VexVVVV|Disp8MemShift=4|SSE2AVX:VexW0:VexW1, +
+ $sse::::>
+
+gf2p8affineqb<gfni>, 0x660f3ace, GFNI<gfni:cpu>, Modrm|<gfni:attr>|<gfni:vexw1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+gf2p8affineinvqb<gfni>, 0x660f3acf, GFNI<gfni:cpu>, Modrm|<gfni:attr>|<gfni:vexw1>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
+gf2p8mulb<gfni>, 0x660f38cf, GFNI<gfni:cpu>, Modrm|<gfni:attr>|<gfni:vexw0>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
// AVX instructions.
More information about the Binutils mailing list