This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[PATCH] x86: Add NoXmmWordMem
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: binutils at sourceware dot org
- Cc: Jan Beulich <jbeulich at novell dot com>, Igor Tsimbalist <igor dot v dot tsimbalist at intel dot com>
- Date: Tue, 17 Jul 2018 12:58:36 -0700
- Subject: [PATCH] x86: Add NoXmmWordMem
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
After
commit 1b54b8d7e4fc8055f9220a5287e8a94d8a65a88d
Author: Jan Beulich <jbeulich@novell.com>
Date: Mon Dec 18 09:36:14 2017 +0100
x86: fold RegXMM/RegYMM/RegZMM into RegSIMD
... qualified by their respective sizes, allowing to drop FirstXmm0 at
the same time.
folded RegXMM, RegYMM and RegZMM into RegSIMD, it's no longer possible
to distinguish whether XMMWORD can represent a memory reference when an
operand specification contains a SIMD register. For example, template
operand specifications like these
RegXMM|...|Xmmword|...
and
RegXMM|...
are indistinguishable, because the Xmmword bitfield is always set by
RegXMM, which is represented by "RegSIMD|Xmmword". This patch adds
NoXmmWordMem to opcode_modifiers to indicate that Xmmword memory isn't
allowed.
gas/
PR gas/23418
* config/tc-i386.c (match_mem_size): Reject Xmmword memory if
it isn't allowed.
* testsuite/gas/i386/xmmword.s: Add tests for vcvtps2qq and
vcvtps2uqq.
* testsuite/gas/i386/xmmword.l: Updated.
opcodes/
PR gas/23418
* i386-gen.c (opcode_modifiers): Add NoXmmWordMem.
* i386-opc.h (NoXmmWordMem): New.
(Byte): Update comments.
(Word): Likewise.
(Dword): Likewise.
(Fword): Likewise.
(Qword): Likewise.
(Tbyte): Likewise.
(Xmmword): Likewise.
(Ymmword): Likewise.
(Zmmword): Likewise.
* i386-opc.tbl: Add NoXmmWordMem to vcvtps2qq and vcvtps2uqq.
* i386-tbl.h: Regenerated.
---
gas/config/tc-i386.c | 50 +-
gas/testsuite/gas/i386/xmmword.l | 2 +
gas/testsuite/gas/i386/xmmword.s | 3 +
opcodes/i386-gen.c | 1 +
opcodes/i386-opc.h | 22 +-
opcodes/i386-opc.tbl | 4 +-
opcodes/i386-tbl.h | 10122 ++++++++++++++---------------
7 files changed, 5112 insertions(+), 5092 deletions(-)
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index fd2e81740d..8a1ebcdd60 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1935,26 +1935,36 @@ match_simd_size (const insn_template *t, unsigned int wanted, unsigned int given
static INLINE int
match_mem_size (const insn_template *t, unsigned int wanted, unsigned int given)
{
- return (match_reg_size (t, wanted, given)
- && !((i.types[given].bitfield.unspecified
- && !i.broadcast
- && !t->operand_types[wanted].bitfield.unspecified)
- || (i.types[given].bitfield.fword
- && !t->operand_types[wanted].bitfield.fword)
- /* For scalar opcode templates to allow register and memory
- operands at the same time, some special casing is needed
- here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
- down-conversion vpmov*. */
- || ((t->operand_types[wanted].bitfield.regsimd
- && !t->opcode_modifier.broadcast
- && (t->operand_types[wanted].bitfield.byte
- || t->operand_types[wanted].bitfield.word
- || t->operand_types[wanted].bitfield.dword
- || t->operand_types[wanted].bitfield.qword))
- ? (i.types[given].bitfield.xmmword
- || i.types[given].bitfield.ymmword
- || i.types[given].bitfield.zmmword)
- : !match_simd_size(t, wanted, given))));
+ if (!match_reg_size (t, wanted, given))
+ return 0;
+
+ if ((i.broadcast
+ || !i.types[given].bitfield.unspecified
+ || t->operand_types[wanted].bitfield.unspecified)
+ && (!i.types[given].bitfield.fword
+ || t->operand_types[wanted].bitfield.fword))
+ {
+ /* For scalar opcode templates to allow register and memory
+ operands at the same time, some special casing is needed
+ here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
+ down-conversion vpmov*. Reject XMMWORD memory if it isn't
+ allowed. */
+ if (t->operand_types[wanted].bitfield.regsimd
+ && (t->operand_types[wanted].bitfield.byte
+ || t->operand_types[wanted].bitfield.word
+ || t->operand_types[wanted].bitfield.dword
+ || t->operand_types[wanted].bitfield.qword))
+ return ((t->opcode_modifier.broadcast
+ || (!i.types[given].bitfield.xmmword
+ && !i.types[given].bitfield.ymmword
+ && !i.types[given].bitfield.zmmword))
+ && (!t->opcode_modifier.noxmmwordmem
+ || !i.types[given].bitfield.xmmword));
+ else
+ return match_simd_size(t, wanted, given);
+ }
+
+ return 0;
}
/* Return value has MATCH_STRAIGHT set if there is no size conflict on any
diff --git a/gas/testsuite/gas/i386/xmmword.l b/gas/testsuite/gas/i386/xmmword.l
index ce3af8d150..dc45c03c08 100644
--- a/gas/testsuite/gas/i386/xmmword.l
+++ b/gas/testsuite/gas/i386/xmmword.l
@@ -99,3 +99,5 @@
.*:127: Error: .* `vpmovzxwq'
.*:128: Error: .* `vpmovzxwq'
.*:129: Error: .* `vpmovzxwq'
+.*:131: Error: .* `vcvtps2qq'
+.*:132: Error: .* `vcvtps2uqq'
diff --git a/gas/testsuite/gas/i386/xmmword.s b/gas/testsuite/gas/i386/xmmword.s
index 47d2d8d595..b29448455e 100644
--- a/gas/testsuite/gas/i386/xmmword.s
+++ b/gas/testsuite/gas/i386/xmmword.s
@@ -127,3 +127,6 @@ xmmword:
vpmovzxwq xmm0{k7}, xmmword ptr [eax]
vpmovzxwq ymm0, xmmword ptr [eax]
vpmovzxwq ymm0{k7}, xmmword ptr [eax]
+
+ vcvtps2qq xmm0, xmmword ptr [rax]
+ vcvtps2uqq xmm0, xmmword ptr [rax]
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 9b5314cfbc..9bbd1b93c2 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -647,6 +647,7 @@ static bitfield opcode_modifiers[] =
BITFIELD (SAE),
BITFIELD (Disp8MemShift),
BITFIELD (NoDefMask),
+ BITFIELD (NoXmmWordMem),
BITFIELD (ImplicitQuadGroup),
BITFIELD (Optimize),
BITFIELD (ATTMnemonic),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 4f128309f7..4dbfa68699 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -575,6 +575,9 @@ enum
/* Default mask isn't allowed. */
NoDefMask,
+ /* No XMMWORD memory */
+ NoXmmWordMem,
+
/* The second operand must be a vector register, {x,y,z}mmN, where N is a multiple of 4.
It implicitly denotes the register group of {x,y,z}mmN - {x,y,z}mm(N + 3).
*/
@@ -654,6 +657,7 @@ typedef struct i386_opcode_modifier
unsigned int sae:1;
unsigned int disp8memshift:3;
unsigned int nodefmask:1;
+ unsigned int noxmmwordmem:1;
unsigned int implicitquadgroup:1;
unsigned int optimize:1;
unsigned int attmnemonic:1;
@@ -736,23 +740,23 @@ enum
RegMem,
/* Memory. */
Mem,
- /* BYTE memory. */
+ /* BYTE size. */
Byte,
- /* WORD memory. 2 byte */
+ /* WORD size. 2 byte */
Word,
- /* DWORD memory. 4 byte */
+ /* DWORD size. 4 byte */
Dword,
- /* FWORD memory. 6 byte */
+ /* FWORD size. 6 byte */
Fword,
- /* QWORD memory. 8 byte */
+ /* QWORD size. 8 byte */
Qword,
- /* TBYTE memory. 10 byte */
+ /* TBYTE size. 10 byte */
Tbyte,
- /* XMMWORD memory. */
+ /* XMMWORD size. */
Xmmword,
- /* YMMWORD memory. */
+ /* YMMWORD size. */
Ymmword,
- /* ZMMWORD memory. */
+ /* ZMMWORD size. */
Zmmword,
/* Unspecified memory size. */
Unspecified,
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 92d7f11b75..3bcffa1a8e 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -5406,11 +5406,11 @@ vcvtpd2uqq, 2, 0x6679, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=3|Masking=3|
vcvtpd2uqq, 3, 0x6679, None, 1, CpuAVX512DQ, Modrm|EVex=1|Masking=3|VexOpcode=0|VexW=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|StaticRounding|SAE, { Imm8, RegZMM, RegZMM }
vcvtps2qq, 2, 0x667B, None, 1, CpuAVX512DQ, Modrm|EVex=1|Masking=3|VexOpcode=0|VexW=1|Broadcast|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|YMMword|Unspecified|BaseIndex, RegZMM }
-vcvtps2qq, 2, 0x667B, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexW=1|Broadcast|Disp8MemShift=3|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|Qword|Unspecified|BaseIndex, RegXMM }
+vcvtps2qq, 2, 0x667B, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexW=1|Broadcast|NoXmmWordMem|Disp8MemShift=3|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|Qword|Unspecified|BaseIndex, RegXMM }
vcvtps2qq, 2, 0x667B, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexW=1|Broadcast|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|XMMword|Unspecified|BaseIndex, RegYMM }
vcvtps2qq, 3, 0x667B, None, 1, CpuAVX512DQ, Modrm|EVex=1|Masking=3|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|StaticRounding|SAE, { Imm8, RegYMM, RegZMM }
vcvtps2uqq, 2, 0x6679, None, 1, CpuAVX512DQ, Modrm|EVex=1|Masking=3|VexOpcode=0|VexW=1|Broadcast|Disp8MemShift=5|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|YMMword|Unspecified|BaseIndex, RegZMM }
-vcvtps2uqq, 2, 0x6679, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexW=1|Broadcast|Disp8MemShift=3|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|Qword|Unspecified|BaseIndex, RegXMM }
+vcvtps2uqq, 2, 0x6679, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=2|Masking=3|VexOpcode=0|VexW=1|Broadcast|NoXmmWordMem|Disp8MemShift=3|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|Qword|Unspecified|BaseIndex, RegXMM }
vcvtps2uqq, 2, 0x6679, None, 1, CpuAVX512DQ|CpuAVX512VL, Modrm|EVex=3|Masking=3|VexOpcode=0|VexW=1|Broadcast|Disp8MemShift=4|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Dword|XMMword|Unspecified|BaseIndex, RegYMM }
vcvtps2uqq, 3, 0x6679, None, 1, CpuAVX512DQ, Modrm|EVex=1|Masking=3|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|StaticRounding|SAE, { Imm8, RegYMM, RegZMM }