[PATCH 1/8] Support APX GPR32 with rex2 prefix
Jan Beulich
jbeulich@suse.com
Thu Sep 21 15:27:56 GMT 2023
On 19.09.2023 17:25, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -247,6 +247,7 @@ enum i386_error
> invalid_vector_register_set,
> invalid_tmm_register_set,
> invalid_dest_and_src_register_set,
> + invalid_pseudo_prefix,
> unsupported_vector_index_register,
> unsupported_broadcast,
> broadcast_needed,
> @@ -353,6 +354,7 @@ struct _i386_insn
> modrm_byte rm;
> rex_byte rex;
> rex_byte vrex;
> + rex_byte rex2; // for extends gpr32 r16-r31
Malformed comment. I'm not convinced one needs to be here in the first place.
> @@ -405,6 +407,11 @@ struct _i386_insn
> /* Compressed disp8*N attribute. */
> unsigned int memshift;
>
> + /* No CSPAZO flags update.*/
> + bool has_nf;
> +
> + bool has_zero_upper;
> +
> /* Prefer load or store in encoding. */
> enum
> {
> @@ -426,6 +433,9 @@ struct _i386_insn
> /* Prefer the REX byte in encoding. */
> bool rex_encoding;
>
> + /* Prefer the REX2 byte in encoding. */
> + bool rex2_encoding;
What is "the REX2 byte"? There are two bytes involved there ...
> @@ -1165,6 +1175,7 @@ static const arch_entry cpu_arch[] =
> VECARCH (sm4, SM4, ANY_SM4, reset),
> SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
> VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
> + SUBARCH (apx_f, APX_F, APX_F, false),
> };
>
> #undef SUBARCH
> @@ -1694,6 +1705,7 @@ is_cpu (const insn_template *t, enum i386_cpu cpu)
> case CpuHLE: return t->cpu.bitfield.cpuhle;
> case CpuAVX512F: return t->cpu.bitfield.cpuavx512f;
> case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
> + case CpuAPX_F: return t->cpu.bitfield.cpuapx_f;
Nit: Please get padding right.
> case Cpu64: return t->cpu.bitfield.cpu64;
> case CpuNo64: return t->cpu.bitfield.cpuno64;
> default:
> @@ -2332,6 +2344,9 @@ register_number (const reg_entry *r)
> if (r->reg_flags & RegRex)
> nr += 8;
>
> + if (r->reg_flags & RegRex2)
> + nr += 16;
> +
> if (r->reg_flags & RegVRex)
> nr += 16;
>
> @@ -3832,6 +3847,18 @@ is_any_vex_encoding (const insn_template *t)
> return t->opcode_modifier.vex || is_evex_encoding (t);
> }
>
> +static INLINE bool
> +is_any_apx_encoding (void)
> +{
> + return i.rex2 || i.rex2_encoding;
> +}
> +
> +static INLINE bool
> +is_any_apx_rex2_encoding (void)
> +{
> + return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;
> +}
There's no particularly good place to make this remark: I was expecting
REX2 handling to rather follow REX handling, not VEX/EVEX one. I certainly
consider at least the first helper's name misleading (APX also includes
various EVEX encodings, after all), and I also don't really like you
(ab)using i.vex.length for REX2 handling.
> @@ -4089,6 +4116,19 @@ build_evex_prefix (void)
> i.vex.bytes[3] |= i.mask.reg->reg_num;
> }
>
> +/* Build (2 bytes) rex2 prefix.
> + | D5h |
> + | m | R4 X4 B4 | W R X B |
> +*/
> +static void
> +build_rex2_prefix (void)
> +{
> + i.vex.length = 2;
> + i.vex.bytes[0] = 0xd5;
> + i.vex.bytes[1] = ((i.tm.opcode_space << 7)
> + | (i.rex2 << 4) | i.rex);
> +}
> +
> static void
> process_immext (void)
> {
> @@ -4354,12 +4394,12 @@ optimize_encoding (void)
> i.suffix = 0;
> /* Convert to byte registers. */
> if (i.types[1].bitfield.word)
> - j = 16;
> + j = 16 + 16; // new 16 apx additional gprs.
> else if (i.types[1].bitfield.dword)
> - j = 32;
> + j = 32 + 16 * 2; // new 16 apx additional gprs
> else
> - j = 48;
> - if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
> + j = 48 + 16 * 3; // new 16 apx additional gprs
> + if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
> j += 8;
This is getting unwieldy: There are too many hard-coded literal numbers
here, and there continues to be zero indication in i386-reg.tbl that the
order of entries is actually relevant.
Also again, please write wellformed comments (when such are useful).
> @@ -5269,6 +5309,9 @@ md_assemble (char *line)
> case invalid_dest_and_src_register_set:
> err_msg = _("destination and source registers must be distinct");
> break;
> + case invalid_pseudo_prefix:
> + err_msg = _("unsupport rex2 pseudo prefix");
If at all, "unsupported". Maybe better "cannot be used here"?
> @@ -5498,7 +5541,17 @@ md_assemble (char *line)
> as_warn (_("translating to `%sp'"), insn_name (&i.tm));
> }
>
> - if (is_any_vex_encoding (&i.tm))
> + if (is_any_apx_encoding ())
> + {
> + if (!is_any_vex_encoding (&i.tm)
I think you should be able to use a cheaper predicate here. No VEX-
encoded APX insns exist, aiui.
> + && i.tm.opcode_space <= SPACE_0F
> + && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
Is the i.vex.register_specifier check really needed here? Any such template
would be an EVEX one, wouldn't it (so the earlier check already covered
those)?
> + build_rex2_prefix ();
> +
> + /* The individual REX.RXBW bits got consumed. */
> + i.rex &= REX_OPCODE;
As to my earlier naming remark - much of course depends on what the further
plans here are.
> + }
> + else if (is_any_vex_encoding (&i.tm))
> {
> if (!cpu_arch_flags.bitfield.cpui286)
> {
> @@ -5514,6 +5567,13 @@ md_assemble (char *line)
> return;
> }
>
> + /* Check for explicit REX2 prefix. */
> + if (i.rex2 || i.rex2_encoding)
> + {
> + as_bad (_("REX2 prefix invalid with `%s'"), insn_name (&i.tm));
> + return;
> + }
> +
> if (i.tm.opcode_modifier.vex)
> build_vex_prefix (t);
> else
> @@ -5553,11 +5613,11 @@ md_assemble (char *line)
> && (i.op[1].regs->reg_flags & RegRex64) != 0)
> || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
> || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
> - && i.rex != 0))
> + && (i.rex != 0 || i.rex2!=0)))
Nit: Please get coding style right (also elsewhere).
> {
> int x;
> -
> - i.rex |= REX_OPCODE;
> + if (!i.rex2)
> + i.rex |= REX_OPCODE;
> for (x = 0; x < 2; x++)
> {
> /* Look for 8 bit operand that uses old registers. */
> @@ -5567,9 +5627,16 @@ md_assemble (char *line)
> gas_assert (!(i.op[x].regs->reg_flags & RegRex));
> /* In case it is "hi" register, give up. */
> if (i.op[x].regs->reg_num > 3)
> - as_bad (_("can't encode register '%s%s' in an "
> - "instruction requiring REX prefix."),
> - register_prefix, i.op[x].regs->reg_name);
> + {
> + if (i.rex)
> + as_bad (_("can't encode register '%s%s' in an "
> + "instruction requiring REX prefix."),
> + register_prefix, i.op[x].regs->reg_name);
> + else
> + as_bad (_("can't encode register '%s%s' in an "
> + "instruction requiring REX2 prefix."),
> + register_prefix, i.op[x].regs->reg_name);
> + }
I don't think separate messages are needed here, Just alter the
existing one to say "... REX/REX2 ...".
> @@ -5580,7 +5647,7 @@ md_assemble (char *line)
> }
> }
>
> - if (i.rex == 0 && i.rex_encoding)
> + if ((i.rex == 0 && i.rex_encoding) || (i.rex2 == 0 && i.rex2_encoding))
> {
> /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
> that uses legacy register. If it is "hi" register, don't add
I think this comment wants updating as well, so there's no question of
it having gone stale (by mentioning only REX_OPCODE).
> @@ -6899,6 +6971,42 @@ VEX_check_encoding (const insn_template *t)
> return 0;
> }
>
> +/* Check if Egprs operands are valid for the instruction. */
> +
> +static int
> +check_EgprOperands (const insn_template *t)
> +{
> + if (t->opcode_modifier.no_egpr)
> + {
> + for (unsigned int op = 0; op < i.operands; op++)
> + {
> + if (i.types[op].bitfield.class != Reg)
> + continue;
> +
> + if (i.op[op].regs->reg_flags & RegRex2)
> + {
> + i.error = register_type_mismatch;
Already here I wonder if re-using this error indicator (and hence
issuing the same error message as is issued for other reasons) is
going to be helpful. However, ...
> + return 1;
> + }
> + }
> +
> + if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
> + || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
> + {
> + i.error = register_type_mismatch;
... here I'm certain it needs to be a different one. It should be
made obvious that the register used is part of the address for the
memory operand, not a register one.
> + return 1;
> + }
> +
> + /* Check pseudo prefix {rex2} are valid. */
> + if (i.rex2_encoding)
> + {
> + i.error = invalid_pseudo_prefix;
> + return 1;
> + }
> + }
> + return 0;
> +}
Transiently, until more checking is added (like patch 2 in the second
series you've sent), you'll break all kinds of insns which don't
have No_egpr set, but which aren't valid to be used with the extended
registers (e.g. all VEX encodings, to name one large group).
> @@ -13985,6 +14115,14 @@ static bool check_register (const reg_entry *r)
> i.vec_encoding = vex_encoding_error;
> }
>
> + if (r->reg_flags & RegRex2)
> + {
> + if (!cpu_arch_flags.bitfield.cpuapx_f
> + || flag_code != CODE_64BIT
> + || i.rex_encoding)
I'm not sure i.rex_encoding is valid to check (already) here. Or else
you'd also need to check i.vec_encoding.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> @@ -0,0 +1,18 @@
> +# Check Illegal 64bit APX instructions
> + .text
> + .arch .noapx_f
> + test $0x7, %r17d
> + .arch .apx_f
> + test $0x7, %r17d
> + xsave (%r16, %rbx)
> + xsave64 (%r16, %rbx)
> + xrstor (%r16, %rbx)
> + xrstor64 (%r16, %rbx)
> + xsaves (%r16, %rbx)
> + xsaves64 (%r16, %rbx)
> + xrstors (%r16, %rbx)
> + xrstors64 (%r16, %rbx)
> + xsaveopt (%r16, %rbx)
> + xsaveopt64 (%r16, %rbx)
> + xsavec (%r16, %rbx)
> + xsavec64 (%r16, %rbx)
Don't you also want to check the index register?
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.d
> @@ -0,0 +1,29 @@
> +#as:
> +#objdump: -dw
> +#name: x86-64 APX use gpr32 with rex2 prefix illegal check
> +#source: x86-64-apx-rex2-inval.s
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section .text:
> +
> +0+ <_start>:
> +\s*[a-f0-9]+:\s*d5 f0 d5 f0\s+{rex2} pmullw %mm0,%mm6
> +\s*[a-f0-9]+:\s*d5 f9 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 88 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 f7 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 80 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*66 d5 f9 d5 f9\s+{rex2} pmullw %xmm9,%xmm7
These all look valid, yet the test name says "invalid" and the title says
"illegal". Can you clarify what this is about?
Also may I ask that you use [ ] instead of \s, to aid readability?
> +\s*[a-f0-9]+:\s*66 41\s+data16 rex.B
> +\s*[a-f0-9]+:\s*d5 f9 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 ff 21 f8\s+{rex2} mov %db15,%r24
> +\s*[a-f0-9]+:\s*d5 01 21 00\s+{rex2} and %eax,\(%r8\)
> +\s*[a-f0-9]+:\s*d5 00 00 f7\s+{rex2} add %sil,%dil
> +\s*[a-f0-9]+:\s*d5 ff 20 f8\s+{rex2} mov %cr15,%r24
> +\s*[a-f0-9]+:\s*d5 81 ae\s+\(bad\)
> +\s*[a-f0-9]+:\s*27\s+\(bad\)
> +\s*[a-f0-9]+:\s*d5 c1 38\s+\(bad\)
> +\s*[a-f0-9]+:\s*f6\s+.byte 0xf6
> +\s*[a-f0-9]+:\s*07\s+\(bad\)
> +#pass
> diff --git a/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.s b/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.s
> new file mode 100644
> index 00000000000..51dd8df79d6
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.s
> @@ -0,0 +1,25 @@
> +# Check 64bit instructions with rex2 prefix bad encoding
> +
> + .allow_index_reg
> + .text
> +_start:
> +# check {rex2} pseudo prefix to force REX2 encoding.
> +.byte 0xd5, 0xf0, 0xd5, 0xf0
> +.byte 0xd5, 0xf9, 0xd5, 0xf9
> +.byte 0xd5, 0x88, 0xd5, 0xf9
> +.byte 0xd5, 0xf7, 0xd5, 0xf9
> +.byte 0xd5, 0x80, 0xd5, 0xf9
> +
> +.byte 0x66
> +.byte 0xd5, 0xf9, 0xd5, 0xf9
> +.byte 0x66, 0x41
> +.byte 0xd5, 0xf9, 0xd5, 0xf9
> +.byte 0xd5, 0xff, 0x21, 0xf8
> +.byte 0xd5, 0x01, 0x21, 0x00
> +.byte 0xd5, 0x00, 0x00, 0xf7
> +.byte 0xd5, 0xff, 0x20, 0xf8
> +# check xsave/xstore are not allowed to use rex2.
> +.byte 0xd5, 0x81, 0xae, 0x27
> +# check rex2 only use for map0/1
> +.byte 0xd5, 0xc1, 0x38, 0xf6, 0x07
Please try to limit .byte use in source as much as possible. Emitting
bogus prefixes may require its use, but that should be about it.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-rex2.s
> @@ -0,0 +1,175 @@
> +# Check 64bit instructions with rex2 prefix encoding
> +
> + .allow_index_reg
> + .text
> +_start:
> + test $0x7, %r24b
> + test $0x7, %r24d
> + test $0x7, %r24
> + test $0x7, %r24w
> +## R bit
> + leal (%rax), %r16d
> + leal (%rax), %r17d
> + leal (%rax), %r18d
> + leal (%rax), %r19d
> + leal (%rax), %r20d
> + leal (%rax), %r21d
> + leal (%rax), %r22d
> + leal (%rax), %r23d
> + leal (%rax), %r24d
> + leal (%rax), %r25d
> + leal (%rax), %r26d
> + leal (%rax), %r27d
> + leal (%rax), %r28d
> + leal (%rax), %r29d
> + leal (%rax), %r30d
> + leal (%rax), %r31d
> +## X bit
> + leal (,%r16), %eax
> + leal (,%r17), %eax
> + leal (,%r18), %eax
> + leal (,%r19), %eax
> + leal (,%r20), %eax
> + leal (,%r21), %eax
> + leal (,%r22), %eax
> + leal (,%r23), %eax
> + leal (,%r24), %eax
> + leal (,%r25), %eax
> + leal (,%r26), %eax
> + leal (,%r27), %eax
> + leal (,%r28), %eax
> + leal (,%r29), %eax
> + leal (,%r30), %eax
> + leal (,%r31), %eax
> +## B bit
> + leal (%r16), %eax
> + leal (%r17), %eax
> + leal (%r18), %eax
> + leal (%r19), %eax
> + leal (%r20), %eax
> + leal (%r21), %eax
> + leal (%r22), %eax
> + leal (%r23), %eax
> + leal (%r24), %eax
> + leal (%r25), %eax
> + leal (%r26), %eax
> + leal (%r27), %eax
> + leal (%r28), %eax
> + leal (%r29), %eax
> + leal (%r30), %eax
> + leal (%r31), %eax
> +## SIB
> + leal 1(%r20), %eax
> + leal 1(%r28), %eax
> + leal 129(%r20), %eax
> + leal 129(%r28), %eax
I don't see why the comment says "SIB" for these.
> +## W bit
> + leaq (%rax), %r15
> + leaq (%rax), %r16
> + leaq (%r15), %rax
> + leaq (%r16), %rax
> + leaq (,%r15), %rax
> + leaq (,%r16), %rax
> +## M bit
> + imull %eax, %r15d
> + imull %eax, %r16d
> + punpckldq (%r18), %mm2 #D5906212
Please ensure consistent indentation, and please omit meaningless comments.
> +## AddRegFrm
> + movl $1, %r16d
>From here onwards I'm afraid I can't decipher any of the comments. In many
cases the choice of what to test (and what not) looks pretty random.
> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
(I'll look at the disassembler parts separately. This and the other patches
are quite a bit too large anyway.)
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -380,6 +380,7 @@ static bitfield cpu_flags[] =
> BITFIELD (RAO_INT),
> BITFIELD (FRED),
> BITFIELD (LKGS),
> + BITFIELD (APX_F),
> BITFIELD (MWAITX),
> BITFIELD (CLZERO),
> BITFIELD (OSPKE),
> @@ -469,6 +470,7 @@ static bitfield opcode_modifiers[] =
> BITFIELD (ATTSyntax),
> BITFIELD (IntelSyntax),
> BITFIELD (ISA64),
> + BITFIELD (No_egpr),
> };
>
Additionally a dependency of APX_F on XSAVE needs introducing.
> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -317,6 +317,8 @@ enum i386_cpu
> CpuAVX512F,
> /* Intel AVX-512 VL Instructions support required. */
> CpuAVX512VL,
> + /* Intel APX Instructions support required. */
> + CpuAPX_F,
The comment kind of misses the F in the feature identifier.
> @@ -742,6 +745,10 @@ enum
> #define INTEL64 2
> #define INTEL64ONLY 3
> ISA64,
> +
> + /* egprs (r16-r31) on instruction illegal. */
> + No_egpr,
I'm not overly happy with the name and spelling. How about NoEgpr? That's
more in line with the majority of the attributes.
> @@ -789,6 +796,7 @@ typedef struct i386_opcode_modifier
> unsigned int attsyntax:1;
> unsigned int intelsyntax:1;
> unsigned int isa64:2;
> + unsigned int no_egpr:1;
> } i386_opcode_modifier;
>
> /* Operand classes. */
> @@ -988,7 +996,7 @@ typedef struct insn_template
> AMD 3DNow! instructions.
> If this template has no extension opcode (the usual case) use None
> Instructions */
> - signed int extension_opcode:9;
> + signed int extension_opcode:0xA;
Why?
> @@ -1001,7 +1009,8 @@ typedef struct insn_template
> #define Prefix_VEX3 6 /* {vex3} */
> #define Prefix_EVEX 7 /* {evex} */
> #define Prefix_REX 8 /* {rex} */
> -#define Prefix_NoOptimize 9 /* {nooptimize} */
> +#define Prefix_REX2 9 /* {rex2} */
> +#define Prefix_NoOptimize 0xA /* {nooptimize} */
Any reason to use a hex number here?
> @@ -1028,6 +1037,7 @@ typedef struct
> #define RegRex 0x1 /* Extended register. */
> #define RegRex64 0x2 /* Extended 8 bit register. */
> #define RegVRex 0x4 /* Extended vector register. */
> +#define RegRex2 0x8 /* Extended rex2 interge register. */
Since I expect / hope the bit will be reused for extended EVEX encodings,
I don't think "rex2" should be mentioned here. Also "integer" please.
> @@ -93,6 +141,22 @@ r12, Class=Reg|Qword|BaseIndex, RegRex, 4, Dw2Inval, 12
> r13, Class=Reg|Qword|BaseIndex, RegRex, 5, Dw2Inval, 13
> r14, Class=Reg|Qword|BaseIndex, RegRex, 6, Dw2Inval, 14
> r15, Class=Reg|Qword|BaseIndex, RegRex, 7, Dw2Inval, 15
> +r16, Class=Reg|Qword|BaseIndex, RegRex2, 0, Dw2Inval, 130
> +r17, Class=Reg|Qword|BaseIndex, RegRex2, 1, Dw2Inval, 131
> +r18, Class=Reg|Qword|BaseIndex, RegRex2, 2, Dw2Inval, 132
> +r19, Class=Reg|Qword|BaseIndex, RegRex2, 3, Dw2Inval, 133
> +r20, Class=Reg|Qword|BaseIndex, RegRex2, 4, Dw2Inval, 134
> +r21, Class=Reg|Qword|BaseIndex, RegRex2, 5, Dw2Inval, 135
> +r22, Class=Reg|Qword|BaseIndex, RegRex2, 6, Dw2Inval, 136
> +r23, Class=Reg|Qword|BaseIndex, RegRex2, 7, Dw2Inval, 137
> +r24, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 0, Dw2Inval, 138
> +r25, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 1, Dw2Inval, 139
> +r26, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 2, Dw2Inval, 140
> +r27, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 3, Dw2Inval, 141
> +r28, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 4, Dw2Inval, 142
> +r29, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 5, Dw2Inval, 143
> +r30, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 6, Dw2Inval, 144
> +r31, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 7, Dw2Inval, 145
I wonder how the Dwarf register number were chosen ...
Jan
More information about the Binutils
mailing list