[PATCH 5/8] Support APX NDD optimized encoding.
Cui, Lili
lili.cui@intel.com
Tue Sep 19 15:25:24 GMT 2023
From: "Hu, Lin1" <lin1.hu@intel.com>
This patch optimizes APX NDD insns whose destination register is also a source operand into the shorter non-NDD encoding, e.g.:
add %r16, %r15, %r15 -> add %r16, %r15
gas/ChangeLog:
* config/tc-i386.c (optimize_NDD_to_nonNDD): New function.
(match_template): If an APX NDD insn can be optimized, rematch the
template.
* testsuite/gas/i386/x86-64.exp: Add test.
* testsuite/gas/i386/x86-64-apx-ndd-optimize.d: New test.
* testsuite/gas/i386/x86-64-apx-ndd-optimize.s: Ditto.
---
gas/config/tc-i386.c | 49 +++++++
.../gas/i386/x86-64-apx-ndd-optimize.d | 120 ++++++++++++++++++
.../gas/i386/x86-64-apx-ndd-optimize.s | 115 +++++++++++++++++
gas/testsuite/gas/i386/x86-64.exp | 1 +
4 files changed, 285 insertions(+)
create mode 100644 gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
create mode 100644 gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 381e389bb04..fba97ae37d8 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7091,6 +7091,46 @@ check_EgprOperands (const insn_template *t)
return 0;
}
+/* Optimize APX NDD insns to non-NDD insns. */
+
+static int
+optimize_NDD_to_nonNDD (const insn_template *t)
+{
+ if (t->opcode_modifier.vexvvvv
+ && t->opcode_space == SPACE_EVEXMAP4
+ && i.reg_operands >= 2
+ && (i.types[i.operands - 1].bitfield.dword
+ || i.types[i.operands - 1].bitfield.qword))
+ {
+ int tmp_flag = -1;
+ int dest = i.operands - 1;
+ int src1 = (i.operands > 2) ? i.operands - 2 : 0;
+ int src2 = (i.operands > 3) ? i.operands - 3 : 0;
+
+ if (i.op[src1].regs == i.op[dest].regs)
+ tmp_flag = src2;
+ /* adcx and adox don't have D bit. */
+ else if (i.op[src2].regs == i.op[dest].regs
+ && (t->opcode_modifier.d
+ || t->mnem_off == MN_adcx
+ || t->mnem_off == MN_adox)
+ && (t->mnem_off != MN_sub)
+ && (t->mnem_off != MN_sbb))
+ tmp_flag = src1;
+ if (tmp_flag != -1)
+ {
+ --i.operands;
+ --i.reg_operands;
+ --i.tm.operands;
+
+ if (tmp_flag != src2)
+ swap_2_operands (tmp_flag, src2);
+ return 1;
+ }
+ }
+ return 0;
+}
+
/* Helper function for the progress() macro in match_template(). */
static INLINE enum i386_error progress (enum i386_error new,
enum i386_error last,
@@ -7562,6 +7602,15 @@ match_template (char mnem_suffix)
slip through to break. */
}
+ /* If we can optimize an NDD insn to a non-NDD insn, like
+ add %r16, %r8, %r8 -> add %r16, %r8, then rematch template. */
+ if (optimize_NDD_to_nonNDD (t))
+ {
+ t = current_templates->start;
+ --t;
+ continue;
+ }
+
/* Check if VEX/EVEX encoding requirements can be satisfied. */
if (VEX_check_encoding (t))
{
diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
new file mode 100644
index 00000000000..173b368da71
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -0,0 +1,120 @@
+#objdump: -drw
+#name: x86-64 APX NDD optimized encoding
+#source: x86-64-apx-ndd-optimize.s
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f4 7d 18 ff c0\s+inc\s+%ax,%ax
+\s*[a-f0-9]+:\s*ff c0\s+inc\s+%eax
+\s*[a-f0-9]+:\s*48 ff c0\s+inc\s+%rax
+\s*[a-f0-9]+:\s*d5 18 ff c0\s+inc\s+%r16
+\s*[a-f0-9]+:\s*62 44 3c 18 00 f8\s+add\s+%r31b,%r8b,%r8b
+\s*[a-f0-9]+:\s*62 44 04 10 00 f8\s+add\s+%r31b,%r8b,%r31b
+\s*[a-f0-9]+:\s*62 44 3c 18 00 f8\s+add\s+%r31b,%r8b,%r8b
+\s*[a-f0-9]+:\s*d5 4d 01 f8\s+add\s+%r31,%r8
+\s*[a-f0-9]+:\s*d5 4d 01 f8\s+add\s+%r31,%r8
+\s*[a-f0-9]+:\s*d5 45 01 f8\s+add\s+%r31d,%r8d
+\s*[a-f0-9]+:\s*d5 45 01 f8\s+add\s+%r31d,%r8d
+\s*[a-f0-9]+:\s*62 44 3d 18 01 f8\s+add\s+%r31w,%r8w,%r8w
+\s*[a-f0-9]+:\s*62 44 3d 18 01 f8\s+add\s+%r31w,%r8w,%r8w
+\s*[a-f0-9]+:\s*d5 4d 01 f8\s+add\s+%r31,%r8
+\s*[a-f0-9]+:\s*d5 1d 03 c7\s+add\s+%r31,%r8
+\s*[a-f0-9]+:\s*d5 4d 03 38\s+add\s+\(%r8\),%r31
+\s*[a-f0-9]+:\s*d5 1d 03 07\s+add\s+\(%r31\),%r8
+\s*[a-f0-9]+:\s*49 81 c7 33 44 34 12\s+add\s+\$0x12344433,%r15
+\s*[a-f0-9]+:\s*49 81 c0 11 22 33 f4\s+add\s+\$0xfffffffff4332211,%r8
+\s*[a-f0-9]+:\s*d5 18 ff c9\s+dec\s+%r17
+\s*[a-f0-9]+:\s*d5 18 f7 d1\s+not\s+%r17
+\s*[a-f0-9]+:\s*d5 18 f7 d9\s+neg\s+%r17
+\s*[a-f0-9]+:\s*d5 1c 29 f9\s+sub\s+%r15,%r17
+\s*[a-f0-9]+:\s*62 54 04 18 29 38\s+sub\s+%r15d,\(%r8\),%r15d
+\s*[a-f0-9]+:\s*d5 49 2b 04 07\s+sub\s+\(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 ee 34 12 00 00\s+sub\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 19 f9\s+sbb\s+%r15,%r17
+\s*[a-f0-9]+:\s*62 54 84 18 19 38\s+sbb\s+%r15,\(%r8\),%r15
+\s*[a-f0-9]+:\s*d5 49 1b 04 07\s+sbb\s+\(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 de 34 12 00 00\s+sbb\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 11 f9\s+adc\s+%r15,%r17
+\s*[a-f0-9]+:\s*45 13 38\s+adc\s+\(%r8\),%r15d
+\s*[a-f0-9]+:\s*d5 49 13 04 07\s+adc\s+\(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 d6 34 12 00 00\s+adc\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 09 f9\s+or\s+ %r15,%r17
+\s*[a-f0-9]+:\s*45 0b 38\s+or\s+ \(%r8\),%r15d
+\s*[a-f0-9]+:\s*d5 49 0b 04 07\s+or\s+\(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 ce 34 12 00 00\s+or\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 31 f9\s+xor\s+%r15,%r17
+\s*[a-f0-9]+:\s*45 33 38\s+xor\s+\(%r8\),%r15d
+\s*[a-f0-9]+:\s*d5 49 33 04 07\s+xor\s+\(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 f6 34 12 00 00\s+xor\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 1c 21 f9\s+and\s+%r15,%r17
+\s*[a-f0-9]+:\s*45 23 38\s+and\s+\(%r8\),%r15d
+\s*[a-f0-9]+:\s*d5 49 23 04 07\s+and\s+\(%r15,%rax,1\),%r16
+\s*[a-f0-9]+:\s*d5 19 81 e6 34 12 00 00\s+and\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 19 81 e6 34 12 00 00\s+and\s+\$0x1234,%r30
+\s*[a-f0-9]+:\s*d5 19 d1 cf\s+ror\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 cf\s+ror\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*49 c1 cc 02\s+ror\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 c7\s+rol\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 c7\s+rol\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*49 c1 c4 02\s+rol\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 df\s+rcr\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 df\s+rcr\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*62 d4 1c 18 c0 dc 02\s+rcr\s+\$0x2,%r12b,%r12b
+\s*[a-f0-9]+:\s*49 c1 dc 02\s+rcr\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 d7\s+rcl\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 d7\s+rcl\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*62 d4 1c 18 c0 d4 02\s+rcl\s+\$0x2,%r12b,%r12b
+\s*[a-f0-9]+:\s*49 c1 d4 02\s+rcl\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 e7\s+shl\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 e7 \s+shl\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*62 d4 1c 18 c0 e4 02\s+shl\s+\$0x2,%r12b,%r12b
+\s*[a-f0-9]+:\s*49 c1 e4 02\s+shl\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 ff\s+sar\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 ff\s+sar\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*62 d4 1c 18 c0 fc 02\s+sar\s+\$0x2,%r12b,%r12b
+\s*[a-f0-9]+:\s*49 c1 fc 02\s+sar\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 e7\s+shl\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 e7\s+shl\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*62 d4 1c 18 c0 e4 02\s+shl\s+\$0x2,%r12b,%r12b
+\s*[a-f0-9]+:\s*49 c1 e4 02\s+shl\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*d5 19 d1 ef\s+shr\s+%r31
+\s*[a-f0-9]+:\s*62 dc 04 10 d0 ef\s+shr\s+%r31b,%r31b
+\s*[a-f0-9]+:\s*62 d4 1c 18 c0 ec 02\s+shr\s+\$0x2,%r12b,%r12b
+\s*[a-f0-9]+:\s*49 c1 ec 02\s+shr\s+\$0x2,%r12
+\s*[a-f0-9]+:\s*62 74 9c 18 24 20 01\s+shld\s+\$0x1,%r12,\(%rax\),%r12
+\s*[a-f0-9]+:\s*62 54 1d 18 24 c4 02\s+shld\s+\$0x2,%r8w,%r12w,%r12w
+\s*[a-f0-9]+:\s*62 74 35 18 a5 08\s+shld\s+ %cl,%r9w,\(%rax\),%r9w
+\s*[a-f0-9]+:\s*d5 9c a5 e0\s+shld\s+%cl,%r12,%r16
+\s*[a-f0-9]+:\s*62 7c 15 18 a5 2c 83\s+shld\s+%cl,%r13w,\(%r19,%rax,4\),%r13w
+\s*[a-f0-9]+:\s*62 74 9c 18 2c 20 01\s+shrd\s+\$0x1,%r12,\(%rax\),%r12
+\s*[a-f0-9]+:\s*4d 0f ac ec 01\s+shrd\s+\$0x1,%r13,%r12
+\s*[a-f0-9]+:\s*62 54 1d 18 2c c4 02\s+shrd\s+\$0x2,%r8w,%r12w,%r12w
+\s*[a-f0-9]+:\s*62 74 35 18 ad 08\s+shrd\s+%cl,%r9w,\(%rax\),%r9w
+\s*[a-f0-9]+:\s*d5 9c ad e0\s+shrd\s+%cl,%r12,%r16
+\s*[a-f0-9]+:\s*62 7c 15 18 ad 2c 83\s+shrd\s+%cl,%r13w,\(%r19,%rax,4\),%r13w
+\s*[a-f0-9]+:\s*66 45 0f 38 f6 c7\s+adcx\s+%r15d,%r8d
+\s*[a-f0-9]+:\s*62 14 79 08 66 04 3f\s+adcx\s+\(%r15,%r31,1\),%r8d
+\s*[a-f0-9]+:\s*f3 45 0f 38 f6 c7\s+adox\s+%r15d,%r8d
+\s*[a-f0-9]+:\s*62 14 7a 08 66 04 3f\s+adox\s+\(%r15,%r31,1\),%r8d
+\s*[a-f0-9]+:\s*67 0f 40 90 90 90 90 90\s+cmovo\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 41 90 90 90 90 90\s+cmovno\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 42 90 90 90 90 90\s+cmovb\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 43 90 90 90 90 90\s+cmovae\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 44 90 90 90 90 90\s+cmove\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 45 90 90 90 90 90\s+cmovne\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 46 90 90 90 90 90\s+cmovbe\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 47 90 90 90 90 90\s+cmova\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 48 90 90 90 90 90\s+cmovs\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 49 90 90 90 90 90\s+cmovns\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4a 90 90 90 90 90\s+cmovp\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4b 90 90 90 90 90\s+cmovnp\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4c 90 90 90 90 90\s+cmovl\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90\s+cmovge\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90\s+cmovle\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90\s+cmovg\s+-0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*67 0f af 90 09 09 09 00\s+imul\s+0x90909\(%eax\),%edx
+\s*[a-f0-9]+:\s*d5 aa af 94 f8 09 09 00 00\s+imul\s+0x909\(%rax,%r31,8\),%rdx
diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
new file mode 100644
index 00000000000..139d875d5a7
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -0,0 +1,115 @@
+# Check 64bit APX NDD instructions with optimized encoding
+
+ .allow_index_reg
+ .text
+_start:
+inc %ax,%ax
+inc %eax,%eax
+inc %rax,%rax
+inc %r16,%r16
+add %r31b,%r8b,%r8b
+add %r31b,%r8b,%r31b
+addb %r31b,%r8b,%r8b
+add %r31,%r8,%r8
+addq %r31,%r8,%r8
+add %r31d,%r8d,%r8d
+addl %r31d,%r8d,%r8d
+add %r31w,%r8w,%r8w
+addw %r31w,%r8w,%r8w
+{store} add %r31,%r8,%r8
+{load} add %r31,%r8,%r8
+add %r31,(%r8),%r31
+add (%r31),%r8,%r8
+add $0x12344433,%r15,%r15
+add $0xfffffffff4332211,%r8,%r8
+dec %r17,%r17
+not %r17,%r17
+neg %r17,%r17
+sub %r15,%r17,%r17
+sub %r15d,(%r8),%r15d
+sub (%r15,%rax,1),%r16,%r16
+sub $0x1234,%r30,%r30
+sbb %r15,%r17,%r17
+sbb %r15,(%r8),%r15
+sbb (%r15,%rax,1),%r16,%r16
+sbb $0x1234,%r30,%r30
+adc %r15,%r17,%r17
+adc %r15d,(%r8),%r15d
+adc (%r15,%rax,1),%r16,%r16
+adc $0x1234,%r30,%r30
+or %r15,%r17,%r17
+or %r15d,(%r8),%r15d
+or (%r15,%rax,1),%r16,%r16
+or $0x1234,%r30,%r30
+xor %r15,%r17,%r17
+xor %r15d,(%r8),%r15d
+xor (%r15,%rax,1),%r16,%r16
+xor $0x1234,%r30,%r30
+and %r15,%r17,%r17
+and %r15d,(%r8),%r15d
+and (%r15,%rax,1),%r16,%r16
+and $0x1234,%r30,%r30
+and $0x1234,%r30
+ror %r31,%r31
+rorb %r31b,%r31b
+ror $0x2,%r12,%r12
+rol %r31,%r31
+rolb %r31b,%r31b
+rol $0x2,%r12,%r12
+rcr %r31,%r31
+rcrb %r31b,%r31b
+rcr $0x2,%r12b,%r12b
+rcr $0x2,%r12,%r12
+rcl %r31,%r31
+rclb %r31b,%r31b
+rcl $0x2,%r12b,%r12b
+rcl $0x2,%r12,%r12
+shl %r31,%r31
+shlb %r31b,%r31b
+shl $0x2,%r12b,%r12b
+shl $0x2,%r12,%r12
+sar %r31,%r31
+sarb %r31b,%r31b
+sar $0x2,%r12b,%r12b
+sar $0x2,%r12,%r12
+shl %r31,%r31
+shlb %r31b,%r31b
+shl $0x2,%r12b,%r12b
+shl $0x2,%r12,%r12
+shr %r31,%r31
+shrb %r31b,%r31b
+shr $0x2,%r12b,%r12b
+shr $0x2,%r12,%r12
+shld $0x1,%r12,(%rax),%r12
+shld $0x2,%r8w,%r12w,%r12w
+shld %cl,%r9w,(%rax),%r9w
+shld %cl,%r12,%r16,%r16
+shld %cl,%r13w,(%r19,%rax,4),%r13w
+shrd $0x1,%r12,(%rax),%r12
+shrd $0x1,%r13,%r12,%r12
+shrd $0x2,%r8w,%r12w,%r12w
+shrd %cl,%r9w,(%rax),%r9w
+shrd %cl,%r12,%r16,%r16
+shrd %cl,%r13w,(%r19,%rax,4),%r13w
+adcx %r15d,%r8d,%r8d
+adcx (%r15,%r31,1),%r8d,%r8d
+adox %r15d,%r8d,%r8d
+adox (%r15,%r31,1),%r8d,%r8d
+cmovo 0x90909090(%eax),%edx,%edx
+cmovno 0x90909090(%eax),%edx,%edx
+cmovb 0x90909090(%eax),%edx,%edx
+cmovae 0x90909090(%eax),%edx,%edx
+cmove 0x90909090(%eax),%edx,%edx
+cmovne 0x90909090(%eax),%edx,%edx
+cmovbe 0x90909090(%eax),%edx,%edx
+cmova 0x90909090(%eax),%edx,%edx
+cmovs 0x90909090(%eax),%edx,%edx
+cmovns 0x90909090(%eax),%edx,%edx
+cmovp 0x90909090(%eax),%edx,%edx
+cmovnp 0x90909090(%eax),%edx,%edx
+cmovl 0x90909090(%eax),%edx,%edx
+cmovge 0x90909090(%eax),%edx,%edx
+cmovle 0x90909090(%eax),%edx,%edx
+cmovg 0x90909090(%eax),%edx,%edx
+imul 0x90909(%eax),%edx,%edx
+imul 0x909(%rax,%r31,8),%rdx,%rdx
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index ca1583c6f88..c48430ca7cb 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -549,6 +549,7 @@ run_dump_test "x86-64-optimize-6"
run_list_test "x86-64-optimize-7a" "-I${srcdir}/$subdir -march=+noavx -al"
run_dump_test "x86-64-optimize-7b"
run_list_test "x86-64-optimize-8" "-I${srcdir}/$subdir -march=+noavx2 -al"
+run_dump_test "x86-64-apx-ndd-optimize"
run_dump_test "x86-64-align-branch-1a"
run_dump_test "x86-64-align-branch-1b"
run_dump_test "x86-64-align-branch-1c"
--
2.25.1
More information about the Binutils
mailing list