[PATCH] x86: VCVTNEPS2BF16{X,Y} should permit broadcasting

Jan Beulich jbeulich@suse.com
Mon Jan 20 12:00:00 GMT 2020


Just like other VCVT*{X,Y} templates do, and to allow the programmer
flexibility (might be relevant in particular when heavily macro-izing
code), the two templates should also have Broadcast set, just like their
X/Y-suffix-less counterparts. This in turn requires them to also have
* Dword set on their memory operands, to cover the logic added to i386gen
  by 4a1b91eabbe7 ("x86: Expand Broadcast to 3 bits"),
* Xmmword/Ymmword set on their memory operands, to satisfy broadcast
  sizing logic in gas itself.
Without Broadcast, ATTSyntax templates wouldn't need such operand size
attributes.

While extending the test cases, also add Intel syntax broadcast forms
without explicit size specifiers.

gas/
2020-01-XX  Jan Beulich  <jbeulich@suse.com>

	* testsuite/gas/i386/avx512_bf16_vl.s,
	testsuite/gas/i386/x86-64-avx512_bf16_vl.s: Add broadcast forms
	of VCVTNEPS2BF16{X,Y}. Add operand-size-less Intel syntax
	broadcast forms of VCVTNEPS2BF16.
	* testsuite/gas/i386/avx512_bf16_vl.d,
	testsuite/gas/i386/x86-64-avx512_bf16_vl.d: Adjust expectations.

opcodes/
2020-01-XX  Jan Beulich  <jbeulich@suse.com>

	* i386-opc.tbl (vcvtneps2bf16x): Add Broadcast, Xmmword, and
	Dword.
	(vcvtneps2bf16y): Add Broadcast, Ymmword, and Dword.
	* i386-tbl.h: Re-generate.

---
Arguably, just like other VCVT*{X,Y}, the ones here could then also be
made to permit RegXMM/RegYMM as source operands. Personally I'd prefer
this, but there was resistance to this in similar earlier cases.

--- a/gas/testsuite/gas/i386/avx512_bf16_vl.d
+++ b/gas/testsuite/gas/i386/avx512_bf16_vl.d
@@ -23,9 +23,11 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f2 7e 28 72 f5    	vcvtneps2bf16 %ymm5,%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 0f 72 b4 f4 00 00 00 10 	vcvtneps2bf16x 0x10000000\(%esp,%esi,8\),%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f2 7e 18 72 31    	vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
+[ 	]*[a-f0-9]+:	62 f2 7e 18 72 31    	vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 08 72 71 7f 	vcvtneps2bf16x 0x7f0\(%ecx\),%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 9f 72 b2 00 f8 ff ff 	vcvtneps2bf16 -0x800\(%edx\)\{1to4\},%xmm6\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 f2 7e 38 72 31    	vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
+[ 	]*[a-f0-9]+:	62 f2 7e 38 72 31    	vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 28 72 71 7f 	vcvtneps2bf16y 0xfe0\(%ecx\),%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e bf 72 b2 00 f0 ff ff 	vcvtneps2bf16 -0x1000\(%edx\)\{1to8\},%xmm6\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 f2 56 28 52 f4    	vdpbf16ps %ymm4,%ymm5,%ymm6
@@ -52,9 +54,11 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f2 7e 28 72 f5    	vcvtneps2bf16 %ymm5,%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 0f 72 b4 f4 00 00 00 10 	vcvtneps2bf16x 0x10000000\(%esp,%esi,8\),%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f2 7e 18 72 31    	vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
+[ 	]*[a-f0-9]+:	62 f2 7e 18 72 31    	vcvtneps2bf16 \(%ecx\)\{1to4\},%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 08 72 71 7f 	vcvtneps2bf16x 0x7f0\(%ecx\),%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 9f 72 b2 00 f8 ff ff 	vcvtneps2bf16 -0x800\(%edx\)\{1to4\},%xmm6\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 f2 7e 38 72 31    	vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
+[ 	]*[a-f0-9]+:	62 f2 7e 38 72 31    	vcvtneps2bf16 \(%ecx\)\{1to8\},%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e 28 72 71 7f 	vcvtneps2bf16y 0xfe0\(%ecx\),%xmm6
 [ 	]*[a-f0-9]+:	62 f2 7e bf 72 b2 00 f0 ff ff 	vcvtneps2bf16 -0x1000\(%edx\)\{1to8\},%xmm6\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 f2 56 28 52 f4    	vdpbf16ps %ymm4,%ymm5,%ymm6
--- a/gas/testsuite/gas/i386/avx512_bf16_vl.s
+++ b/gas/testsuite/gas/i386/avx512_bf16_vl.s
@@ -17,9 +17,11 @@ _start:
 	vcvtneps2bf16	%ymm5, %xmm6	 #AVX512{BF16,VL}
 	vcvtneps2bf16x	0x10000000(%esp, %esi, 8), %xmm6{%k7}	 #AVX512{BF16,VL} MASK_ENABLING
 	vcvtneps2bf16	(%ecx){1to4}, %xmm6	 #AVX512{BF16,VL} BROADCAST_EN
+	vcvtneps2bf16x	(%ecx){1to4}, %xmm6	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16x	2032(%ecx), %xmm6	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	-2048(%edx){1to4}, %xmm6{%k7}{z}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
 	vcvtneps2bf16	(%ecx){1to8}, %xmm6	 #AVX512{BF16,VL} BROADCAST_EN
+	vcvtneps2bf16y	(%ecx){1to8}, %xmm6	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16y	4064(%ecx), %xmm6	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	-4096(%edx){1to8}, %xmm6{%k7}{z}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
 	vdpbf16ps	%ymm4, %ymm5, %ymm6	 #AVX512{BF16,VL}
@@ -47,9 +49,11 @@ _start:
 	vcvtneps2bf16	xmm6, xmm5	 #AVX512{BF16,VL}
 	vcvtneps2bf16	xmm6, ymm5	 #AVX512{BF16,VL}
 	vcvtneps2bf16	xmm6{k7}, XMMWORD PTR [esp+esi*8+0x10000000]	 #AVX512{BF16,VL} MASK_ENABLING
+	vcvtneps2bf16	xmm6, [ecx]{1to4}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm6, DWORD PTR [ecx]{1to4}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm6, XMMWORD PTR [ecx+2032]	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	xmm6{k7}{z}, DWORD PTR [edx-2048]{1to4}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
+	vcvtneps2bf16	xmm6, [ecx]{1to8}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm6, DWORD PTR [ecx]{1to8}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm6, YMMWORD PTR [ecx+4064]	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	xmm6{k7}{z}, DWORD PTR [edx-4096]{1to8}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
--- a/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d
+++ b/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.d
@@ -23,9 +23,11 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 02 7e 28 72 f5    	vcvtneps2bf16 %ymm29,%xmm30
 [ 	]*[a-f0-9]+:	62 22 7e 0f 72 b4 f5 00 00 00 10 	vcvtneps2bf16x 0x10000000\(%rbp,%r14,8\),%xmm30\{%k7\}
 [ 	]*[a-f0-9]+:	62 c2 7e 18 72 29    	vcvtneps2bf16 \(%r9\)\{1to4\},%xmm21
+[ 	]*[a-f0-9]+:	62 f2 7e 18 72 09    	vcvtneps2bf16 \(%rcx\)\{1to4\},%xmm1
 [ 	]*[a-f0-9]+:	62 62 7e 08 72 71 7f 	vcvtneps2bf16x 0x7f0\(%rcx\),%xmm30
 [ 	]*[a-f0-9]+:	62 62 7e 9f 72 aa 00 f8 ff ff 	vcvtneps2bf16 -0x800\(%rdx\)\{1to4\},%xmm29\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 c2 7e 38 72 31    	vcvtneps2bf16 \(%r9\)\{1to8\},%xmm22
+[ 	]*[a-f0-9]+:	62 f2 7e 38 72 11    	vcvtneps2bf16 \(%rcx\)\{1to8\},%xmm2
 [ 	]*[a-f0-9]+:	62 e2 7e 28 72 79 7f 	vcvtneps2bf16y 0xfe0\(%rcx\),%xmm23
 [ 	]*[a-f0-9]+:	62 62 7e bf 72 9a 00 f0 ff ff 	vcvtneps2bf16 -0x1000\(%rdx\)\{1to8\},%xmm27\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 02 16 20 52 f4    	vdpbf16ps %ymm28,%ymm29,%ymm30
@@ -51,9 +53,11 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 02 7e 08 72 f5    	vcvtneps2bf16 %xmm29,%xmm30
 [ 	]*[a-f0-9]+:	62 02 7e 28 72 f5    	vcvtneps2bf16 %ymm29,%xmm30
 [ 	]*[a-f0-9]+:	62 22 7e 0f 72 b4 f5 00 00 00 10 	vcvtneps2bf16x 0x10000000\(%rbp,%r14,8\),%xmm30\{%k7\}
+[ 	]*[a-f0-9]+:	62 f2 7e 18 72 29    	vcvtneps2bf16 \(%rcx\)\{1to4\},%xmm5
 [ 	]*[a-f0-9]+:	62 42 7e 18 72 09    	vcvtneps2bf16 \(%r9\)\{1to4\},%xmm25
 [ 	]*[a-f0-9]+:	62 62 7e 08 72 71 7f 	vcvtneps2bf16x 0x7f0\(%rcx\),%xmm30
 [ 	]*[a-f0-9]+:	62 62 7e 9f 72 b2 00 f8 ff ff 	vcvtneps2bf16 -0x800\(%rdx\)\{1to4\},%xmm30\{%k7\}\{z\}
+[ 	]*[a-f0-9]+:	62 f2 7e 38 72 21    	vcvtneps2bf16 \(%rcx\)\{1to8\},%xmm4
 [ 	]*[a-f0-9]+:	62 42 7e 38 72 01    	vcvtneps2bf16 \(%r9\)\{1to8\},%xmm24
 [ 	]*[a-f0-9]+:	62 62 7e 28 72 71 7f 	vcvtneps2bf16y 0xfe0\(%rcx\),%xmm30
 [ 	]*[a-f0-9]+:	62 62 7e bf 72 b2 00 f0 ff ff 	vcvtneps2bf16 -0x1000\(%rdx\)\{1to8\},%xmm30\{%k7\}\{z\}
--- a/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s
+++ b/gas/testsuite/gas/i386/x86-64-avx512_bf16_vl.s
@@ -17,9 +17,11 @@ _start:
 	vcvtneps2bf16	%ymm29, %xmm30	 #AVX512{BF16,VL}
 	vcvtneps2bf16x	0x10000000(%rbp, %r14, 8), %xmm30{%k7}	 #AVX512{BF16,VL} MASK_ENABLING
 	vcvtneps2bf16	(%r9){1to4}, %xmm21	 #AVX512{BF16,VL} BROADCAST_EN
+	vcvtneps2bf16x	(%rcx){1to4}, %xmm1	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16x	2032(%rcx), %xmm30	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	-2048(%rdx){1to4}, %xmm29{%k7}{z}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
 	vcvtneps2bf16	(%r9){1to8}, %xmm22	 #AVX512{BF16,VL} BROADCAST_EN
+	vcvtneps2bf16y	(%rcx){1to8}, %xmm2	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16y	4064(%rcx), %xmm23	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	-4096(%rdx){1to8}, %xmm27{%k7}{z}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
 	vdpbf16ps	%ymm28, %ymm29, %ymm30	 #AVX512{BF16,VL}
@@ -47,9 +49,11 @@ _start:
 	vcvtneps2bf16	xmm30, xmm29	 #AVX512{BF16,VL}
 	vcvtneps2bf16	xmm30, ymm29	 #AVX512{BF16,VL}
 	vcvtneps2bf16	xmm30{k7}, XMMWORD PTR [rbp+r14*8+0x10000000]	 #AVX512{BF16,VL} MASK_ENABLING
+	vcvtneps2bf16	xmm5, [rcx]{1to4}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm25, DWORD PTR [r9]{1to4}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm30, XMMWORD PTR [rcx+2032]	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	xmm30{k7}{z}, DWORD PTR [rdx-2048]{1to4}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
+	vcvtneps2bf16	xmm4, [rcx]{1to8}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm24, DWORD PTR [r9]{1to8}	 #AVX512{BF16,VL} BROADCAST_EN
 	vcvtneps2bf16	xmm30, YMMWORD PTR [rcx+4064]	 #AVX512{BF16,VL} Disp8
 	vcvtneps2bf16	xmm30{k7}{z}, DWORD PTR [rdx-4096]{1to8}	 #AVX512{BF16,VL} Disp8 BROADCAST_EN MASK_ENABLING ZEROCTL
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -4771,8 +4771,8 @@ vcvtneps2bf16, 2, 0xf372, None, 1, CpuAV
 vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Broadcast|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegYMM|Dword|BaseIndex, RegXMM }
 vcvtneps2bf16, 2, 0xf372, None, 1, CpuAVX512_BF16, Modrm|VexOpcode|EVex512|Masking=3|VexW0|Broadcast|Disp8MemShift=6|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegZMM|Dword|Unspecified|BaseIndex, RegYMM }
 
-vcvtneps2bf16x, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex128|Masking=3|VexW0|Disp8MemShift=4|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Unspecified|BaseIndex, RegXMM }
-vcvtneps2bf16y, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Unspecified|BaseIndex, RegXMM }
+vcvtneps2bf16x, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex128|Masking=3|VexW0|Broadcast|Disp8MemShift=4|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Xmmword|Dword|Unspecified|BaseIndex, RegXMM }
+vcvtneps2bf16y, 2, 0xf372, None, 1, CpuAVX512_BF16|CpuAVX512VL, Modrm|VexOpcode|EVex256|Masking=3|VexW0|Broadcast|Disp8MemShift=5|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, { Ymmword|Dword|Unspecified|BaseIndex, RegXMM }
 
 vdpbf16ps, 3, 0xf352, None, 1, CpuAVX512_BF16, Modrm|VexOpcode|VexVVVV|Masking=3|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
 



More information about the Binutils mailing list