This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RE: [PATCH 3/4] Arm64: correct {su,us}dot SIMD encodings


Hi Jan,

Thanks! As with the rest, this one looks OK too, but you still need a maintainer to approve it.

Cheers,
Tamar

> -----Original Message-----
> From: binutils-owner@sourceware.org <binutils-owner@sourceware.org>
> On Behalf Of Jan Beulich
> Sent: Friday, December 27, 2019 10:40
> To: binutils@sourceware.org
> Cc: Marcus Shawcroft <Marcus.Shawcroft@arm.com>; Mihail Ionescu
> <Mihail.Ionescu@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>
> Subject: [PATCH 3/4] Arm64: correct {su,us}dot SIMD encodings
> 
> According to the specification, these permit the Q bit to control the
> vector length operated on, and hence this bit should not already be set
> in the opcode table entries (it instead needs setting dynamically). Note
> how the test case output also did not match its input. Besides
> correcting the test case, also extend it to cover both forms.
> 
> gas/
> 2020-01-XX  Jan Beulich  <jbeulich@suse.com>
> 
> 	* testsuite/gas/aarch64/i8mm.s: Add 128-bit form tests for
> 	by-element usdot. Add 64-bit form tests for by-element sudot.
> 	* testsuite/gas/aarch64/i8mm.d: Adjust expectations.
> 
> opcodes/
> 2020-01-XX  Jan Beulich  <jbeulich@suse.com>
> 
> 	* aarch64-tbl.h (aarch64_opcode_table): Correct SIMD
> 	forms of SUDOT and USDOT.
> 
> --- a/gas/testsuite/gas/aarch64/i8mm.d
> +++ b/gas/testsuite/gas/aarch64/i8mm.d
> @@ -29,15 +29,23 @@ Disassembly of section \.text:
>   *[0-9a-f]+:	6e80a400 	ummla	v0\.4s, v0\.16b, v0\.16b
>   *[0-9a-f]+:	4e80ac00 	usmmla	v0\.4s, v0\.16b, v0\.16b
>   *[0-9a-f]+:	4e9baeb1 	usmmla	v17\.4s, v21\.16b, v27\.16b
> - *[0-9a-f]+:	4e9b9eb1 	usdot	v17\.2s, v21\.8b, v27\.8b
> - *[0-9a-f]+:	4e809c00 	usdot	v0\.2s, v0\.8b, v0\.8b
> - *[0-9a-f]+:	4e9b9eb1 	usdot	v17\.2s, v21\.8b, v27\.8b
> - *[0-9a-f]+:	4e809c00 	usdot	v0\.2s, v0\.8b, v0\.8b
> - *[0-9a-f]+:	4fbbfab1 	usdot	v17\.2s, v21\.8b, v27\.4b\[3\]
> - *[0-9a-f]+:	4fa0f800 	usdot	v0\.2s, v0\.8b, v0\.4b\[3\]
> - *[0-9a-f]+:	4f9bf2b1 	usdot	v17\.2s, v21\.8b, v27\.4b\[0\]
> - *[0-9a-f]+:	4f80f000 	usdot	v0\.2s, v0\.8b, v0\.4b\[0\]
> - *[0-9a-f]+:	4f3bfab1 	sudot	v17\.2s, v21\.8b, v27\.4b\[3\]
> - *[0-9a-f]+:	4f20f800 	sudot	v0\.2s, v0\.8b, v0\.4b\[3\]
> - *[0-9a-f]+:	4f1bf2b1 	sudot	v17\.2s, v21\.8b, v27\.4b\[0\]
> - *[0-9a-f]+:	4f00f000 	sudot	v0\.2s, v0\.8b, v0\.4b\[0\]
> + *[0-9a-f]+:	0e9b9eb1 	usdot	v17\.2s, v21\.8b, v27\.8b
> + *[0-9a-f]+:	0e809c00 	usdot	v0\.2s, v0\.8b, v0\.8b
> + *[0-9a-f]+:	4e9b9eb1 	usdot	v17\.4s, v21\.16b, v27\.16b
> + *[0-9a-f]+:	4e809c00 	usdot	v0\.4s, v0\.16b, v0\.16b
> + *[0-9a-f]+:	0fbbfab1 	usdot	v17\.2s, v21\.8b, v27\.4b\[3\]
> + *[0-9a-f]+:	0fa0f800 	usdot	v0\.2s, v0\.8b, v0\.4b\[3\]
> + *[0-9a-f]+:	0f9bf2b1 	usdot	v17\.2s, v21\.8b, v27\.4b\[0\]
> + *[0-9a-f]+:	0f80f000 	usdot	v0\.2s, v0\.8b, v0\.4b\[0\]
> + *[0-9a-f]+:	4fbbfab1 	usdot	v17\.4s, v21\.16b, v27\.4b\[3\]
> + *[0-9a-f]+:	4fa0f800 	usdot	v0\.4s, v0\.16b, v0\.4b\[3\]
> + *[0-9a-f]+:	4f9bf2b1 	usdot	v17\.4s, v21\.16b, v27\.4b\[0\]
> + *[0-9a-f]+:	4f80f000 	usdot	v0\.4s, v0\.16b, v0\.4b\[0\]
> + *[0-9a-f]+:	0f3bfab1 	sudot	v17\.2s, v21\.8b, v27\.4b\[3\]
> + *[0-9a-f]+:	0f20f800 	sudot	v0\.2s, v0\.8b, v0\.4b\[3\]
> + *[0-9a-f]+:	0f1bf2b1 	sudot	v17\.2s, v21\.8b, v27\.4b\[0\]
> + *[0-9a-f]+:	0f00f000 	sudot	v0\.2s, v0\.8b, v0\.4b\[0\]
> + *[0-9a-f]+:	4f3bfab1 	sudot	v17\.4s, v21\.16b, v27\.4b\[3\]
> + *[0-9a-f]+:	4f20f800 	sudot	v0\.4s, v0\.16b, v0\.4b\[3\]
> + *[0-9a-f]+:	4f1bf2b1 	sudot	v17\.4s, v21\.16b, v27\.4b\[0\]
> + *[0-9a-f]+:	4f00f000 	sudot	v0\.4s, v0\.16b, v0\.4b\[0\]
> --- a/gas/testsuite/gas/aarch64/i8mm.s
> +++ b/gas/testsuite/gas/aarch64/i8mm.s
> @@ -49,7 +49,15 @@ usdot	v17.2s, v21.8b, v27.4b[3]
>  usdot	v0.2s, v0.8b, v0.4b[3]
>  usdot	v17.2s, v21.8b, v27.4b[0]
>  usdot	v0.2s, v0.8b, v0.4b[0]
> +usdot	v17.4s, v21.16b, v27.4b[3]
> +usdot	v0.4s, v0.16b, v0.4b[3]
> +usdot	v17.4s, v21.16b, v27.4b[0]
> +usdot	v0.4s, v0.16b, v0.4b[0]
> 
> +sudot	v17.2s, v21.8b, v27.4b[3]
> +sudot	v0.2s, v0.8b, v0.4b[3]
> +sudot	v17.2s, v21.8b, v27.4b[0]
> +sudot	v0.2s, v0.8b, v0.4b[0]
>  sudot	v17.4s, v21.16b, v27.4b[3]
>  sudot	v0.4s, v0.16b, v0.4b[3]
>  sudot	v17.4s, v21.16b, v27.4b[0]
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -5092,9 +5092,9 @@ struct aarch64_opcode aarch64_opcode_tab
>    INT8MATMUL_INSN ("smmla",  0x4e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
>    INT8MATMUL_INSN ("ummla",  0x6e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
>    INT8MATMUL_INSN ("usmmla", 0x4e80ac00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
> -  INT8MATMUL_INSN ("usdot",  0x4e809c00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
> -  INT8MATMUL_INSN ("usdot",  0x4f80f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
> -  INT8MATMUL_INSN ("sudot",  0x4f00f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
> +  INT8MATMUL_INSN ("usdot",  0x0e809c00, 0xbfe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
> +  INT8MATMUL_INSN ("usdot",  0x0f80f000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
> +  INT8MATMUL_INSN ("sudot",  0x0f00f000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
> 
>    /* BFloat instructions.  */
>    BFLOAT16_SVE_INSNC ("bfdot",  0x64608000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]