This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[AArch64][PATCH 6/14] Support FP16 Vector Indexed Element instructions.
- From: Matthew Wahab <matthew dot wahab at foss dot arm dot com>
- To: binutils at sourceware dot org
- Date: Fri, 11 Dec 2015 12:07:34 +0000
- Subject: [AArch64][PATCH 6/14] Support FP16 Vector Indexed Element instructions.
- Authentication-results: sourceware.org; auth=none
- References: <566AB800 dot 1090308 at foss dot arm dot com>
ARMv8.2 adds 16-bit floating point operations as an optional extension
to the floating point and Adv.SIMD support. This patch adds FP16
instructions to the Vector Indexed Element group, making them available
when +simd+fp16 is enabled.
The instructions added are: FMLA, FMLS, FMUL and FMULX.
The general form for these instructions is
<OP> <Vd>.<T>, <Vn>.<T>, <Vm>.h[<idx>]
where <T> is 4h or 8h and <idx> selects a half-precision element of <Vm>.
Tested the series for aarch64-none-linux-gnu with cross-compiled
check-binutils and check-gas.
Ok for trunk?
Matthew
gas/testsuite/
2015-12-10 Matthew Wahab <matthew.wahab@arm.com>
* gas/aarch64/advsimd-fp16.d: Update expected output.
* gas/aarch64/advsimd-fp16.s: Add tests for vector indexed element
instructions.
opcodes/
2015-12-10 Matthew Wahab <matthew.wahab@arm.com>
* aarch64-asm-2.c: Regenerate.
* aarch64-dis-2.c: Regenerate.
* aarch64-opc-2.c: Regenerate.
* aarch64-tbl.h (QL_ELEMENT_FP_H): New.
(aarch64_opcode_table): Add fp16 versions of fmla, fmls, fmul and
fmulx to the vector indexed element group.
From 6e027b6bea14c0e4883bfeca20ebaa94f04ebf13 Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Thu, 10 Sep 2015 14:50:56 +0100
Subject: [PATCH 06/14] [AArch64] Add FP16 Vector indexed element instructions
(V).
---
gas/testsuite/gas/aarch64/advsimd-fp16.d | 28 +
gas/testsuite/gas/aarch64/advsimd-fp16.s | 18 +
opcodes/aarch64-asm-2.c | 682 ++++-----
opcodes/aarch64-dis-2.c | 2408 +++++++++++++++---------------
opcodes/aarch64-opc-2.c | 106 +-
opcodes/aarch64-tbl.h | 15 +
6 files changed, 1681 insertions(+), 1576 deletions(-)
diff --git a/gas/testsuite/gas/aarch64/advsimd-fp16.d b/gas/testsuite/gas/aarch64/advsimd-fp16.d
index 9c0e945..3fd4dcb 100644
--- a/gas/testsuite/gas/aarch64/advsimd-fp16.d
+++ b/gas/testsuite/gas/aarch64/advsimd-fp16.d
@@ -428,3 +428,31 @@ Disassembly of section \.text:
[0-9a-f]+: 5ea1f820 frecpx s0, s1
[0-9a-f]+: 5ef9f820 frecpx h0, h1
[0-9a-f]+: 5ef9f800 frecpx h0, h0
+ [0-9a-f]+: 4fc31841 fmla v1.2d, v2.2d, v3.d\[1\]
+ [0-9a-f]+: 0f831841 fmla v1.2s, v2.2s, v3.s\[2\]
+ [0-9a-f]+: 4fa31041 fmla v1.4s, v2.4s, v3.s\[1\]
+ [0-9a-f]+: 0f001000 fmla v0.4h, v0.4h, v0.h\[0\]
+ [0-9a-f]+: 0f031041 fmla v1.4h, v2.4h, v3.h\[0\]
+ [0-9a-f]+: 4f001000 fmla v0.8h, v0.8h, v0.h\[0\]
+ [0-9a-f]+: 4f031041 fmla v1.8h, v2.8h, v3.h\[0\]
+ [0-9a-f]+: 4fc35841 fmls v1.2d, v2.2d, v3.d\[1\]
+ [0-9a-f]+: 0f835841 fmls v1.2s, v2.2s, v3.s\[2\]
+ [0-9a-f]+: 4fa35041 fmls v1.4s, v2.4s, v3.s\[1\]
+ [0-9a-f]+: 0f005000 fmls v0.4h, v0.4h, v0.h\[0\]
+ [0-9a-f]+: 0f035041 fmls v1.4h, v2.4h, v3.h\[0\]
+ [0-9a-f]+: 4f005000 fmls v0.8h, v0.8h, v0.h\[0\]
+ [0-9a-f]+: 4f035041 fmls v1.8h, v2.8h, v3.h\[0\]
+ [0-9a-f]+: 4fc39841 fmul v1.2d, v2.2d, v3.d\[1\]
+ [0-9a-f]+: 0f839841 fmul v1.2s, v2.2s, v3.s\[2\]
+ [0-9a-f]+: 4fa39041 fmul v1.4s, v2.4s, v3.s\[1\]
+ [0-9a-f]+: 0f009000 fmul v0.4h, v0.4h, v0.h\[0\]
+ [0-9a-f]+: 0f039041 fmul v1.4h, v2.4h, v3.h\[0\]
+ [0-9a-f]+: 4f009000 fmul v0.8h, v0.8h, v0.h\[0\]
+ [0-9a-f]+: 4f039041 fmul v1.8h, v2.8h, v3.h\[0\]
+ [0-9a-f]+: 6fc39841 fmulx v1.2d, v2.2d, v3.d\[1\]
+ [0-9a-f]+: 2f839841 fmulx v1.2s, v2.2s, v3.s\[2\]
+ [0-9a-f]+: 6fa39041 fmulx v1.4s, v2.4s, v3.s\[1\]
+ [0-9a-f]+: 2f009000 fmulx v0.4h, v0.4h, v0.h\[0\]
+ [0-9a-f]+: 2f039041 fmulx v1.4h, v2.4h, v3.h\[0\]
+ [0-9a-f]+: 6f009000 fmulx v0.8h, v0.8h, v0.h\[0\]
+ [0-9a-f]+: 6f039041 fmulx v1.8h, v2.8h, v3.h\[0\]
diff --git a/gas/testsuite/gas/aarch64/advsimd-fp16.s b/gas/testsuite/gas/aarch64/advsimd-fp16.s
index 25c69a9..ebdb97a 100644
--- a/gas/testsuite/gas/aarch64/advsimd-fp16.s
+++ b/gas/testsuite/gas/aarch64/advsimd-fp16.s
@@ -154,3 +154,21 @@
stworeg_misc frecpe
stworeg_misc frsqrte
stworeg_misc frecpx
+
+ /* Vector indexed element. */
+
+ .macro indexed_elem, op
+ \op v1.2d, v2.2d, v3.d[1]
+ \op v1.2s, v2.2s, v3.s[2]
+ \op v1.4s, v2.4s, v3.s[1]
+ \op v0.4h, v0.4h, v0.h[0]
+ \op v1.4h, v2.4h, v3.h[0]
+ \op v0.8h, v0.8h, v0.h[0]
+ \op v1.8h, v2.8h, v3.h[0]
+ .endm
+
+ indexed_elem fmla
+ indexed_elem fmls
+
+ indexed_elem fmul
+ indexed_elem fmulx
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index b2e4808..fe21511 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -1259,6 +1259,13 @@
QLF3(V_2D, V_2D, S_D), \
}
+/* e.g. FMLA <Vd>.<T>, <Vn>.<T>, <Vm>.H[<index>], where <T> is 4H or 8H.  */
+#define QL_ELEMENT_FP_H \
+{ \
+ QLF3 (V_4H, V_4H, S_H), \
+ QLF3 (V_8H, V_8H, S_H), \
+}
+
/* e.g. MOVI <Vd>.4S, #<imm8> {, LSL #<amount>}. */
#define QL_SIMD_IMM_S0W \
{ \
@@ -1459,8 +1466,14 @@ struct aarch64_opcode aarch64_opcode_table[] =
{"sqdmulh", 0xf00c000, 0xbf00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT, F_SIZEQ},
{"sqrdmulh", 0xf00d000, 0xbf00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT, F_SIZEQ},
{"fmla", 0xf801000, 0xbf80f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_FP, F_SIZEQ},
+ {"fmla", 0xf001000, 0xbfe0fc00, asimdelem, 0, SIMD_F16,
+ OP3 (Vd, Vn, Em), QL_ELEMENT_FP_H, F_SIZEQ},
{"fmls", 0xf805000, 0xbf80f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_FP, F_SIZEQ},
+ {"fmls", 0xf005000, 0xbfe0fc00, asimdelem, 0, SIMD_F16,
+ OP3 (Vd, Vn, Em), QL_ELEMENT_FP_H, F_SIZEQ},
{"fmul", 0xf809000, 0xbf80f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_FP, F_SIZEQ},
+ {"fmul", 0xf009000, 0xbfe0fc00, asimdelem, 0, SIMD_F16,
+ OP3 (Vd, Vn, Em), QL_ELEMENT_FP_H, F_SIZEQ},
{"mla", 0x2f000000, 0xbf00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT, F_SIZEQ},
{"umlal", 0x2f002000, 0xff00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_L, F_SIZEQ},
{"umlal2", 0x6f002000, 0xff00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_L2, F_SIZEQ},
@@ -1470,6 +1483,8 @@ struct aarch64_opcode aarch64_opcode_table[] =
{"umull", 0x2f00a000, 0xff00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_L, F_SIZEQ},
{"umull2", 0x6f00a000, 0xff00f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_L2, F_SIZEQ},
{"fmulx", 0x2f809000, 0xbf80f400, asimdelem, 0, SIMD, OP3 (Vd, Vn, Em), QL_ELEMENT_FP, F_SIZEQ},
+ {"fmulx", 0x2f009000, 0xbfe0fc00, asimdelem, 0, SIMD_F16,
+ OP3 (Vd, Vn, Em), QL_ELEMENT_FP_H, F_SIZEQ},
{"sqrdmlah", 0x2f00d000, 0xbf00f400, asimdelem, 0, RDMA, OP3 (Vd, Vn, Em), QL_ELEMENT, F_SIZEQ},
{"sqrdmlsh", 0x2f00f000, 0xbf00f400, asimdelem, 0, RDMA, OP3 (Vd, Vn, Em), QL_ELEMENT, F_SIZEQ},
/* AdvSIMD EXT. */
--
2.1.4