This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[OPCODES][ARM][1/3]Add armv8.2 fp16 instruction disassembler support.
- From: Renlin Li <renlin dot li at foss dot arm dot com>
- To: "binutils at sourceware dot org" <binutils at sourceware dot org>
- Cc: Nicholas Clifton <nickc at redhat dot com>, Marcus Shawcroft <Marcus dot Shawcroft at arm dot com>, Ramana Radhakrishnan <Ramana dot Radhakrishnan at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>
- Date: Fri, 19 Feb 2016 11:23:54 +0000
- Subject: [OPCODES][ARM][1/3]Add armv8.2 fp16 instruction disassembler support.
- Authentication-results: sourceware.org; auth=none
Hi all,
This is a patch to add fp16 instruction disassembler support for armv8.2.
For AArch32 scalar floating-point, a new set of scalar data processing instructions is added
using Coprocessor9 for half-precision calculations or conversions.
For all of the instructions being added, a cond field of anything other than 0b1110 is
UNPREDICTABLE; the behavior is not guaranteed.
To be specific, the following instructions are added.
New VMOVX and VINS instructions are added to allow the extraction and insertion of the
upper 16 bits of a 32-bit vector register.
vins.f16
vmovx.f16
vabs.f16
vadd.f16
vcmp.f16
vcmpe.f16
VCVT (between floating-point and fixed-point)
vcvt.f16.s32
vcvt.f16.u32
vcvt.s32.f16
vcvt.u32.f16
VCVT (between floating-point and integer, both directions)
vcvt.s32.f16
vcvt.u32.f16
vcvt.f16.u32
vcvt.f16.s32
vcvta.u32.f16
vcvta.s32.f16
vcvtm.u32.f16
vcvtm.s32.f16
vcvtn.u32.f16
vcvtn.s32.f16
vcvtp.u32.f16
vcvtp.s32.f16
vcvtr.u32.f16
vcvtr.s32.f16
vdiv.f16
vfma.f16
vfms.f16
vfnma.f16
vfnms.f16
vmaxnm.f16
vminnm.f16
vmla.f16
vmls.f16
VMOV (between general-purpose register and half-precision register, immediate)
vmov.f16
vmov.f16
vmul.f16
vneg.f16
vnmla.f16
vnmls.f16
vnmul.f16
vrinta.f16
vrintm.f16
vrintn.f16
vrintp.f16
vrintr.f16
vrintx.f16
vrintz.f16
vseleq.f16
vselge.f16
vselgt.f16
vselvs.f16
vsqrt.f16
vsub.f16
load/store literal or address from register with optional offset
vldr.16
vstr.16
The binutils, gas and ld checks all pass without any issues. Okay to commit?
opcodes/ChangeLog:
2016-02-19 Renlin Li <renlin.li@arm.com>
* arm-dis.c (coprocessor_opcodes): Add fp16 instruction entries.
(print_insn_coprocessor): Support fp16 instruction.
gas/ChangeLog:
2016-02-19 Renlin Li <renlin.li@arm.com>
* testsuite/gas/arm/copro.d: Adjust output.
* testsuite/gas/arm/copro.s: Adjust co-processor num.
diff --git a/gas/testsuite/gas/arm/copro.d b/gas/testsuite/gas/arm/copro.d
index eb7b4549b6bd9ff5c7384f631ade5bb4c9e9abf0..e9ed2ccb6f9055bba6ce76aa2ae10dc3e34c3192 100644
--- a/gas/testsuite/gas/arm/copro.d
+++ b/gas/testsuite/gas/arm/copro.d
@@ -29,7 +29,7 @@ Disassembly of section .text:
0+04c <[^>]*> fc834603 stc2 6, cr4, \[r3\], \{3\}
0+050 <[^>]*> ecd43704 ldcl 7, cr3, \[r4\], \{4\}
0+054 <[^>]*> ecc52805 stcl 8, cr2, \[r5\], \{5\}
-0+058 <[^>]*> fcd61906 ldc2l 9, cr1, \[r6\], \{6\}
+0+058 <[^>]*> fcd61c06 ldc2l 12, cr1, \[r6\], \{6\}
0+05c <[^>]*> fcc70c07 stc2l 12, cr0, \[r7\], \{7\}
0+060 <[^>]*> ecd88cff ldcl 12, cr8, \[r8\], \{255\}.*
0+064 <[^>]*> ecc99cfe stcl 12, cr9, \[r9\], \{254\}.*
diff --git a/gas/testsuite/gas/arm/copro.s b/gas/testsuite/gas/arm/copro.s
index f03f5aed0605e26e11954e377d47da4a94e33402..53533d48c208399d00048526bef272c4d2daaea4 100644
--- a/gas/testsuite/gas/arm/copro.s
+++ b/gas/testsuite/gas/arm/copro.s
@@ -31,8 +31,8 @@ bar:
stc2 p6, c4, [r3], {3}
ldcl 7, c3, [r4], {4}
stcl p8, c2, [r5], {5}
- ldc2l 9, c1, [r6], {6}
- @ using '10, 11' below results in an invalid stc2l instruction.
+ @ using '9, 10, 11' below results in an invalid ldc2l/stc2l instruction.
+ ldc2l 12, c1, [r6], {6}
stc2l p12, c0, [r7], {7}
@ using '11' below results in an (invalid) Neon vldmia instruction.
ldcl 12, c8, [r8], {255}
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index b25354f5be40c959a9cc01efda96198b9b91edaf..b38e0b8c838c5fa35f22aa3ee7785cca3653ea22 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -890,6 +890,80 @@ static const struct opcode32 coprocessor_opcodes[] =
0xfe100010, 0xff100010,
"mrc2%c\t%8-11d, %21-23d, %12-15r, cr%16-19d, cr%0-3d, {%5-7d}"},
+ /* ARMv8.2 half-precision Floating point coprocessor 9 (VFP) instructions.
+ cp_num: bit <11:8> == 0b1001.
+ cond: bit <31:28> == 0b1110, otherwise, it's UNPREDICTABLE. */
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb009c0, 0x0fbf0fd0, "vabs%c.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e300900, 0x0fb00f50, "vadd%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb40940, 0x0fbf0f50, "vcmp%7'e%c.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb50940, 0x0fbf0f70, "vcmp%7'e%c.f16\t%y1, #0.0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eba09c0, 0x0fbe0fd0, "vcvt%c.f16.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0ebe09c0, 0x0fbe0fd0, "vcvt%c.%16?us%7?31%7?26.f16\t%y1, %y1, #%5,0-3k"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0ebc0940, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb80940, 0x0fbf0f50, "vcvt%c.f16.%7?su32\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfebc0940, 0xffbc0f50, "vcvt%16-17?mpna%u.%7?su32.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e800900, 0x0fb00f50, "vdiv%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0ea00900, 0x0fb00f50, "vfma%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0ea00940, 0x0fb00f50, "vfms%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e900940, 0x0fb00f50, "vfnma%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e900900, 0x0fb00f50, "vfnms%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfeb00ac0, 0xffbf0fd0, "vins.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfeb00a40, 0xffbf0fd0, "vmovx%c.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0d100900, 0x0f300f00, "vldr%c.16\t%y1, %A"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0d000900, 0x0f300f00, "vstr%c.16\t%y1, %A"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfe800900, 0xffb00f50, "vmaxnm%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfe800940, 0xffb00f50, "vminnm%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e000900, 0x0fb00f50, "vmla%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e000940, 0x0fb00f50, "vmls%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e100910, 0x0ff00f7f, "vmov%c.f16\t%12-15r, %y2"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e000910, 0x0ff00f7f, "vmov%c.f16\t%y2, %12-15r"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xeb00900, 0x0fb00ff0, "vmov%c.f16\t%y1, #%0-3,16-19E"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e200900, 0x0fb00f50, "vmul%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb10940, 0x0fbf0fd0, "vneg%c.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e100940, 0x0fb00f50, "vnmla%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e100900, 0x0fb00f50, "vnmls%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e200940, 0x0fb00f50, "vnmul%c.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb60940, 0x0fbe0f50, "vrint%7,16??xzr%c.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfeb80940, 0xffbc0fd0, "vrint%16-17?mpna%u.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xfe000900, 0xff800f50, "vsel%20-21c%u.f16\t%y1, %y2, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0eb109c0, 0x0fbf0fd0, "vsqrt%c.f16\t%y1, %y0"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0x0e300940, 0x0fb00f50, "vsub%c.f16\t%y1, %y2, %y0"},
+
{ARM_FEATURE_CORE_LOW (0), 0, 0, 0}
};
@@ -3270,7 +3344,7 @@ print_insn_coprocessor (bfd_vma pc,
|| insn->value == 0xfc100000 /* ldc2 */
|| insn->value == 0xfc000000) /* stc2 */
{
- if (cp_num == 10 || cp_num == 11)
+ if (cp_num == 9 || cp_num == 10 || cp_num == 11)
is_unpredictable = TRUE;
}
else if (insn->value == 0x0e000000 /* cdp */
@@ -3281,7 +3355,7 @@ print_insn_coprocessor (bfd_vma pc,
|| insn->value == 0x0c000000) /* stc */
{
/* Floating-point instructions. */
- if (cp_num == 10 || cp_num == 11)
+ if (cp_num == 9 || cp_num == 10 || cp_num == 11)
continue;
}
@@ -3298,14 +3372,20 @@ print_insn_coprocessor (bfd_vma pc,
case 'A':
{
int rn = (given >> 16) & 0xf;
- bfd_vma offset = given & 0xff;
+ bfd_vma offset = given & 0xff;
func (stream, "[%s", arm_regnames [(given >> 16) & 0xf]);
if (PRE_BIT_SET || WRITEBACK_BIT_SET)
{
/* Not unindexed. The offset is scaled. */
- offset = offset * 4;
+ if (cp_num == 9)
+ /* vldr.16/vstr.16 will shift the address
+ left by 1 bit only. */
+ offset = offset * 2;
+ else
+ offset = offset * 4;
+
if (NEGATIVE_BIT_SET)
offset = - offset;
if (rn != 15)
@@ -3375,6 +3455,9 @@ print_insn_coprocessor (bfd_vma pc,
/* Fall through. */
case 'c':
+ if (cond != COND_UNCOND && cp_num == 9)
+ is_unpredictable = TRUE;
+
func (stream, "%s", arm_conditional[cond]);
break;