This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[Patch/ARM] Support v8.2 fp16 simd instructions
- From: Jiong Wang <jiong dot wang at foss dot arm dot com>
- To: "binutils at sourceware dot org" <binutils at sourceware dot org>
- Date: Wed, 2 Mar 2016 15:19:08 +0000
- Subject: [Patch/ARM] Support v8.2 fp16 simd instructions
- Authentication-results: sourceware.org; auth=none
This patch adds support for the new FP16 instructions introduced in the ARMv8.2-A
architecture.
Since FP16 support was already added to GAS's infrastructure by the scalar patch, the
SIMD support is quite straightforward: we just need to enlarge the related types to
include N_F16.
Testcases are added to make sure the encodings of the new instructions are correct in both
ARM and Thumb mode. This patch also contains rejection testcases to make sure the new
FP16 instructions are rejected on architectures that don't contain this feature.
2016-03-02 Jiong Wang <jiong.wang@arm.com>
gas/
* config/tc-arm.c (N_S_32): New.
(N_F_16_32): Likewise.
(N_SUF_32): Support N_F16.
(N_IF_32): Likewise.
(neon_dyadic_misc): Likewise.
(do_neon_cmp): Likewise.
(do_neon_cmp_inv): Likewise.
(do_neon_mul): Likewise.
(do_neon_fcmp_absolute): Likewise.
(do_neon_step): Likewise.
(do_neon_abs_neg): Likewise.
(CVT_FLAVOUR_VAR): Likewise.
(do_neon_cvt_1): Likewise.
(do_neon_recip_est): Likewise.
(do_vmaxnm): Likewise.
(do_vrint_1): Likewise.
(neon_check_type): Check architecture support for FP16
extension.
(insns): Update comments.
opcodes/
* arm-dis.c (neon_opcodes): Support new FP16 instructions.
gas/testsuite/
* gas/arm/armv8-2-fp16-simd.s: New test source.
* gas/arm/armv8-2-fp16-simd.d: New testcase for arm mode.
* gas/arm/armv8-2-fp16-simd-thumb.d: New testcase for thumb mode.
* gas/arm/armv8-2-fp16-simd-warning.d: New rejection test for arm mode.
* gas/arm/armv8-2-fp16-simd-warning-thumb.d: New rejection test for thumb
mode.
* gas/arm/armv8-2-fp16-simd-warning.l: New expected rejection error file.
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 5d3bae4ace346fbc7e936a7b3a81b42d2c49c5cd..c1a6f76697c9d3ddd51afd069725d6945baed249 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -13419,9 +13419,11 @@ enum neon_type_mask
#define N_SU_ALL (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64)
#define N_SU_32 (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
#define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64)
-#define N_SUF_32 (N_SU_32 | N_F32)
+#define N_S_32 (N_S8 | N_S16 | N_S32)
+#define N_F_16_32 (N_F16 | N_F32)
+#define N_SUF_32 (N_SU_32 | N_F_16_32)
#define N_I_ALL (N_I8 | N_I16 | N_I32 | N_I64)
-#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F32)
+#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F16 | N_F32)
#define N_F_ALL (N_F16 | N_F32 | N_F64)
/* Pass this as the first type argument to neon_check_type to ignore types
@@ -13891,6 +13893,15 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
k_type = g_type;
k_size = g_size;
key_allowed = thisarg & ~N_KEY;
+
+ /* Check architecture constraint on FP16 extension. */
+ if (k_size == 16
+ && k_type == NT_float
+ && ! ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16))
+ {
+ inst.error = _(BAD_FP16);
+ return badtype;
+ }
}
}
else
@@ -14702,7 +14713,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
if (et.type == NT_float)
{
NEON_ENCODE (FLOAT, inst);
- neon_three_same (neon_quad (rs), 0, -1);
+ neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
}
else
{
@@ -14863,13 +14874,13 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert)
static void
do_neon_cmp (void)
{
- neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE);
+ neon_compare (N_SUF_32, N_S_32 | N_F_16_32, FALSE);
}
static void
do_neon_cmp_inv (void)
{
- neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE);
+ neon_compare (N_SUF_32, N_S_32 | N_F_16_32, TRUE);
}
static void
@@ -14997,7 +15008,7 @@ do_neon_mul (void)
if (inst.operands[2].isscalar)
do_neon_mac_maybe_scalar ();
else
- neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8, 0);
+ neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
}
static void
@@ -15026,9 +15037,10 @@ static void
do_neon_fcmp_absolute (void)
{
enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+ struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK,
+ N_F_16_32 | N_KEY);
/* Size field comes from bit mask. */
- neon_three_same (neon_quad (rs), 1, -1);
+ neon_three_same (neon_quad (rs), 1, et.size == 16 ? (int) et.size : -1);
}
static void
@@ -15042,8 +15054,9 @@ static void
do_neon_step (void)
{
enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
- neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
- neon_three_same (neon_quad (rs), 0, -1);
+ struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK,
+ N_F_16_32 | N_KEY);
+ neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
}
static void
@@ -15059,7 +15072,7 @@ do_neon_abs_neg (void)
return;
rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
- et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY);
+ et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);
inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
inst.instruction |= HI1 (inst.operands[0].reg) << 22;
@@ -15268,6 +15281,10 @@ do_neon_shll (void)
CVT_VAR (f32_s32, N_F32, N_S32, whole_reg, "fsltos", "fsitos", NULL) \
CVT_VAR (f32_u32, N_F32, N_U32, whole_reg, "fultos", "fuitos", NULL) \
/* Half-precision conversions. */ \
+ CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \
+ CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL) \
+ CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL) \
+ CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL) \
CVT_VAR (f32_f16, N_F32, N_F16, whole_reg, NULL, NULL, NULL) \
CVT_VAR (f16_f32, N_F16, N_F32, whole_reg, NULL, NULL, NULL) \
/* New VCVT instructions introduced by ARMv8.2 fp16 extension. \
@@ -15500,10 +15517,15 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
NS_NULL);
enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
+ if (flavour == neon_cvt_flavour_invalid)
+ return;
+
/* PR11109: Handle round-to-zero for VCVT conversions. */
if (mode == neon_cvt_mode_z
&& ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2)
- && (flavour == neon_cvt_flavour_s32_f32
+ && (flavour == neon_cvt_flavour_s16_f16
+ || flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_s32_f32
|| flavour == neon_cvt_flavour_u32_f32
|| flavour == neon_cvt_flavour_s32_f64
|| flavour == neon_cvt_flavour_u32_f64)
@@ -15542,7 +15564,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
case NS_QQI:
{
unsigned immbits;
- unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 };
+ unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000,
+ 0x0000100, 0x1000100, 0x0, 0x1000000};
if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
return;
@@ -15551,7 +15574,6 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
integer conversion. */
if (inst.operands[2].present && inst.operands[2].imm == 0)
goto int_encode;
- immbits = 32 - inst.operands[2].imm;
NEON_ENCODE (IMMED, inst);
if (flavour != neon_cvt_flavour_invalid)
inst.instruction |= enctab[flavour];
@@ -15561,7 +15583,19 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
inst.instruction |= HI1 (inst.operands[1].reg) << 5;
inst.instruction |= neon_quad (rs) << 6;
inst.instruction |= 1 << 21;
- inst.instruction |= immbits << 16;
+ if (flavour < neon_cvt_flavour_s16_f16)
+ {
+ inst.instruction |= 1 << 21;
+ immbits = 32 - inst.operands[2].imm;
+ inst.instruction |= immbits << 16;
+ }
+ else
+ {
+ inst.instruction |= 3 << 20;
+ immbits = 16 - inst.operands[2].imm;
+ inst.instruction |= immbits << 16;
+ inst.instruction &= ~(1 << 9);
+ }
neon_dp_fixup (&inst);
}
@@ -15582,8 +15616,14 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
inst.instruction |= LOW4 (inst.operands[1].reg);
inst.instruction |= HI1 (inst.operands[1].reg) << 5;
inst.instruction |= neon_quad (rs) << 6;
- inst.instruction |= (flavour == neon_cvt_flavour_u32_f32) << 7;
+ inst.instruction |= (flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_u32_f32) << 7;
inst.instruction |= mode << 8;
+ if (flavour == neon_cvt_flavour_u16_f16
+ || flavour == neon_cvt_flavour_s16_f16)
+ /* Mask off the original size bits and reencode them. */
+ inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18));
+
if (thumb_mode)
inst.instruction |= 0xfc000000;
else
@@ -15593,7 +15633,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
{
int_encode:
{
- unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 };
+ unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080,
+ 0x100, 0x180, 0x0, 0x080};
NEON_ENCODE (INTEGER, inst);
@@ -15608,7 +15649,12 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
inst.instruction |= LOW4 (inst.operands[1].reg);
inst.instruction |= HI1 (inst.operands[1].reg) << 5;
inst.instruction |= neon_quad (rs) << 6;
- inst.instruction |= 2 << 18;
+ if (flavour >= neon_cvt_flavour_s16_f16
+ && flavour <= neon_cvt_flavour_f16_u16)
+ /* Half precision. */
+ inst.instruction |= 1 << 18;
+ else
+ inst.instruction |= 2 << 18;
neon_dp_fixup (&inst);
}
@@ -16431,7 +16477,7 @@ do_neon_recip_est (void)
{
enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
struct neon_type_el et = neon_check_type (2, rs,
- N_EQK | N_FLT, N_F32 | N_U32 | N_KEY);
+ N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY);
inst.instruction |= (et.type == NT_float) << 8;
neon_two_same (neon_quad (rs), 1, et.size);
}
@@ -16945,7 +16991,7 @@ do_vmaxnm (void)
if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
return;
- neon_dyadic_misc (NT_untyped, N_F32, 0);
+ neon_dyadic_misc (NT_untyped, N_F_16_32, 0);
}
static void
@@ -17001,7 +17047,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
{
/* Neon encodings (or something broken...). */
inst.error = NULL;
- et = neon_check_type (2, rs, N_EQK, N_F32 | N_KEY);
+ et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY);
if (et.type == NT_invtype)
return;
@@ -17017,6 +17063,10 @@ do_vrint_1 (enum neon_cvt_mode mode)
inst.instruction |= LOW4 (inst.operands[1].reg);
inst.instruction |= HI1 (inst.operands[1].reg) << 5;
inst.instruction |= neon_quad (rs) << 6;
+ /* Mask off the original size bits and reencode them. */
+ inst.instruction = ((inst.instruction & 0xfff3ffff)
+ | neon_logbits (et.size) << 18);
+
switch (mode)
{
case neon_cvt_mode_z: inst.instruction |= 3 << 7; break;
@@ -20258,7 +20308,7 @@ static const struct asm_opcode insns[] =
NUF(vbitq, 1200110, 3, (RNQ, RNQ, RNQ), neon_bitfield),
NUF(vbif, 1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield),
- /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32. */
+ /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32. */
nUF(vabd, _vabd, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
nUF(vabdq, _vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
nUF(vmax, _vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
@@ -20421,7 +20471,7 @@ static const struct asm_opcode insns[] =
NUF(vpadalq, 1b00600, 2, (RNQ, RNQ), neon_pair_long),
NUF(vpaddl, 1b00200, 2, (RNDQ, RNDQ), neon_pair_long),
NUF(vpaddlq, 1b00200, 2, (RNQ, RNQ), neon_pair_long),
- /* Reciprocal estimates. Types U32 F32. */
+ /* Reciprocal estimates. Types U32 F16 F32. */
NUF(vrecpe, 1b30400, 2, (RNDQ, RNDQ), neon_recip_est),
NUF(vrecpeq, 1b30400, 2, (RNQ, RNQ), neon_recip_est),
NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est),
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d
new file mode 100644
index 0000000000000000000000000000000000000000..5578b9bdca6519c9e6b6d3e5510dfd84d43f12df
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d
@@ -0,0 +1,147 @@
+#name: ARM v8.2 FP16 support on SIMD (Thumb)
+#source: armv8-2-fp16-simd.s
+#objdump: -d
+#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mthumb
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <func>:
+ 0: ff34 2d0e vabd.f16 d2, d4, d14
+ 4: ff38 4d6c vabd.f16 q2, q4, q14
+ 8: ef14 2f0e vmax.f16 d2, d4, d14
+ c: ef18 4f6c vmax.f16 q2, q4, q14
+ 10: ef34 2f0e vmin.f16 d2, d4, d14
+ 14: ef38 4f6c vmin.f16 q2, q4, q14
+ 18: ff30 0dec vabd.f16 q0, q8, q14
+ 1c: ef10 0fec vmax.f16 q0, q8, q14
+ 20: ef30 0fec vmin.f16 q0, q8, q14
+ 24: ff33 1d0f vabd.f16 d1, d3, d15
+ 28: ff31 0d08 vabd.f16 d0, d1, d8
+ 2c: ffb5 0708 vabs.f16 d0, d8
+ 30: ffb5 0760 vabs.f16 q0, q8
+ 34: ffb5 0788 vneg.f16 d0, d8
+ 38: ffb5 07e0 vneg.f16 q0, q8
+ 3c: ffb5 474c vabs.f16 q2, q6
+ 40: ffb5 47cc vneg.f16 q2, q6
+ 44: ffb5 7703 vabs.f16 d7, d3
+ 48: ffb5 9781 vneg.f16 d9, d1
+ 4c: ff14 2e1e vacge.f16 d2, d4, d14
+ 50: ff18 4e7c vacge.f16 q2, q4, q14
+ 54: ff34 2e1e vacgt.f16 d2, d4, d14
+ 58: ff38 4e7c vacgt.f16 q2, q4, q14
+ 5c: ff3e 2e14 vacgt.f16 d2, d14, d4
+ 60: ff3c 4ed8 vacgt.f16 q2, q14, q4
+ 64: ff1e 2e14 vacge.f16 d2, d14, d4
+ 68: ff1c 4ed8 vacge.f16 q2, q14, q4
+ 6c: ef14 2e0e vceq.f16 d2, d4, d14
+ 70: ef18 4e6c vceq.f16 q2, q4, q14
+ 74: ff14 2e0e vcge.f16 d2, d4, d14
+ 78: ff18 4e6c vcge.f16 q2, q4, q14
+ 7c: ff34 2e0e vcgt.f16 d2, d4, d14
+ 80: ff38 4e6c vcgt.f16 q2, q4, q14
+ 84: ff1e 2e04 vcge.f16 d2, d14, d4
+ 88: ff1c 4ec8 vcge.f16 q2, q14, q4
+ 8c: ff3e 2e04 vcgt.f16 d2, d14, d4
+ 90: ff3c 4ec8 vcgt.f16 q2, q14, q4
+ 94: ff10 0efc vacge.f16 q0, q8, q14
+ 98: ff30 0efc vacgt.f16 q0, q8, q14
+ 9c: ff3c 0ef0 vacgt.f16 q0, q14, q8
+ a0: ff1c 0ef0 vacge.f16 q0, q14, q8
+ a4: ef10 0eec vceq.f16 q0, q8, q14
+ a8: ff10 0eec vcge.f16 q0, q8, q14
+ ac: ff30 0eec vcgt.f16 q0, q8, q14
+ b0: ff1c 0ee0 vcge.f16 q0, q14, q8
+ b4: ff3c 0ee0 vcgt.f16 q0, q14, q8
+ b8: ef14 2d0e vadd.f16 d2, d4, d14
+ bc: ef18 4d6c vadd.f16 q2, q4, q14
+ c0: ef34 2d0e vsub.f16 d2, d4, d14
+ c4: ef38 4d6c vsub.f16 q2, q4, q14
+ c8: ef10 0dec vadd.f16 q0, q8, q14
+ cc: ef30 0dec vsub.f16 q0, q8, q14
+ d0: ff14 2f1e vmaxnm.f16 d2, d4, d14
+ d4: ff18 4f7c vmaxnm.f16 q2, q4, q14
+ d8: ff34 2f1e vminnm.f16 d2, d4, d14
+ dc: ff38 4f7c vminnm.f16 q2, q4, q14
+ e0: ef14 2c1e vfma.f16 d2, d4, d14
+ e4: ef18 4c7c vfma.f16 q2, q4, q14
+ e8: ef34 2c1e vfms.f16 d2, d4, d14
+ ec: ef38 4c7c vfms.f16 q2, q4, q14
+ f0: ef14 2d1e vmla.f16 d2, d4, d14
+ f4: ef18 4d7c vmla.f16 q2, q4, q14
+ f8: ef34 2d1e vmls.f16 d2, d4, d14
+ fc: ef38 4d7c vmls.f16 q2, q4, q14
+ 100: ffb6 458e vrintz.f16 d4, d14
+ 104: ffb6 85ec vrintz.f16 q4, q14
+ 108: ffb6 448e vrintx.f16 d4, d14
+ 10c: ffb6 84ec vrintx.f16 q4, q14
+ 110: ffb6 450e vrinta.f16 d4, d14
+ 114: ffb6 856c vrinta.f16 q4, q14
+ 118: ffb6 440e vrintn.f16 d4, d14
+ 11c: ffb6 846c vrintn.f16 q4, q14
+ 120: ffb6 478e vrintp.f16 d4, d14
+ 124: ffb6 87ec vrintp.f16 q4, q14
+ 128: ffb6 468e vrintm.f16 d4, d14
+ 12c: ffb6 86ec vrintm.f16 q4, q14
+ 130: ff18 4d0e vpadd.f16 d4, d8, d14
+ 134: ffb7 4508 vrecpe.f16 d4, d8
+ 138: ffb7 8560 vrecpe.f16 q4, q8
+ 13c: ffb7 4588 vrsqrte.f16 d4, d8
+ 140: ffb7 85e0 vrsqrte.f16 q4, q8
+ 144: ffb7 0564 vrecpe.f16 q0, q10
+ 148: ffb7 05e4 vrsqrte.f16 q0, q10
+ 14c: ef1a 8f1c vrecps.f16 d8, d10, d12
+ 150: ef54 0ff8 vrecps.f16 q8, q10, q12
+ 154: ef3a 8f1c vrsqrts.f16 d8, d10, d12
+ 158: ef74 0ff8 vrsqrts.f16 q8, q10, q12
+ 15c: ef10 4f58 vrecps.f16 q2, q0, q4
+ 160: ef30 4f58 vrsqrts.f16 q2, q0, q4
+ 164: ff18 4f0e vpmax.f16 d4, d8, d14
+ 168: ff38 af02 vpmin.f16 d10, d8, d2
+ 16c: ff18 4d1e vmul.f16 d4, d8, d14
+ 170: ff10 7d11 vmul.f16 d7, d0, d1
+ 174: ff10 4dd0 vmul.f16 q2, q8, q0
+ 178: ffb7 600c vcvta.s16.f16 d6, d12
+ 17c: ffb7 c068 vcvta.s16.f16 q6, q12
+ 180: ffb7 630c vcvtm.s16.f16 d6, d12
+ 184: ffb7 c368 vcvtm.s16.f16 q6, q12
+ 188: ffb7 610c vcvtn.s16.f16 d6, d12
+ 18c: ffb7 c168 vcvtn.s16.f16 q6, q12
+ 190: ffb7 620c vcvtp.s16.f16 d6, d12
+ 194: ffb7 c268 vcvtp.s16.f16 q6, q12
+ 198: ffb7 608c vcvta.u16.f16 d6, d12
+ 19c: ffb7 c0e8 vcvta.u16.f16 q6, q12
+ 1a0: ffb7 638c vcvtm.u16.f16 d6, d12
+ 1a4: ffb7 c3e8 vcvtm.u16.f16 q6, q12
+ 1a8: ffb7 618c vcvtn.u16.f16 d6, d12
+ 1ac: ffb7 c1e8 vcvtn.u16.f16 q6, q12
+ 1b0: ffb7 628c vcvtp.u16.f16 d6, d12
+ 1b4: ffb7 c2e8 vcvtp.u16.f16 q6, q12
+ 1b8: ffb7 e700 vcvt.s16.f16 d14, d0
+ 1bc: fff7 c740 vcvt.s16.f16 q14, q0
+ 1c0: ffb7 e780 vcvt.u16.f16 d14, d0
+ 1c4: fff7 c7c0 vcvt.u16.f16 q14, q0
+ 1c8: ffb7 e600 vcvt.f16.s16 d14, d0
+ 1cc: fff7 c640 vcvt.f16.s16 q14, q0
+ 1d0: ffb7 e680 vcvt.f16.u16 d14, d0
+ 1d4: fff7 c6c0 vcvt.f16.u16 q14, q0
+ 1d8: efbd ed10 vcvt.s16.f16 d14, d0, #3
+ 1dc: effd cd50 vcvt.s16.f16 q14, q0, #3
+ 1e0: ffbd ed10 vcvt.u16.f16 d14, d0, #3
+ 1e4: fffd cd50 vcvt.u16.f16 q14, q0, #3
+ 1e8: efbd ec10 vcvt.f16.s16 d14, d0, #3
+ 1ec: effd cc50 vcvt.f16.s16 q14, q0, #3
+ 1f0: ffbd ec10 vcvt.f16.u16 d14, d0, #3
+ 1f4: fffd cc50 vcvt.f16.u16 q14, q0, #3
+ 1f8: ffb5 e502 vceq.f16 d14, d2, #0
+ 1fc: fff5 c544 vceq.f16 q14, q2, #0
+ 200: ffb5 e482 vcge.f16 d14, d2, #0
+ 204: fff5 c4c4 vcge.f16 q14, q2, #0
+ 208: ffb5 e402 vcgt.f16 d14, d2, #0
+ 20c: fff5 c444 vcgt.f16 q14, q2, #0
+ 210: ffb5 e582 vcle.f16 d14, d2, #0
+ 214: fff5 c5c4 vcle.f16 q14, q2, #0
+ 218: ffb5 e602 vclt.f16 d14, d2, #0
+ 21c: fff5 c644 vclt.f16 q14, q2, #0
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d
new file mode 100644
index 0000000000000000000000000000000000000000..e78f080fe989bd2e626cd13c67592cc93323fa0f
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d
@@ -0,0 +1,4 @@
+#name: Reject ARM v8.2 FP16 SIMD instruction for early arch (Thumb)
+#source: armv8-2-fp16-simd.s
+#as: -march=armv8.2-a -mfpu=neon-fp-armv8 -mthumb
+#error-output: armv8-2-fp16-simd-warning.l
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d
new file mode 100644
index 0000000000000000000000000000000000000000..d39c36da1e6c711874e1870071190a8ba68cdc47
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d
@@ -0,0 +1,4 @@
+#name: Reject ARM v8.2 FP16 SIMD instruction for early arch
+#source: armv8-2-fp16-simd.s
+#as: -march=armv8.2-a -mfpu=neon-fp-armv8
+#error-output: armv8-2-fp16-simd-warning.l
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l
new file mode 100644
index 0000000000000000000000000000000000000000..ba27f7c33a497447585c76f5e7aea8a80200ced3
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l
@@ -0,0 +1,137 @@
+[^:]*: Assembler messages:
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 q2,q4,q14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 q2,q4,q14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 q2,q4,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vabdq.f16 q0,q8,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vmaxq.f16 q0,q8,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vminq.f16 q0,q8,q14'
+[^:]*:165: Error: selected processor does not support fp16 instruction -- `vabd.f16 d1,d3,d15'
+[^:]*:166: Error: selected processor does not support fp16 instruction -- `vabd.f16 d0,d1,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 d0,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 q0,q8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 d0,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 q0,q8'
+[^:]*:170: Error: selected processor does not support fp16 instruction -- `vabsq.f16 q2,q6'
+[^:]*:170: Error: selected processor does not support fp16 instruction -- `vnegq.f16 q2,q6'
+[^:]*:171: Error: selected processor does not support fp16 instruction -- `vabs.f16 d7,d3'
+[^:]*:172: Error: selected processor does not support fp16 instruction -- `vneg.f16 d9,d1'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 q2,q4,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgeq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgtq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacltq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacleq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vceqq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgeq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgtq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcleq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcltq.f16 q0,q8,q14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 d2,d4,d14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 q2,q4,q14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 d2,d4,d14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 q2,q4,q14'
+[^:]*:180: Error: selected processor does not support fp16 instruction -- `vaddq.f16 q0,q8,q14'
+[^:]*:180: Error: selected processor does not support fp16 instruction -- `vsubq.f16 q0,q8,q14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 d2,d4,d14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 q2,q4,q14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 d2,d4,d14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 q2,q4,q14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 d2,d4,d14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 q2,q4,q14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 d2,d4,d14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 q2,q4,q14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 d2,d4,d14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 q2,q4,q14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 d2,d4,d14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 q2,q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 q4,q14'
+[^:]*:195: Error: selected processor does not support fp16 instruction -- `vpadd.f16 d4,d8,d14'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 d4,d8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 q4,q8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 d4,d8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 q4,q8'
+[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrecpeq.f16 q0,q10'
+[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrsqrteq.f16 q0,q10'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 d8,d10,d12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 q8,q10,q12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 d8,d10,d12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 q8,q10,q12'
+[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrecpsq.f16 q2,q0,q4'
+[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrsqrtsq.f16 q2,q0,q4'
+[^:]*:206: Error: selected processor does not support fp16 instruction -- `vpmax.f16 d4,d8,d14'
+[^:]*:207: Error: selected processor does not support fp16 instruction -- `vpmin.f16 d10,d8,d2'
+[^:]*:210: Error: selected processor does not support fp16 instruction -- `vmul.f16 d4,d8,d14'
+[^:]*:211: Error: selected processor does not support fp16 instruction -- `vmul.f16 d7,d0,d1'
+[^:]*:212: Error: selected processor does not support fp16 instruction -- `vmul.f16 q2,q8,q0'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 q6,q12'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0,#3'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 q14,q2,#0'
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d
new file mode 100644
index 0000000000000000000000000000000000000000..1a97f393f9d6e37981e67a5fa5c96073dff91d27
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d
@@ -0,0 +1,147 @@
+#name: ARM v8.2 FP16 support on SIMD
+#source: armv8-2-fp16-simd.s
+#objdump: -d
+#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <func>:
+ 0: f3342d0e vabd.f16 d2, d4, d14
+ 4: f3384d6c vabd.f16 q2, q4, q14
+ 8: f2142f0e vmax.f16 d2, d4, d14
+ c: f2184f6c vmax.f16 q2, q4, q14
+ 10: f2342f0e vmin.f16 d2, d4, d14
+ 14: f2384f6c vmin.f16 q2, q4, q14
+ 18: f3300dec vabd.f16 q0, q8, q14
+ 1c: f2100fec vmax.f16 q0, q8, q14
+ 20: f2300fec vmin.f16 q0, q8, q14
+ 24: f3331d0f vabd.f16 d1, d3, d15
+ 28: f3310d08 vabd.f16 d0, d1, d8
+ 2c: f3b50708 vabs.f16 d0, d8
+ 30: f3b50760 vabs.f16 q0, q8
+ 34: f3b50788 vneg.f16 d0, d8
+ 38: f3b507e0 vneg.f16 q0, q8
+ 3c: f3b5474c vabs.f16 q2, q6
+ 40: f3b547cc vneg.f16 q2, q6
+ 44: f3b57703 vabs.f16 d7, d3
+ 48: f3b59781 vneg.f16 d9, d1
+ 4c: f3142e1e vacge.f16 d2, d4, d14
+ 50: f3184e7c vacge.f16 q2, q4, q14
+ 54: f3342e1e vacgt.f16 d2, d4, d14
+ 58: f3384e7c vacgt.f16 q2, q4, q14
+ 5c: f33e2e14 vacgt.f16 d2, d14, d4
+ 60: f33c4ed8 vacgt.f16 q2, q14, q4
+ 64: f31e2e14 vacge.f16 d2, d14, d4
+ 68: f31c4ed8 vacge.f16 q2, q14, q4
+ 6c: f2142e0e vceq.f16 d2, d4, d14
+ 70: f2184e6c vceq.f16 q2, q4, q14
+ 74: f3142e0e vcge.f16 d2, d4, d14
+ 78: f3184e6c vcge.f16 q2, q4, q14
+ 7c: f3342e0e vcgt.f16 d2, d4, d14
+ 80: f3384e6c vcgt.f16 q2, q4, q14
+ 84: f31e2e04 vcge.f16 d2, d14, d4
+ 88: f31c4ec8 vcge.f16 q2, q14, q4
+ 8c: f33e2e04 vcgt.f16 d2, d14, d4
+ 90: f33c4ec8 vcgt.f16 q2, q14, q4
+ 94: f3100efc vacge.f16 q0, q8, q14
+ 98: f3300efc vacgt.f16 q0, q8, q14
+ 9c: f33c0ef0 vacgt.f16 q0, q14, q8
+ a0: f31c0ef0 vacge.f16 q0, q14, q8
+ a4: f2100eec vceq.f16 q0, q8, q14
+ a8: f3100eec vcge.f16 q0, q8, q14
+ ac: f3300eec vcgt.f16 q0, q8, q14
+ b0: f31c0ee0 vcge.f16 q0, q14, q8
+ b4: f33c0ee0 vcgt.f16 q0, q14, q8
+ b8: f2142d0e vadd.f16 d2, d4, d14
+ bc: f2184d6c vadd.f16 q2, q4, q14
+ c0: f2342d0e vsub.f16 d2, d4, d14
+ c4: f2384d6c vsub.f16 q2, q4, q14
+ c8: f2100dec vadd.f16 q0, q8, q14
+ cc: f2300dec vsub.f16 q0, q8, q14
+ d0: f3142f1e vmaxnm.f16 d2, d4, d14
+ d4: f3184f7c vmaxnm.f16 q2, q4, q14
+ d8: f3342f1e vminnm.f16 d2, d4, d14
+ dc: f3384f7c vminnm.f16 q2, q4, q14
+ e0: f2142c1e vfma.f16 d2, d4, d14
+ e4: f2184c7c vfma.f16 q2, q4, q14
+ e8: f2342c1e vfms.f16 d2, d4, d14
+ ec: f2384c7c vfms.f16 q2, q4, q14
+ f0: f2142d1e vmla.f16 d2, d4, d14
+ f4: f2184d7c vmla.f16 q2, q4, q14
+ f8: f2342d1e vmls.f16 d2, d4, d14
+ fc: f2384d7c vmls.f16 q2, q4, q14
+ 100: f3b6458e vrintz.f16 d4, d14
+ 104: f3b685ec vrintz.f16 q4, q14
+ 108: f3b6448e vrintx.f16 d4, d14
+ 10c: f3b684ec vrintx.f16 q4, q14
+ 110: f3b6450e vrinta.f16 d4, d14
+ 114: f3b6856c vrinta.f16 q4, q14
+ 118: f3b6440e vrintn.f16 d4, d14
+ 11c: f3b6846c vrintn.f16 q4, q14
+ 120: f3b6478e vrintp.f16 d4, d14
+ 124: f3b687ec vrintp.f16 q4, q14
+ 128: f3b6468e vrintm.f16 d4, d14
+ 12c: f3b686ec vrintm.f16 q4, q14
+ 130: f3184d0e vpadd.f16 d4, d8, d14
+ 134: f3b74508 vrecpe.f16 d4, d8
+ 138: f3b78560 vrecpe.f16 q4, q8
+ 13c: f3b74588 vrsqrte.f16 d4, d8
+ 140: f3b785e0 vrsqrte.f16 q4, q8
+ 144: f3b70564 vrecpe.f16 q0, q10
+ 148: f3b705e4 vrsqrte.f16 q0, q10
+ 14c: f21a8f1c vrecps.f16 d8, d10, d12
+ 150: f2540ff8 vrecps.f16 q8, q10, q12
+ 154: f23a8f1c vrsqrts.f16 d8, d10, d12
+ 158: f2740ff8 vrsqrts.f16 q8, q10, q12
+ 15c: f2104f58 vrecps.f16 q2, q0, q4
+ 160: f2304f58 vrsqrts.f16 q2, q0, q4
+ 164: f3184f0e vpmax.f16 d4, d8, d14
+ 168: f338af02 vpmin.f16 d10, d8, d2
+ 16c: f3184d1e vmul.f16 d4, d8, d14
+ 170: f3107d11 vmul.f16 d7, d0, d1
+ 174: f3104dd0 vmul.f16 q2, q8, q0
+ 178: f3b7600c vcvta.s16.f16 d6, d12
+ 17c: f3b7c068 vcvta.s16.f16 q6, q12
+ 180: f3b7630c vcvtm.s16.f16 d6, d12
+ 184: f3b7c368 vcvtm.s16.f16 q6, q12
+ 188: f3b7610c vcvtn.s16.f16 d6, d12
+ 18c: f3b7c168 vcvtn.s16.f16 q6, q12
+ 190: f3b7620c vcvtp.s16.f16 d6, d12
+ 194: f3b7c268 vcvtp.s16.f16 q6, q12
+ 198: f3b7608c vcvta.u16.f16 d6, d12
+ 19c: f3b7c0e8 vcvta.u16.f16 q6, q12
+ 1a0: f3b7638c vcvtm.u16.f16 d6, d12
+ 1a4: f3b7c3e8 vcvtm.u16.f16 q6, q12
+ 1a8: f3b7618c vcvtn.u16.f16 d6, d12
+ 1ac: f3b7c1e8 vcvtn.u16.f16 q6, q12
+ 1b0: f3b7628c vcvtp.u16.f16 d6, d12
+ 1b4: f3b7c2e8 vcvtp.u16.f16 q6, q12
+ 1b8: f3b7e700 vcvt.s16.f16 d14, d0
+ 1bc: f3f7c740 vcvt.s16.f16 q14, q0
+ 1c0: f3b7e780 vcvt.u16.f16 d14, d0
+ 1c4: f3f7c7c0 vcvt.u16.f16 q14, q0
+ 1c8: f3b7e600 vcvt.f16.s16 d14, d0
+ 1cc: f3f7c640 vcvt.f16.s16 q14, q0
+ 1d0: f3b7e680 vcvt.f16.u16 d14, d0
+ 1d4: f3f7c6c0 vcvt.f16.u16 q14, q0
+ 1d8: f2bded10 vcvt.s16.f16 d14, d0, #3
+ 1dc: f2fdcd50 vcvt.s16.f16 q14, q0, #3
+ 1e0: f3bded10 vcvt.u16.f16 d14, d0, #3
+ 1e4: f3fdcd50 vcvt.u16.f16 q14, q0, #3
+ 1e8: f2bdec10 vcvt.f16.s16 d14, d0, #3
+ 1ec: f2fdcc50 vcvt.f16.s16 q14, q0, #3
+ 1f0: f3bdec10 vcvt.f16.u16 d14, d0, #3
+ 1f4: f3fdcc50 vcvt.f16.u16 q14, q0, #3
+ 1f8: f3b5e502 vceq.f16 d14, d2, #0
+ 1fc: f3f5c544 vceq.f16 q14, q2, #0
+ 200: f3b5e482 vcge.f16 d14, d2, #0
+ 204: f3f5c4c4 vcge.f16 q14, q2, #0
+ 208: f3b5e402 vcgt.f16 d14, d2, #0
+ 20c: f3f5c444 vcgt.f16 q14, q2, #0
+ 210: f3b5e582 vcle.f16 d14, d2, #0
+ 214: f3f5c5c4 vcle.f16 q14, q2, #0
+ 218: f3b5e602 vclt.f16 d14, d2, #0
+ 21c: f3f5c644 vclt.f16 q14, q2, #0
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd.s b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s
new file mode 100644
index 0000000000000000000000000000000000000000..7758f2415e04b4c9956b5db9ac52e5b3272b8f30
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s
@@ -0,0 +1,224 @@
+ .macro f16_dq_ifsu reg0 reg1 reg2 @ Emit vabd/vmax/vmin .f16 for the given register numbers, D form then Q form.
+ .irp op, vabd.f16, vmax.f16, vmin.f16
+ \op d\reg0, d\reg1, d\reg2 @ 64-bit (D register) operands
+ \op q\reg0, q\reg1, q\reg2 @ 128-bit (Q register) operands
+ .endr
+ .endm
+
+ .macro f16_q_ifsu reg0 reg1 reg2 @ Same three operations spelled with the explicit 'q' mnemonic suffix; Q registers only.
+ .irp op, vabdq.f16, vmaxq.f16, vminq.f16
+ \op q\reg0, q\reg1, q\reg2
+ .endr
+ .endm
+
+ .macro f16_dq_abs_neg reg0 reg1 @ Emit two-operand vabs.f16 and vneg.f16, D form then Q form.
+ .irp op, vabs.f16, vneg.f16
+ \op d\reg0, d\reg1 @ D register operands
+ \op q\reg0, q\reg1 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_q_abs_neg reg0 reg1 @ vabs/vneg spelled with the explicit 'q' mnemonic suffix; Q registers only.
+ .irp op, vabsq.f16, vnegq.f16
+ \op q\reg0, q\reg1
+ .endr
+ .endm
+
+ .macro f16_dq_fcmp reg0 reg1 reg2 @ Three-register FP16 compares; the expected listing shows vaclt/vacle/vcle/vclt as vacgt/vacge/vcge/vcgt with the source operands swapped.
+ .irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
+ \op d\reg0, d\reg1, d\reg2 @ D register operands
+ \op q\reg0, q\reg1, q\reg2 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_fcmp_imm0 reg0 reg1 @ FP16 compares of a register against the immediate #0, D form then Q form.
+ .irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
+ \op d\reg0, d\reg1, #0 @ D register operands
+ \op q\reg0, q\reg1, #0 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_q_fcmp reg0 reg1 reg2 @ Three-register compares spelled with the explicit 'q' mnemonic suffix; Q registers only.
+ .irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
+ \op q\reg0, q\reg1, q\reg2
+ .endr
+ .endm
+
+ .macro f16_dq_addsub reg0 reg1 reg2 @ Emit vadd.f16 and vsub.f16, D form then Q form.
+ .irp op, vadd.f16, vsub.f16
+ \op d\reg0, d\reg1, d\reg2 @ D register operands
+ \op q\reg0, q\reg1, q\reg2 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_q_addsub reg0 reg1 reg2 @ vadd/vsub spelled with the explicit 'q' mnemonic suffix; Q registers only.
+ .irp op, vaddq.f16, vsubq.f16
+ \op q\reg0, q\reg1, q\reg2
+ .endr
+ .endm
+
+ .macro f16_dq_vmaxnm reg0 reg1 reg2 @ Emit vmaxnm.f16 and vminnm.f16, D form then Q form.
+ .irp op, vmaxnm.f16, vminnm.f16
+ \op d\reg0, d\reg1, d\reg2 @ D register operands
+ \op q\reg0, q\reg1, q\reg2 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_fmac reg0 reg1 reg2 @ Emit vfma.f16 and vfms.f16, D form then Q form.
+ .irp op, vfma.f16, vfms.f16
+ \op d\reg0, d\reg1, d\reg2 @ D register operands
+ \op q\reg0, q\reg1, q\reg2 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_fmacmaybe reg0 reg1 reg2 @ Emit vmla.f16 and vmls.f16, D form then Q form.
+ .irp op, vmla.f16, vmls.f16
+ \op d\reg0, d\reg1, d\reg2 @ D register operands
+ \op q\reg0, q\reg1, q\reg2 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_vrint reg0 reg1 @ Emit the six vrint{z,x,a,n,p,m}.f16 variants, D form then Q form.
+ .irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
+ \op d\reg0, d\reg1 @ D register operands
+ \op q\reg0, q\reg1 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_recip reg0 reg1 @ Emit vrecpe.f16 and vrsqrte.f16, D form then Q form.
+ .irp op, vrecpe.f16, vrsqrte.f16
+ \op d\reg0, d\reg1 @ D register operands
+ \op q\reg0, q\reg1 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_q_recip reg0 reg1 @ vrecpe/vrsqrte spelled with the explicit 'q' mnemonic suffix; Q registers only.
+ .irp op, vrecpeq.f16, vrsqrteq.f16
+ \op q\reg0, q\reg1
+ .endr
+ .endm
+
+ .macro f16_dq_step reg0 reg1 reg2 @ Emit vrecps.f16 and vrsqrts.f16, D form then Q form.
+ .irp op, vrecps.f16, vrsqrts.f16
+ \op d\reg0, d\reg1, d\reg2 @ D register operands
+ \op q\reg0, q\reg1, q\reg2 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_q_step reg0 reg1 reg2 @ vrecps/vrsqrts spelled with the explicit 'q' mnemonic suffix; Q registers only.
+ .irp op, vrecpsq.f16, vrsqrtsq.f16
+ \op q\reg0, q\reg1, q\reg2
+ .endr
+ .endm
+
+ .macro f16_dq_cvt reg0 reg1 @ Emit vcvt{a,m,n,p} from .f16 to .s16 and to .u16, D form then Q form.
+ .irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16
+ \op d\reg0, d\reg1 @ D register operands
+ \op q\reg0, q\reg1 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_cvtz reg0 reg1 @ Emit two-operand vcvt between .f16 and .s16/.u16 in both directions, D form then Q form.
+ .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16
+ \op d\reg0, d\reg1 @ D register operands
+ \op q\reg0, q\reg1 @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq_cvtz_fixed reg0 reg1 imm @ Same four vcvt type pairs with a trailing #imm operand, D form then Q form.
+ .irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16
+ \op d\reg0, d\reg1, #\imm @ D register operands
+ \op q\reg0, q\reg1, #\imm @ Q register operands
+ .endr
+ .endm
+
+ .macro f16_dq op reg0 reg1 reg2 @ Generic helper: emit the three-operand \op in D form then Q form. Not invoked by func in this file.
+ \op d\reg0, d\reg1, d\reg2
+ \op q\reg0, q\reg1, q\reg2
+ .endm
+
+ .macro f16_d op reg0 reg1 reg2 @ Generic helper: emit the three-operand \op on D registers only.
+ \op d\reg0, d\reg1, d\reg2
+ .endm
+
+ .macro f16_q op reg0 reg1 reg2 @ Generic helper: emit the three-operand \op on Q registers only.
+ \op q\reg0, q\reg1, q\reg2
+ .endm
+
+ .macro f16_dq_2 op reg0 reg1 @ Generic helper: emit the two-operand \op in D form then Q form. Not invoked by func in this file.
+ \op d\reg0, d\reg1
+ \op q\reg0, q\reg1
+ .endm
+
+ .macro f16_d_2 op reg0 reg1 @ Generic helper: emit the two-operand \op on D registers only.
+ \op d\reg0, d\reg1
+ .endm
+
+ .macro f16_q_2 op reg0 reg1 @ Generic helper: emit the two-operand \op on Q registers only. Not invoked by func in this file.
+ \op q\reg0, q\reg1
+ .endm
+
+func: @ Test body. Expected diagnostics cite line numbers of this file (e.g. 221, 224) and the expected listing follows this order: do not add, remove or reorder lines.
+ # neon_dyadic_if_su
+ f16_dq_ifsu 2 4 14
+ f16_q_ifsu 0 8 14
+ f16_d vabd.f16 1 3 15
+ f16_d vabd.f16 0 1 8
+
+ # neon_abs_neg
+ f16_dq_abs_neg 0 8
+ f16_q_abs_neg 2 6
+ f16_d_2 vabs.f16 7 3
+ f16_d_2 vneg.f16 9 1
+
+ # neon_fcmp
+ f16_dq_fcmp 2 4 14
+ f16_q_fcmp 0 8 14
+
+ # neon_addsub_if_i
+ f16_dq_addsub 2 4 14
+ f16_q_addsub 0 8 14
+
+ # neon_vmaxnm
+ f16_dq_vmaxnm 2 4 14
+
+ # neon_fmac
+ f16_dq_fmac 2 4 14
+
+ # neon_mac_maybe_scalar
+ f16_dq_fmacmaybe 2 4 14
+
+ # vrint
+ f16_dq_vrint 4 14
+
+ # neon_dyadic_if_i_d
+ f16_d vpadd.f16 4 8 14 @ D registers only
+
+ # neon_recip_est
+ f16_dq_recip 4 8
+ f16_q_recip 0 10
+
+ # neon_step
+ f16_dq_step 8 10 12
+ f16_q_step 2 0 4
+
+ # neon_dyadic_if_su_d
+ f16_d vpmax.f16 4 8 14 @ D registers only
+ f16_d vpmin.f16 10 8 2 @ D registers only
+
+ # neon_mul
+ f16_d vmul.f16 4 8 14
+ f16_d vmul.f16 7 0 1
+ f16_q vmul.f16 2 8 0
+
+ # neon_cvt
+ f16_dq_cvt 6 12
+
+ # neon_cvtz
+ f16_dq_cvtz 14, 0
+
+ # neon_cvtz_fixed
+ f16_dq_cvtz_fixed 14, 0, 3 @ line 221 of this file, as cited by the expected diagnostics
+
+ # neon_fcmp_imm0
+ f16_dq_fcmp_imm0 14, 2 @ line 224 of this file, as cited by the expected diagnostics
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index 324304d850cdff43501104295f4384a14b306136..322e801c465235410ea3b8affad1a99ea6971a7b 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -1032,15 +1032,23 @@ static const struct opcode32 neon_opcodes[] =
/* NEON fused multiply add instructions. */
{ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA),
- 0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2000c10, 0xffb00f10, "vfma%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2100c10, 0xffb00f10, "vfma%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA),
- 0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2200c10, 0xffb00f10, "vfms%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
/* Two registers, miscellaneous. */
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3b60400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f16\t%12-15,22R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
0xf3bb0000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us32.f32\t%12-15,22R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3b70000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us16.f16\t%12-15,22R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
0xf3b00300, 0xffbf0fd0, "aese%u.8\t%12-15,22Q, %0-3,5Q"},
{ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
@@ -1080,8 +1088,12 @@ static const struct opcode32 neon_opcodes[] =
"vshll%c.i%18-19S2\t%12-15,22Q, %0-3,5D, #%18-19S2"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3bb0400, 0xffbf0e90, "vrecpe%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3b70400, 0xffbf0e90, "vrecpe%c.%8?fu16\t%12-15,22R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3bb0480, 0xffbf0e90, "vrsqrte%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3b70480, 0xffbf0e90, "vrsqrte%c.%8?fu16\t%12-15,22R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3b00000, 0xffb30f90, "vrev64%c.%18-19S2\t%12-15,22R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@@ -1121,8 +1133,11 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3b00600, 0xffb30f10, "vpadal%c.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3b30600, 0xffb30e10,
+ 0xf3bb0600, 0xffbf0e10,
"vcvt%c.%7-8?usff%18-19Sa.%7-8?ffus%18-19Sa\t%12-15,22R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3b70600, 0xffbf0e10,
+ "vcvt%c.%7-8?usff16.%7-8?ffus16\t%12-15,22R, %0-3,5R"},
/* Three registers of the same length. */
{ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
@@ -1140,9 +1155,13 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
0xf3200c40, 0xffb00f50, "sha256su1%u.32\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
- 0xf3000f10, 0xffa00f10, "vmaxnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3000f10, 0xffb00f10, "vmaxnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3100f10, 0xffb00f10, "vmaxnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
- 0xf3200f10, 0xffa00f10, "vminnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3200f10, 0xffb00f10, "vminnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3300f10, 0xffb00f10, "vminnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2000110, 0xffb00f10, "vand%c\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@@ -1160,41 +1179,77 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3300110, 0xffb00f10, "vbif%c\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2000d00, 0xffa00f10, "vadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2000d00, 0xffb00f10, "vadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2100d00, 0xffb00f10, "vadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2000d10, 0xffa00f10, "vmla%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2000d10, 0xffb00f10, "vmla%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2100d10, 0xffb00f10, "vmla%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2000e00, 0xffa00f10, "vceq%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2000e00, 0xffb00f10, "vceq%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2100e00, 0xffb00f10, "vceq%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2000f00, 0xffa00f10, "vmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2000f00, 0xffb00f10, "vmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2100f00, 0xffb00f10, "vmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2000f10, 0xffa00f10, "vrecps%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2000f10, 0xffb00f10, "vrecps%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2100f10, 0xffb00f10, "vrecps%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2200d00, 0xffa00f10, "vsub%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2200d00, 0xffb00f10, "vsub%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2300d00, 0xffb00f10, "vsub%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2200d10, 0xffa00f10, "vmls%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2200d10, 0xffb00f10, "vmls%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2300d10, 0xffb00f10, "vmls%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2200f00, 0xffa00f10, "vmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2200f00, 0xffb00f10, "vmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2300f00, 0xffb00f10, "vmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf2200f10, 0xffa00f10, "vrsqrts%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf2200f10, 0xffb00f10, "vrsqrts%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2300f10, 0xffb00f10, "vrsqrts%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3000d00, 0xffa00f10, "vpadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3000d00, 0xffb00f10, "vpadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3100d00, 0xffb00f10, "vpadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3000d10, 0xffa00f10, "vmul%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3000d10, 0xffb00f10, "vmul%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3100d10, 0xffb00f10, "vmul%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3000e00, 0xffa00f10, "vcge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3000e00, 0xffb00f10, "vcge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3100e00, 0xffb00f10, "vcge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3000e10, 0xffa00f10, "vacge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3000e10, 0xffb00f10, "vacge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3100e10, 0xffb00f10, "vacge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3000f00, 0xffa00f10, "vpmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3000f00, 0xffb00f10, "vpmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3100f00, 0xffb00f10, "vpmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3200d00, 0xffa00f10, "vabd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3200d00, 0xffb00f10, "vabd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3300d00, 0xffb00f10, "vabd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3200e00, 0xffa00f10, "vcgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3200e00, 0xffb00f10, "vcgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3300e00, 0xffb00f10, "vcgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3200e10, 0xffa00f10, "vacgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3200e10, 0xffb00f10, "vacgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3300e10, 0xffb00f10, "vacgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
- 0xf3200f00, 0xffa00f10, "vpmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ 0xf3200f00, 0xffb00f10, "vpmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf3300f00, 0xffb00f10, "vpmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2000800, 0xff800f10, "vadd%c.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@@ -1426,6 +1481,9 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2a00e10, 0xfea00e90,
"vcvt%c.%24,8?usff32.%24,8?ffus32\t%12-15,22R, %0-3,5R, #%16-20e"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+ 0xf2a00c10, 0xfea00e90,
+ "vcvt%c.%24,8?usff16.%24,8?ffus16\t%12-15,22R, %0-3,5R, #%16-20e"},
/* Three registers of different lengths. */
{ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),