This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Patch/ARM] Support v8.2 fp16 simd instructions

From: Jiong Wang <jiong dot wang at foss dot arm dot com>
To: "binutils at sourceware dot org" <binutils at sourceware dot org>
Date: Wed, 2 Mar 2016 15:19:08 +0000
Subject: [Patch/ARM] Support v8.2 fp16 simd instructions
Authentication-results: sourceware.org; auth=none

This patch add support for new FP16 instructions introduced in ARMv8.2-a
architecture.

Since we have supported FP16 in GAS's infrastructure in scalar patch, the
SIMD support is quite straightforward, we just need to enlarge related types to
include N_F16.

Testcases added to make sure encoding for new instructions are correct under both
arm and thumb mode. This patch also contains rejection testcases to make sure new
FP16 instructions rejected on architectures don't contains such feature.


2016-03-02  Jiong Wang  <jiong.wang@arm.com>

gas/

  * config/tc-arm.c (N_S_32): New.
  (N_F_32): Likewise.
  (N_SUF_32): Support N_F16.
  (N_IF_32): Likewise.
  (neon_dyadic_misc): Likewise.
  (do_neon_cmp): Likewise.
  (do_neon_cmp_inv): Likewise.
  (do_neon_mul): Likewise.
  (do_neon_fcmp_absolute): Likewise.
  (do_neon_step): Likewise.
  (do_neon_abs_neg): Likewise.
  (CVT_FLAVOR_VAR): Likewise.
  (do_neon_cvt_1): Likewise.
  (do_neon_recip_est): Likewise.
  (do_vmaxnm): Likewise.
  (do_vrint_1): Likewise.
  (neon_check_type): Check architecture support for FP16
  extension.
  (insns): Update comments.

opcode/

  * arm-dis.c (neon_opcodes): Support new FP16 instructions.

gas/testsuite/

  * gas/arm/armv8-2-fp16-simd.s: New test source.
  * gas/arm/armv8-2-fp16-simd.d: New testcase for arm mode.
  * gas/arm/armv8-2-fp16-simd-thumb.d: New testcase for thumb mode.
  * gas/arm/armv8-2-fp16-simd-warning.d: New rejection test for arm mode.
  * gas/arm/armv8-2-fp16-simd-warning-thumb.d: New rejection test for thumb
  mode.
  * gas/arm/armv8-2-fp16-simd-warning.l: New expected rejection error file.

diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 5d3bae4ace346fbc7e936a7b3a81b42d2c49c5cd..c1a6f76697c9d3ddd51afd069725d6945baed249 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -13419,9 +13419,11 @@ enum neon_type_mask
 #define N_SU_ALL   (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64)
 #define N_SU_32    (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
 #define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64)
-#define N_SUF_32   (N_SU_32 | N_F32)
+#define N_S_32     (N_S8 | N_S16 | N_S32)
+#define N_F_16_32  (N_F16 | N_F32)
+#define N_SUF_32   (N_SU_32 | N_F_16_32)
 #define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
-#define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
+#define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F16 | N_F32)
 #define N_F_ALL    (N_F16 | N_F32 | N_F64)
 
 /* Pass this as the first type argument to neon_check_type to ignore types
@@ -13891,6 +13893,15 @@ neon_check_type (unsigned els, enum neon_shape ns, ...)
 		  k_type = g_type;
 		  k_size = g_size;
 		  key_allowed = thisarg & ~N_KEY;
+
+		  /* Check architecture constraint on FP16 extension.  */
+		  if (k_size == 16
+		      && k_type == NT_float
+		      && ! ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16))
+		    {
+		      inst.error = _(BAD_FP16);
+		      return badtype;
+		    }
 		}
 	    }
 	  else
@@ -14702,7 +14713,7 @@ neon_dyadic_misc (enum neon_el_type ubit_meaning, unsigned types,
   if (et.type == NT_float)
     {
       NEON_ENCODE (FLOAT, inst);
-      neon_three_same (neon_quad (rs), 0, -1);
+      neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
     }
   else
     {
@@ -14863,13 +14874,13 @@ neon_compare (unsigned regtypes, unsigned immtypes, int invert)
 static void
 do_neon_cmp (void)
 {
-  neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE);
+  neon_compare (N_SUF_32, N_S_32 | N_F_16_32, FALSE);
 }
 
 static void
 do_neon_cmp_inv (void)
 {
-  neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE);
+  neon_compare (N_SUF_32, N_S_32 | N_F_16_32, TRUE);
 }
 
 static void
@@ -14997,7 +15008,7 @@ do_neon_mul (void)
   if (inst.operands[2].isscalar)
     do_neon_mac_maybe_scalar ();
   else
-    neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8, 0);
+    neon_dyadic_misc (NT_poly, N_I8 | N_I16 | N_I32 | N_F16 | N_F32 | N_P8, 0);
 }
 
 static void
@@ -15026,9 +15037,10 @@ static void
 do_neon_fcmp_absolute (void)
 {
   enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK,
+					    N_F_16_32 | N_KEY);
   /* Size field comes from bit mask.  */
-  neon_three_same (neon_quad (rs), 1, -1);
+  neon_three_same (neon_quad (rs), 1, et.size == 16 ? (int) et.size : -1);
 }
 
 static void
@@ -15042,8 +15054,9 @@ static void
 do_neon_step (void)
 {
   enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
-  neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
-  neon_three_same (neon_quad (rs), 0, -1);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK,
+					    N_F_16_32 | N_KEY);
+  neon_three_same (neon_quad (rs), 0, et.size == 16 ? (int) et.size : -1);
 }
 
 static void
@@ -15059,7 +15072,7 @@ do_neon_abs_neg (void)
     return;
 
   rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
-  et = neon_check_type (2, rs, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY);
+  et = neon_check_type (2, rs, N_EQK, N_S_32 | N_F_16_32 | N_KEY);
 
   inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
   inst.instruction |= HI1 (inst.operands[0].reg) << 22;
@@ -15268,6 +15281,10 @@ do_neon_shll (void)
   CVT_VAR (f32_s32, N_F32, N_S32, whole_reg,   "fsltos", "fsitos", NULL)      \
   CVT_VAR (f32_u32, N_F32, N_U32, whole_reg,   "fultos", "fuitos", NULL)      \
   /* Half-precision conversions.  */					      \
+  CVT_VAR (s16_f16, N_S16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL)	      \
+  CVT_VAR (u16_f16, N_U16, N_F16 | N_KEY, whole_reg, NULL, NULL, NULL)	      \
+  CVT_VAR (f16_s16, N_F16 | N_KEY, N_S16, whole_reg, NULL, NULL, NULL)	      \
+  CVT_VAR (f16_u16, N_F16 | N_KEY, N_U16, whole_reg, NULL, NULL, NULL)	      \
   CVT_VAR (f32_f16, N_F32, N_F16, whole_reg,   NULL,     NULL,     NULL)      \
   CVT_VAR (f16_f32, N_F16, N_F32, whole_reg,   NULL,     NULL,     NULL)      \
   /* New VCVT instructions introduced by ARMv8.2 fp16 extension.	      \
@@ -15500,10 +15517,15 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 					  NS_NULL);
   enum neon_cvt_flavour flavour = get_neon_cvt_flavour (rs);
 
+  if (flavour == neon_cvt_flavour_invalid)
+    return;
+
   /* PR11109: Handle round-to-zero for VCVT conversions.  */
   if (mode == neon_cvt_mode_z
       && ARM_CPU_HAS_FEATURE (cpu_variant, fpu_arch_vfp_v2)
-      && (flavour == neon_cvt_flavour_s32_f32
+      && (flavour == neon_cvt_flavour_s16_f16
+	  || flavour == neon_cvt_flavour_u16_f16
+	  || flavour == neon_cvt_flavour_s32_f32
 	  || flavour == neon_cvt_flavour_u32_f32
 	  || flavour == neon_cvt_flavour_s32_f64
 	  || flavour == neon_cvt_flavour_u32_f64)
@@ -15542,7 +15564,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
     case NS_QQI:
       {
 	unsigned immbits;
-	unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 };
+	unsigned enctab[] = {0x0000100, 0x1000100, 0x0, 0x1000000,
+			     0x0000100, 0x1000100, 0x0, 0x1000000};
 
 	if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
 	  return;
@@ -15551,7 +15574,6 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	   integer conversion.  */
 	if (inst.operands[2].present && inst.operands[2].imm == 0)
 	  goto int_encode;
-       immbits = 32 - inst.operands[2].imm;
 	NEON_ENCODE (IMMED, inst);
 	if (flavour != neon_cvt_flavour_invalid)
 	  inst.instruction |= enctab[flavour];
@@ -15561,7 +15583,19 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	inst.instruction |= HI1 (inst.operands[1].reg) << 5;
 	inst.instruction |= neon_quad (rs) << 6;
 	inst.instruction |= 1 << 21;
-	inst.instruction |= immbits << 16;
+	if (flavour < neon_cvt_flavour_s16_f16)
+	  {
+	    inst.instruction |= 1 << 21;
+	    immbits = 32 - inst.operands[2].imm;
+	    inst.instruction |= immbits << 16;
+	  }
+	else
+	  {
+	    inst.instruction |= 3 << 20;
+	    immbits = 16 - inst.operands[2].imm;
+	    inst.instruction |= immbits << 16;
+	    inst.instruction &= ~(1 << 9);
+	  }
 
 	neon_dp_fixup (&inst);
       }
@@ -15582,8 +15616,14 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	  inst.instruction |= LOW4 (inst.operands[1].reg);
 	  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
 	  inst.instruction |= neon_quad (rs) << 6;
-	  inst.instruction |= (flavour == neon_cvt_flavour_u32_f32) << 7;
+	  inst.instruction |= (flavour == neon_cvt_flavour_u16_f16
+			       || flavour == neon_cvt_flavour_u32_f32) << 7;
 	  inst.instruction |= mode << 8;
+	  if (flavour == neon_cvt_flavour_u16_f16
+	      || flavour == neon_cvt_flavour_s16_f16)
+	    /* Mask off the original size bits and reencode them.  */
+	    inst.instruction = ((inst.instruction & 0xfff3ffff) | (1 << 18));
+
 	  if (thumb_mode)
 	    inst.instruction |= 0xfc000000;
 	  else
@@ -15593,7 +15633,8 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	{
     int_encode:
 	  {
-	    unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 };
+	    unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080,
+				  0x100, 0x180, 0x0, 0x080};
 
 	    NEON_ENCODE (INTEGER, inst);
 
@@ -15608,7 +15649,12 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	    inst.instruction |= LOW4 (inst.operands[1].reg);
 	    inst.instruction |= HI1 (inst.operands[1].reg) << 5;
 	    inst.instruction |= neon_quad (rs) << 6;
-	    inst.instruction |= 2 << 18;
+	    if (flavour >= neon_cvt_flavour_s16_f16
+		&& flavour <= neon_cvt_flavour_f16_u16)
+	      /* Half precision.  */
+	      inst.instruction |= 1 << 18;
+	    else
+	      inst.instruction |= 2 << 18;
 
 	    neon_dp_fixup (&inst);
 	  }
@@ -16431,7 +16477,7 @@ do_neon_recip_est (void)
 {
   enum neon_shape rs = neon_select_shape (NS_DD, NS_QQ, NS_NULL);
   struct neon_type_el et = neon_check_type (2, rs,
-    N_EQK | N_FLT, N_F32 | N_U32 | N_KEY);
+    N_EQK | N_FLT, N_F_16_32 | N_U32 | N_KEY);
   inst.instruction |= (et.type == NT_float) << 8;
   neon_two_same (neon_quad (rs), 1, et.size);
 }
@@ -16945,7 +16991,7 @@ do_vmaxnm (void)
   if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH8) == FAIL)
     return;
 
-  neon_dyadic_misc (NT_untyped, N_F32, 0);
+  neon_dyadic_misc (NT_untyped, N_F_16_32, 0);
 }
 
 static void
@@ -17001,7 +17047,7 @@ do_vrint_1 (enum neon_cvt_mode mode)
     {
       /* Neon encodings (or something broken...).  */
       inst.error = NULL;
-      et = neon_check_type (2, rs, N_EQK, N_F32 | N_KEY);
+      et = neon_check_type (2, rs, N_EQK, N_F_16_32 | N_KEY);
 
       if (et.type == NT_invtype)
 	return;
@@ -17017,6 +17063,10 @@ do_vrint_1 (enum neon_cvt_mode mode)
       inst.instruction |= LOW4 (inst.operands[1].reg);
       inst.instruction |= HI1 (inst.operands[1].reg) << 5;
       inst.instruction |= neon_quad (rs) << 6;
+      /* Mask off the original size bits and reencode them.  */
+      inst.instruction = ((inst.instruction & 0xfff3ffff)
+			  | neon_logbits (et.size) << 18);
+
       switch (mode)
 	{
 	case neon_cvt_mode_z: inst.instruction |= 3 << 7; break;
@@ -20258,7 +20308,7 @@ static const struct asm_opcode insns[] =
  NUF(vbitq,     1200110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
  NUF(vbif,      1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
  NUF(vbifq,     1300110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
-  /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32.  */
+  /* Int and float variants, types S8 S16 S32 U8 U16 U32 F16 F32.  */
  nUF(vabd,      _vabd,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
  nUF(vabdq,     _vabd,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
  nUF(vmax,      _vmax,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
@@ -20421,7 +20471,7 @@ static const struct asm_opcode insns[] =
  NUF(vpadalq,   1b00600, 2, (RNQ,  RNQ),      neon_pair_long),
  NUF(vpaddl,    1b00200, 2, (RNDQ, RNDQ),     neon_pair_long),
  NUF(vpaddlq,   1b00200, 2, (RNQ,  RNQ),      neon_pair_long),
-  /* Reciprocal estimates. Types U32 F32.  */
+  /* Reciprocal estimates.  Types U32 F16 F32.  */
  NUF(vrecpe,    1b30400, 2, (RNDQ, RNDQ),     neon_recip_est),
  NUF(vrecpeq,   1b30400, 2, (RNQ,  RNQ),      neon_recip_est),
  NUF(vrsqrte,   1b30480, 2, (RNDQ, RNDQ),     neon_recip_est),
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d
new file mode 100644
index 0000000000000000000000000000000000000000..5578b9bdca6519c9e6b6d3e5510dfd84d43f12df
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-thumb.d
@@ -0,0 +1,147 @@
+#name: ARM v8.2 FP16 support on SIMD (Thumb)
+#source: armv8-2-fp16-simd.s
+#objdump: -d
+#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mthumb
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <func>:
+   0:	ff34 2d0e 	vabd.f16	d2, d4, d14
+   4:	ff38 4d6c 	vabd.f16	q2, q4, q14
+   8:	ef14 2f0e 	vmax.f16	d2, d4, d14
+   c:	ef18 4f6c 	vmax.f16	q2, q4, q14
+  10:	ef34 2f0e 	vmin.f16	d2, d4, d14
+  14:	ef38 4f6c 	vmin.f16	q2, q4, q14
+  18:	ff30 0dec 	vabd.f16	q0, q8, q14
+  1c:	ef10 0fec 	vmax.f16	q0, q8, q14
+  20:	ef30 0fec 	vmin.f16	q0, q8, q14
+  24:	ff33 1d0f 	vabd.f16	d1, d3, d15
+  28:	ff31 0d08 	vabd.f16	d0, d1, d8
+  2c:	ffb5 0708 	vabs.f16	d0, d8
+  30:	ffb5 0760 	vabs.f16	q0, q8
+  34:	ffb5 0788 	vneg.f16	d0, d8
+  38:	ffb5 07e0 	vneg.f16	q0, q8
+  3c:	ffb5 474c 	vabs.f16	q2, q6
+  40:	ffb5 47cc 	vneg.f16	q2, q6
+  44:	ffb5 7703 	vabs.f16	d7, d3
+  48:	ffb5 9781 	vneg.f16	d9, d1
+  4c:	ff14 2e1e 	vacge.f16	d2, d4, d14
+  50:	ff18 4e7c 	vacge.f16	q2, q4, q14
+  54:	ff34 2e1e 	vacgt.f16	d2, d4, d14
+  58:	ff38 4e7c 	vacgt.f16	q2, q4, q14
+  5c:	ff3e 2e14 	vacgt.f16	d2, d14, d4
+  60:	ff3c 4ed8 	vacgt.f16	q2, q14, q4
+  64:	ff1e 2e14 	vacge.f16	d2, d14, d4
+  68:	ff1c 4ed8 	vacge.f16	q2, q14, q4
+  6c:	ef14 2e0e 	vceq.f16	d2, d4, d14
+  70:	ef18 4e6c 	vceq.f16	q2, q4, q14
+  74:	ff14 2e0e 	vcge.f16	d2, d4, d14
+  78:	ff18 4e6c 	vcge.f16	q2, q4, q14
+  7c:	ff34 2e0e 	vcgt.f16	d2, d4, d14
+  80:	ff38 4e6c 	vcgt.f16	q2, q4, q14
+  84:	ff1e 2e04 	vcge.f16	d2, d14, d4
+  88:	ff1c 4ec8 	vcge.f16	q2, q14, q4
+  8c:	ff3e 2e04 	vcgt.f16	d2, d14, d4
+  90:	ff3c 4ec8 	vcgt.f16	q2, q14, q4
+  94:	ff10 0efc 	vacge.f16	q0, q8, q14
+  98:	ff30 0efc 	vacgt.f16	q0, q8, q14
+  9c:	ff3c 0ef0 	vacgt.f16	q0, q14, q8
+  a0:	ff1c 0ef0 	vacge.f16	q0, q14, q8
+  a4:	ef10 0eec 	vceq.f16	q0, q8, q14
+  a8:	ff10 0eec 	vcge.f16	q0, q8, q14
+  ac:	ff30 0eec 	vcgt.f16	q0, q8, q14
+  b0:	ff1c 0ee0 	vcge.f16	q0, q14, q8
+  b4:	ff3c 0ee0 	vcgt.f16	q0, q14, q8
+  b8:	ef14 2d0e 	vadd.f16	d2, d4, d14
+  bc:	ef18 4d6c 	vadd.f16	q2, q4, q14
+  c0:	ef34 2d0e 	vsub.f16	d2, d4, d14
+  c4:	ef38 4d6c 	vsub.f16	q2, q4, q14
+  c8:	ef10 0dec 	vadd.f16	q0, q8, q14
+  cc:	ef30 0dec 	vsub.f16	q0, q8, q14
+  d0:	ff14 2f1e 	vmaxnm.f16	d2, d4, d14
+  d4:	ff18 4f7c 	vmaxnm.f16	q2, q4, q14
+  d8:	ff34 2f1e 	vminnm.f16	d2, d4, d14
+  dc:	ff38 4f7c 	vminnm.f16	q2, q4, q14
+  e0:	ef14 2c1e 	vfma.f16	d2, d4, d14
+  e4:	ef18 4c7c 	vfma.f16	q2, q4, q14
+  e8:	ef34 2c1e 	vfms.f16	d2, d4, d14
+  ec:	ef38 4c7c 	vfms.f16	q2, q4, q14
+  f0:	ef14 2d1e 	vmla.f16	d2, d4, d14
+  f4:	ef18 4d7c 	vmla.f16	q2, q4, q14
+  f8:	ef34 2d1e 	vmls.f16	d2, d4, d14
+  fc:	ef38 4d7c 	vmls.f16	q2, q4, q14
+ 100:	ffb6 458e 	vrintz.f16	d4, d14
+ 104:	ffb6 85ec 	vrintz.f16	q4, q14
+ 108:	ffb6 448e 	vrintx.f16	d4, d14
+ 10c:	ffb6 84ec 	vrintx.f16	q4, q14
+ 110:	ffb6 450e 	vrinta.f16	d4, d14
+ 114:	ffb6 856c 	vrinta.f16	q4, q14
+ 118:	ffb6 440e 	vrintn.f16	d4, d14
+ 11c:	ffb6 846c 	vrintn.f16	q4, q14
+ 120:	ffb6 478e 	vrintp.f16	d4, d14
+ 124:	ffb6 87ec 	vrintp.f16	q4, q14
+ 128:	ffb6 468e 	vrintm.f16	d4, d14
+ 12c:	ffb6 86ec 	vrintm.f16	q4, q14
+ 130:	ff18 4d0e 	vpadd.f16	d4, d8, d14
+ 134:	ffb7 4508 	vrecpe.f16	d4, d8
+ 138:	ffb7 8560 	vrecpe.f16	q4, q8
+ 13c:	ffb7 4588 	vrsqrte.f16	d4, d8
+ 140:	ffb7 85e0 	vrsqrte.f16	q4, q8
+ 144:	ffb7 0564 	vrecpe.f16	q0, q10
+ 148:	ffb7 05e4 	vrsqrte.f16	q0, q10
+ 14c:	ef1a 8f1c 	vrecps.f16	d8, d10, d12
+ 150:	ef54 0ff8 	vrecps.f16	q8, q10, q12
+ 154:	ef3a 8f1c 	vrsqrts.f16	d8, d10, d12
+ 158:	ef74 0ff8 	vrsqrts.f16	q8, q10, q12
+ 15c:	ef10 4f58 	vrecps.f16	q2, q0, q4
+ 160:	ef30 4f58 	vrsqrts.f16	q2, q0, q4
+ 164:	ff18 4f0e 	vpmax.f16	d4, d8, d14
+ 168:	ff38 af02 	vpmin.f16	d10, d8, d2
+ 16c:	ff18 4d1e 	vmul.f16	d4, d8, d14
+ 170:	ff10 7d11 	vmul.f16	d7, d0, d1
+ 174:	ff10 4dd0 	vmul.f16	q2, q8, q0
+ 178:	ffb7 600c 	vcvta.s16.f16	d6, d12
+ 17c:	ffb7 c068 	vcvta.s16.f16	q6, q12
+ 180:	ffb7 630c 	vcvtm.s16.f16	d6, d12
+ 184:	ffb7 c368 	vcvtm.s16.f16	q6, q12
+ 188:	ffb7 610c 	vcvtn.s16.f16	d6, d12
+ 18c:	ffb7 c168 	vcvtn.s16.f16	q6, q12
+ 190:	ffb7 620c 	vcvtp.s16.f16	d6, d12
+ 194:	ffb7 c268 	vcvtp.s16.f16	q6, q12
+ 198:	ffb7 608c 	vcvta.u16.f16	d6, d12
+ 19c:	ffb7 c0e8 	vcvta.u16.f16	q6, q12
+ 1a0:	ffb7 638c 	vcvtm.u16.f16	d6, d12
+ 1a4:	ffb7 c3e8 	vcvtm.u16.f16	q6, q12
+ 1a8:	ffb7 618c 	vcvtn.u16.f16	d6, d12
+ 1ac:	ffb7 c1e8 	vcvtn.u16.f16	q6, q12
+ 1b0:	ffb7 628c 	vcvtp.u16.f16	d6, d12
+ 1b4:	ffb7 c2e8 	vcvtp.u16.f16	q6, q12
+ 1b8:	ffb7 e700 	vcvt.s16.f16	d14, d0
+ 1bc:	fff7 c740 	vcvt.s16.f16	q14, q0
+ 1c0:	ffb7 e780 	vcvt.u16.f16	d14, d0
+ 1c4:	fff7 c7c0 	vcvt.u16.f16	q14, q0
+ 1c8:	ffb7 e600 	vcvt.f16.s16	d14, d0
+ 1cc:	fff7 c640 	vcvt.f16.s16	q14, q0
+ 1d0:	ffb7 e680 	vcvt.f16.u16	d14, d0
+ 1d4:	fff7 c6c0 	vcvt.f16.u16	q14, q0
+ 1d8:	efbd ed10 	vcvt.s16.f16	d14, d0, #3
+ 1dc:	effd cd50 	vcvt.s16.f16	q14, q0, #3
+ 1e0:	ffbd ed10 	vcvt.u16.f16	d14, d0, #3
+ 1e4:	fffd cd50 	vcvt.u16.f16	q14, q0, #3
+ 1e8:	efbd ec10 	vcvt.f16.s16	d14, d0, #3
+ 1ec:	effd cc50 	vcvt.f16.s16	q14, q0, #3
+ 1f0:	ffbd ec10 	vcvt.f16.u16	d14, d0, #3
+ 1f4:	fffd cc50 	vcvt.f16.u16	q14, q0, #3
+ 1f8:	ffb5 e502 	vceq.f16	d14, d2, #0
+ 1fc:	fff5 c544 	vceq.f16	q14, q2, #0
+ 200:	ffb5 e482 	vcge.f16	d14, d2, #0
+ 204:	fff5 c4c4 	vcge.f16	q14, q2, #0
+ 208:	ffb5 e402 	vcgt.f16	d14, d2, #0
+ 20c:	fff5 c444 	vcgt.f16	q14, q2, #0
+ 210:	ffb5 e582 	vcle.f16	d14, d2, #0
+ 214:	fff5 c5c4 	vcle.f16	q14, q2, #0
+ 218:	ffb5 e602 	vclt.f16	d14, d2, #0
+ 21c:	fff5 c644 	vclt.f16	q14, q2, #0
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d
new file mode 100644
index 0000000000000000000000000000000000000000..e78f080fe989bd2e626cd13c67592cc93323fa0f
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning-thumb.d
@@ -0,0 +1,4 @@
+#name: Reject ARM v8.2 FP16 SIMD instruction for early arch (Thumb)
+#source: armv8-2-fp16-simd.s
+#as: -march=armv8.2-a -mfpu=neon-fp-armv8 -mthumb
+#error-output: armv8-2-fp16-simd-warning.l
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d
new file mode 100644
index 0000000000000000000000000000000000000000..d39c36da1e6c711874e1870071190a8ba68cdc47
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.d
@@ -0,0 +1,4 @@
+#name: Reject ARM v8.2 FP16 SIMD instruction for early arch
+#source: armv8-2-fp16-simd.s
+#as: -march=armv8.2-a -mfpu=neon-fp-armv8
+#error-output: armv8-2-fp16-simd-warning.l
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l
new file mode 100644
index 0000000000000000000000000000000000000000..ba27f7c33a497447585c76f5e7aea8a80200ced3
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd-warning.l
@@ -0,0 +1,137 @@
+[^:]*: Assembler messages:
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vabd.f16 q2,q4,q14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmax.f16 q2,q4,q14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 d2,d4,d14'
+[^:]*:163: Error: selected processor does not support fp16 instruction -- `vmin.f16 q2,q4,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vabdq.f16 q0,q8,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vmaxq.f16 q0,q8,q14'
+[^:]*:164: Error: selected processor does not support fp16 instruction -- `vminq.f16 q0,q8,q14'
+[^:]*:165: Error: selected processor does not support fp16 instruction -- `vabd.f16 d1,d3,d15'
+[^:]*:166: Error: selected processor does not support fp16 instruction -- `vabd.f16 d0,d1,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 d0,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vabs.f16 q0,q8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 d0,d8'
+[^:]*:169: Error: selected processor does not support fp16 instruction -- `vneg.f16 q0,q8'
+[^:]*:170: Error: selected processor does not support fp16 instruction -- `vabsq.f16 q2,q6'
+[^:]*:170: Error: selected processor does not support fp16 instruction -- `vnegq.f16 q2,q6'
+[^:]*:171: Error: selected processor does not support fp16 instruction -- `vabs.f16 d7,d3'
+[^:]*:172: Error: selected processor does not support fp16 instruction -- `vneg.f16 d9,d1'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacge.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacgt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vaclt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vacle.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vceq.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcge.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vcle.f16 q2,q4,q14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 d2,d4,d14'
+[^:]*:175: Error: selected processor does not support fp16 instruction -- `vclt.f16 q2,q4,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgeq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacgtq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacltq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vacleq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vceqq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgeq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcgtq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcleq.f16 q0,q8,q14'
+[^:]*:176: Error: selected processor does not support fp16 instruction -- `vcltq.f16 q0,q8,q14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 d2,d4,d14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vadd.f16 q2,q4,q14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 d2,d4,d14'
+[^:]*:179: Error: selected processor does not support fp16 instruction -- `vsub.f16 q2,q4,q14'
+[^:]*:180: Error: selected processor does not support fp16 instruction -- `vaddq.f16 q0,q8,q14'
+[^:]*:180: Error: selected processor does not support fp16 instruction -- `vsubq.f16 q0,q8,q14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 d2,d4,d14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vmaxnm.f16 q2,q4,q14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 d2,d4,d14'
+[^:]*:183: Error: selected processor does not support fp16 instruction -- `vminnm.f16 q2,q4,q14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 d2,d4,d14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfma.f16 q2,q4,q14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 d2,d4,d14'
+[^:]*:186: Error: selected processor does not support fp16 instruction -- `vfms.f16 q2,q4,q14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 d2,d4,d14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmla.f16 q2,q4,q14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 d2,d4,d14'
+[^:]*:189: Error: selected processor does not support fp16 instruction -- `vmls.f16 q2,q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintz.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintx.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrinta.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintn.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintp.f16 q4,q14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 d4,d14'
+[^:]*:192: Error: selected processor does not support fp16 instruction -- `vrintm.f16 q4,q14'
+[^:]*:195: Error: selected processor does not support fp16 instruction -- `vpadd.f16 d4,d8,d14'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 d4,d8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrecpe.f16 q4,q8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 d4,d8'
+[^:]*:198: Error: selected processor does not support fp16 instruction -- `vrsqrte.f16 q4,q8'
+[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrecpeq.f16 q0,q10'
+[^:]*:199: Error: selected processor does not support fp16 instruction -- `vrsqrteq.f16 q0,q10'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 d8,d10,d12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrecps.f16 q8,q10,q12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 d8,d10,d12'
+[^:]*:202: Error: selected processor does not support fp16 instruction -- `vrsqrts.f16 q8,q10,q12'
+[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrecpsq.f16 q2,q0,q4'
+[^:]*:203: Error: selected processor does not support fp16 instruction -- `vrsqrtsq.f16 q2,q0,q4'
+[^:]*:206: Error: selected processor does not support fp16 instruction -- `vpmax.f16 d4,d8,d14'
+[^:]*:207: Error: selected processor does not support fp16 instruction -- `vpmin.f16 d10,d8,d2'
+[^:]*:210: Error: selected processor does not support fp16 instruction -- `vmul.f16 d4,d8,d14'
+[^:]*:211: Error: selected processor does not support fp16 instruction -- `vmul.f16 d7,d0,d1'
+[^:]*:212: Error: selected processor does not support fp16 instruction -- `vmul.f16 q2,q8,q0'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.s16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvta.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtm.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtn.u16.f16 q6,q12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 d6,d12'
+[^:]*:215: Error: selected processor does not support fp16 instruction -- `vcvtp.u16.f16 q6,q12'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0'
+[^:]*:218: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.s16.f16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.u16.f16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.s16 q14,q0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 d14,d0,#3'
+[^:]*:221: Error: selected processor does not support fp16 instruction -- `vcvt.f16.u16 q14,q0,#3'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vceq.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcge.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcgt.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 q14,q2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 d14,d2,#0'
+[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 q14,q2,#0'
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd.d b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d
new file mode 100644
index 0000000000000000000000000000000000000000..1a97f393f9d6e37981e67a5fa5c96073dff91d27
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.d
@@ -0,0 +1,147 @@
+#name: ARM v8.2 FP16 support on SIMD
+#source: armv8-2-fp16-simd.s
+#objdump: -d
+#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <func>:
+   0:	f3342d0e 	vabd.f16	d2, d4, d14
+   4:	f3384d6c 	vabd.f16	q2, q4, q14
+   8:	f2142f0e 	vmax.f16	d2, d4, d14
+   c:	f2184f6c 	vmax.f16	q2, q4, q14
+  10:	f2342f0e 	vmin.f16	d2, d4, d14
+  14:	f2384f6c 	vmin.f16	q2, q4, q14
+  18:	f3300dec 	vabd.f16	q0, q8, q14
+  1c:	f2100fec 	vmax.f16	q0, q8, q14
+  20:	f2300fec 	vmin.f16	q0, q8, q14
+  24:	f3331d0f 	vabd.f16	d1, d3, d15
+  28:	f3310d08 	vabd.f16	d0, d1, d8
+  2c:	f3b50708 	vabs.f16	d0, d8
+  30:	f3b50760 	vabs.f16	q0, q8
+  34:	f3b50788 	vneg.f16	d0, d8
+  38:	f3b507e0 	vneg.f16	q0, q8
+  3c:	f3b5474c 	vabs.f16	q2, q6
+  40:	f3b547cc 	vneg.f16	q2, q6
+  44:	f3b57703 	vabs.f16	d7, d3
+  48:	f3b59781 	vneg.f16	d9, d1
+  4c:	f3142e1e 	vacge.f16	d2, d4, d14
+  50:	f3184e7c 	vacge.f16	q2, q4, q14
+  54:	f3342e1e 	vacgt.f16	d2, d4, d14
+  58:	f3384e7c 	vacgt.f16	q2, q4, q14
+  5c:	f33e2e14 	vacgt.f16	d2, d14, d4
+  60:	f33c4ed8 	vacgt.f16	q2, q14, q4
+  64:	f31e2e14 	vacge.f16	d2, d14, d4
+  68:	f31c4ed8 	vacge.f16	q2, q14, q4
+  6c:	f2142e0e 	vceq.f16	d2, d4, d14
+  70:	f2184e6c 	vceq.f16	q2, q4, q14
+  74:	f3142e0e 	vcge.f16	d2, d4, d14
+  78:	f3184e6c 	vcge.f16	q2, q4, q14
+  7c:	f3342e0e 	vcgt.f16	d2, d4, d14
+  80:	f3384e6c 	vcgt.f16	q2, q4, q14
+  84:	f31e2e04 	vcge.f16	d2, d14, d4
+  88:	f31c4ec8 	vcge.f16	q2, q14, q4
+  8c:	f33e2e04 	vcgt.f16	d2, d14, d4
+  90:	f33c4ec8 	vcgt.f16	q2, q14, q4
+  94:	f3100efc 	vacge.f16	q0, q8, q14
+  98:	f3300efc 	vacgt.f16	q0, q8, q14
+  9c:	f33c0ef0 	vacgt.f16	q0, q14, q8
+  a0:	f31c0ef0 	vacge.f16	q0, q14, q8
+  a4:	f2100eec 	vceq.f16	q0, q8, q14
+  a8:	f3100eec 	vcge.f16	q0, q8, q14
+  ac:	f3300eec 	vcgt.f16	q0, q8, q14
+  b0:	f31c0ee0 	vcge.f16	q0, q14, q8
+  b4:	f33c0ee0 	vcgt.f16	q0, q14, q8
+  b8:	f2142d0e 	vadd.f16	d2, d4, d14
+  bc:	f2184d6c 	vadd.f16	q2, q4, q14
+  c0:	f2342d0e 	vsub.f16	d2, d4, d14
+  c4:	f2384d6c 	vsub.f16	q2, q4, q14
+  c8:	f2100dec 	vadd.f16	q0, q8, q14
+  cc:	f2300dec 	vsub.f16	q0, q8, q14
+  d0:	f3142f1e 	vmaxnm.f16	d2, d4, d14
+  d4:	f3184f7c 	vmaxnm.f16	q2, q4, q14
+  d8:	f3342f1e 	vminnm.f16	d2, d4, d14
+  dc:	f3384f7c 	vminnm.f16	q2, q4, q14
+  e0:	f2142c1e 	vfma.f16	d2, d4, d14
+  e4:	f2184c7c 	vfma.f16	q2, q4, q14
+  e8:	f2342c1e 	vfms.f16	d2, d4, d14
+  ec:	f2384c7c 	vfms.f16	q2, q4, q14
+  f0:	f2142d1e 	vmla.f16	d2, d4, d14
+  f4:	f2184d7c 	vmla.f16	q2, q4, q14
+  f8:	f2342d1e 	vmls.f16	d2, d4, d14
+  fc:	f2384d7c 	vmls.f16	q2, q4, q14
+ 100:	f3b6458e 	vrintz.f16	d4, d14
+ 104:	f3b685ec 	vrintz.f16	q4, q14
+ 108:	f3b6448e 	vrintx.f16	d4, d14
+ 10c:	f3b684ec 	vrintx.f16	q4, q14
+ 110:	f3b6450e 	vrinta.f16	d4, d14
+ 114:	f3b6856c 	vrinta.f16	q4, q14
+ 118:	f3b6440e 	vrintn.f16	d4, d14
+ 11c:	f3b6846c 	vrintn.f16	q4, q14
+ 120:	f3b6478e 	vrintp.f16	d4, d14
+ 124:	f3b687ec 	vrintp.f16	q4, q14
+ 128:	f3b6468e 	vrintm.f16	d4, d14
+ 12c:	f3b686ec 	vrintm.f16	q4, q14
+ 130:	f3184d0e 	vpadd.f16	d4, d8, d14
+ 134:	f3b74508 	vrecpe.f16	d4, d8
+ 138:	f3b78560 	vrecpe.f16	q4, q8
+ 13c:	f3b74588 	vrsqrte.f16	d4, d8
+ 140:	f3b785e0 	vrsqrte.f16	q4, q8
+ 144:	f3b70564 	vrecpe.f16	q0, q10
+ 148:	f3b705e4 	vrsqrte.f16	q0, q10
+ 14c:	f21a8f1c 	vrecps.f16	d8, d10, d12
+ 150:	f2540ff8 	vrecps.f16	q8, q10, q12
+ 154:	f23a8f1c 	vrsqrts.f16	d8, d10, d12
+ 158:	f2740ff8 	vrsqrts.f16	q8, q10, q12
+ 15c:	f2104f58 	vrecps.f16	q2, q0, q4
+ 160:	f2304f58 	vrsqrts.f16	q2, q0, q4
+ 164:	f3184f0e 	vpmax.f16	d4, d8, d14
+ 168:	f338af02 	vpmin.f16	d10, d8, d2
+ 16c:	f3184d1e 	vmul.f16	d4, d8, d14
+ 170:	f3107d11 	vmul.f16	d7, d0, d1
+ 174:	f3104dd0 	vmul.f16	q2, q8, q0
+ 178:	f3b7600c 	vcvta.s16.f16	d6, d12
+ 17c:	f3b7c068 	vcvta.s16.f16	q6, q12
+ 180:	f3b7630c 	vcvtm.s16.f16	d6, d12
+ 184:	f3b7c368 	vcvtm.s16.f16	q6, q12
+ 188:	f3b7610c 	vcvtn.s16.f16	d6, d12
+ 18c:	f3b7c168 	vcvtn.s16.f16	q6, q12
+ 190:	f3b7620c 	vcvtp.s16.f16	d6, d12
+ 194:	f3b7c268 	vcvtp.s16.f16	q6, q12
+ 198:	f3b7608c 	vcvta.u16.f16	d6, d12
+ 19c:	f3b7c0e8 	vcvta.u16.f16	q6, q12
+ 1a0:	f3b7638c 	vcvtm.u16.f16	d6, d12
+ 1a4:	f3b7c3e8 	vcvtm.u16.f16	q6, q12
+ 1a8:	f3b7618c 	vcvtn.u16.f16	d6, d12
+ 1ac:	f3b7c1e8 	vcvtn.u16.f16	q6, q12
+ 1b0:	f3b7628c 	vcvtp.u16.f16	d6, d12
+ 1b4:	f3b7c2e8 	vcvtp.u16.f16	q6, q12
+ 1b8:	f3b7e700 	vcvt.s16.f16	d14, d0
+ 1bc:	f3f7c740 	vcvt.s16.f16	q14, q0
+ 1c0:	f3b7e780 	vcvt.u16.f16	d14, d0
+ 1c4:	f3f7c7c0 	vcvt.u16.f16	q14, q0
+ 1c8:	f3b7e600 	vcvt.f16.s16	d14, d0
+ 1cc:	f3f7c640 	vcvt.f16.s16	q14, q0
+ 1d0:	f3b7e680 	vcvt.f16.u16	d14, d0
+ 1d4:	f3f7c6c0 	vcvt.f16.u16	q14, q0
+ 1d8:	f2bded10 	vcvt.s16.f16	d14, d0, #3
+ 1dc:	f2fdcd50 	vcvt.s16.f16	q14, q0, #3
+ 1e0:	f3bded10 	vcvt.u16.f16	d14, d0, #3
+ 1e4:	f3fdcd50 	vcvt.u16.f16	q14, q0, #3
+ 1e8:	f2bdec10 	vcvt.f16.s16	d14, d0, #3
+ 1ec:	f2fdcc50 	vcvt.f16.s16	q14, q0, #3
+ 1f0:	f3bdec10 	vcvt.f16.u16	d14, d0, #3
+ 1f4:	f3fdcc50 	vcvt.f16.u16	q14, q0, #3
+ 1f8:	f3b5e502 	vceq.f16	d14, d2, #0
+ 1fc:	f3f5c544 	vceq.f16	q14, q2, #0
+ 200:	f3b5e482 	vcge.f16	d14, d2, #0
+ 204:	f3f5c4c4 	vcge.f16	q14, q2, #0
+ 208:	f3b5e402 	vcgt.f16	d14, d2, #0
+ 20c:	f3f5c444 	vcgt.f16	q14, q2, #0
+ 210:	f3b5e582 	vcle.f16	d14, d2, #0
+ 214:	f3f5c5c4 	vcle.f16	q14, q2, #0
+ 218:	f3b5e602 	vclt.f16	d14, d2, #0
+ 21c:	f3f5c644 	vclt.f16	q14, q2, #0
diff --git a/gas/testsuite/gas/arm/armv8-2-fp16-simd.s b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s
new file mode 100644
index 0000000000000000000000000000000000000000..7758f2415e04b4c9956b5db9ac52e5b3272b8f30
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-2-fp16-simd.s
@@ -0,0 +1,224 @@
+	.macro f16_dq_ifsu reg0 reg1 reg2
+	.irp op, vabd.f16, vmax.f16, vmin.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_q_ifsu reg0 reg1 reg2
+	.irp op, vabdq.f16, vmaxq.f16, vminq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_abs_neg reg0 reg1
+	.irp op, vabs.f16, vneg.f16
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_q_abs_neg reg0 reg1
+	.irp op, vabsq.f16, vnegq.f16
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_fcmp reg0 reg1 reg2
+	.irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_fcmp_imm0 reg0 reg1
+	.irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
+		\op d\reg0, d\reg1, #0
+		\op q\reg0, q\reg1, #0
+	.endr
+	.endm
+
+	.macro f16_q_fcmp reg0 reg1 reg2
+	.irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_addsub reg0 reg1 reg2
+	.irp op, vadd.f16, vsub.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_q_addsub reg0 reg1 reg2
+	.irp op, vaddq.f16, vsubq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_vmaxnm reg0 reg1 reg2
+	.irp op, vmaxnm.f16, vminnm.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_fmac reg0 reg1 reg2
+	.irp op, vfma.f16, vfms.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_fmacmaybe reg0 reg1 reg2
+	.irp op, vmla.f16, vmls.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_vrint reg0 reg1
+	.irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_recip reg0 reg1
+	.irp op, vrecpe.f16, vrsqrte.f16
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_q_recip reg0 reg1
+	.irp op, vrecpeq.f16, vrsqrteq.f16
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_step reg0 reg1 reg2
+	.irp op, vrecps.f16, vrsqrts.f16
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_q_step reg0 reg1 reg2
+	.irp op, vrecpsq.f16, vrsqrtsq.f16
+		\op q\reg0, q\reg1, q\reg2
+	.endr
+	.endm
+
+	.macro f16_dq_cvt reg0 reg1
+	.irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16,
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_cvtz reg0 reg1
+	.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endr
+	.endm
+
+	.macro f16_dq_cvtz_fixed reg0 reg1 imm
+	.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
+		\op d\reg0, d\reg1, #\imm
+		\op q\reg0, q\reg1, #\imm
+	.endr
+	.endm
+
+	.macro f16_dq op reg0 reg1 reg2
+		\op d\reg0, d\reg1, d\reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endm
+
+	.macro f16_d op reg0 reg1 reg2
+		\op d\reg0, d\reg1, d\reg2
+	.endm
+
+	.macro f16_q op reg0 reg1 reg2
+		\op q\reg0, q\reg1, q\reg2
+	.endm
+
+	.macro f16_dq_2 op reg0 reg1
+		\op d\reg0, d\reg1
+		\op q\reg0, q\reg1
+	.endm
+
+	.macro f16_d_2 op reg0 reg1
+		\op d\reg0, d\reg1
+	.endm
+
+	.macro f16_q_2 op reg0 reg1
+		\op q\reg0, q\reg1
+	.endm
+
+func:
+	# neon_dyadic_if_su
+	f16_dq_ifsu 2 4 14
+	f16_q_ifsu 0 8 14
+	f16_d  vabd.f16 1 3 15
+	f16_d  vabd.f16 0 1 8
+
+	# neon_abs_neg
+	f16_dq_abs_neg 0 8
+	f16_q_abs_neg 2 6
+	f16_d_2  vabs.f16 7 3
+	f16_d_2  vneg.f16 9 1
+
+	# neon_fcmp
+	f16_dq_fcmp 2 4 14
+	f16_q_fcmp 0 8 14
+
+	# neon_addsub_if_i
+	f16_dq_addsub 2 4 14
+	f16_q_addsub 0 8 14
+
+	# neon_vmaxnm
+	f16_dq_vmaxnm 2 4 14
+
+	# neon_fmac
+	f16_dq_fmac 2 4 14
+
+	# neon_mac_maybe_scalar
+	f16_dq_fmacmaybe 2 4 14
+
+	# vrint
+	f16_dq_vrint 4 14
+
+	# neon_dyadic_if_i_d
+	f16_d vpadd.f16 4 8 14
+
+	# neon_recip_est
+	f16_dq_recip 4 8
+	f16_q_recip 0 10
+
+	# neon_step
+	f16_dq_step 8 10 12
+	f16_q_step 2 0 4
+
+	# neon_dyadic_if_su_d
+	f16_d vpmax.f16 4 8 14
+	f16_d vpmin.f16 10 8 2
+
+	# neon_mul
+	f16_d vmul.f16 4 8 14
+	f16_d vmul.f16 7 0 1
+	f16_q vmul.f16 2 8 0
+
+	# neon_cvt
+	f16_dq_cvt 6 12
+
+	# neon_cvtz
+	f16_dq_cvtz 14, 0
+
+	# neon_cvtz_fixed
+	f16_dq_cvtz_fixed 14, 0, 3
+
+	# neon_fcmp_imm0
+	f16_dq_fcmp_imm0 14, 2
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index 324304d850cdff43501104295f4384a14b306136..322e801c465235410ea3b8affad1a99ea6971a7b 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -1032,15 +1032,23 @@ static const struct opcode32 neon_opcodes[] =
 
   /* NEON fused multiply add instructions.  */
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA),
-    0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000c10, 0xffb00f10, "vfma%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100c10, 0xffb00f10, "vfma%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_FMA),
-    0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200c10, 0xffb00f10, "vfms%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
 
   /* Two registers, miscellaneous.  */
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
     0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b60400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f16\t%12-15,22R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
     0xf3bb0000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us32.f32\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70000, 0xffbf0c10, "vcvt%8-9?mpna%u.%7?us16.f16\t%12-15,22R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
     0xf3b00300, 0xffbf0fd0, "aese%u.8\t%12-15,22Q, %0-3,5Q"},
   {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
@@ -1080,8 +1088,12 @@ static const struct opcode32 neon_opcodes[] =
     "vshll%c.i%18-19S2\t%12-15,22Q, %0-3,5D, #%18-19S2"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf3bb0400, 0xffbf0e90, "vrecpe%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70400, 0xffbf0e90, "vrecpe%c.%8?fu16\t%12-15,22R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf3bb0480, 0xffbf0e90, "vrsqrte%c.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70480, 0xffbf0e90, "vrsqrte%c.%8?fu16\t%12-15,22R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf3b00000, 0xffb30f90, "vrev64%c.%18-19S2\t%12-15,22R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@@ -1121,8 +1133,11 @@ static const struct opcode32 neon_opcodes[] =
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf3b00600, 0xffb30f10, "vpadal%c.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3b30600, 0xffb30e10,
+    0xf3bb0600, 0xffbf0e10,
     "vcvt%c.%7-8?usff%18-19Sa.%7-8?ffus%18-19Sa\t%12-15,22R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3b70600, 0xffbf0e10,
+    "vcvt%c.%7-8?usff16.%7-8?ffus16\t%12-15,22R, %0-3,5R"},
 
   /* Three registers of the same length.  */
   {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
@@ -1140,9 +1155,13 @@ static const struct opcode32 neon_opcodes[] =
   {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),
     0xf3200c40, 0xffb00f50, "sha256su1%u.32\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
-    0xf3000f10, 0xffa00f10, "vmaxnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000f10, 0xffb00f10, "vmaxnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100f10, 0xffb00f10, "vmaxnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
-    0xf3200f10, 0xffa00f10, "vminnm%u.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200f10, 0xffb00f10, "vminnm%u.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300f10, 0xffb00f10, "vminnm%u.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf2000110, 0xffb00f10, "vand%c\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@@ -1160,41 +1179,77 @@ static const struct opcode32 neon_opcodes[] =
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf3300110, 0xffb00f10, "vbif%c\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000d00, 0xffa00f10, "vadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000d00, 0xffb00f10, "vadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100d00, 0xffb00f10, "vadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000d10, 0xffa00f10, "vmla%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000d10, 0xffb00f10, "vmla%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100d10, 0xffb00f10, "vmla%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000e00, 0xffa00f10, "vceq%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000e00, 0xffb00f10, "vceq%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100e00, 0xffb00f10, "vceq%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000f00, 0xffa00f10, "vmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000f00, 0xffb00f10, "vmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100f00, 0xffb00f10, "vmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2000f10, 0xffa00f10, "vrecps%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2000f10, 0xffb00f10, "vrecps%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2100f10, 0xffb00f10, "vrecps%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200d00, 0xffa00f10, "vsub%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200d00, 0xffb00f10, "vsub%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300d00, 0xffb00f10, "vsub%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200d10, 0xffa00f10, "vmls%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200d10, 0xffb00f10, "vmls%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300d10, 0xffb00f10, "vmls%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200f00, 0xffa00f10, "vmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200f00, 0xffb00f10, "vmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300f00, 0xffb00f10, "vmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf2200f10, 0xffa00f10, "vrsqrts%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf2200f10, 0xffb00f10, "vrsqrts%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2300f10, 0xffb00f10, "vrsqrts%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000d00, 0xffa00f10, "vpadd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000d00, 0xffb00f10, "vpadd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100d00, 0xffb00f10, "vpadd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000d10, 0xffa00f10, "vmul%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000d10, 0xffb00f10, "vmul%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100d10, 0xffb00f10, "vmul%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000e00, 0xffa00f10, "vcge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000e00, 0xffb00f10, "vcge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100e00, 0xffb00f10, "vcge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000e10, 0xffa00f10, "vacge%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000e10, 0xffb00f10, "vacge%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100e10, 0xffb00f10, "vacge%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3000f00, 0xffa00f10, "vpmax%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3000f00, 0xffb00f10, "vpmax%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3100f00, 0xffb00f10, "vpmax%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200d00, 0xffa00f10, "vabd%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200d00, 0xffb00f10, "vabd%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300d00, 0xffb00f10, "vabd%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200e00, 0xffa00f10, "vcgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200e00, 0xffb00f10, "vcgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300e00, 0xffb00f10, "vcgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200e10, 0xffa00f10, "vacgt%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200e10, 0xffb00f10, "vacgt%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300e10, 0xffb00f10, "vacgt%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
-    0xf3200f00, 0xffa00f10, "vpmin%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+    0xf3200f00, 0xffb00f10, "vpmin%c.f32\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf3300f00, 0xffb00f10, "vpmin%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf2000800, 0xff800f10, "vadd%c.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@@ -1426,6 +1481,9 @@ static const struct opcode32 neon_opcodes[] =
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
     0xf2a00e10, 0xfea00e90,
     "vcvt%c.%24,8?usff32.%24,8?ffus32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
+    0xf2a00c10, 0xfea00e90,
+    "vcvt%c.%24,8?usff16.%24,8?ffus16\t%12-15,22R, %0-3,5R, #%16-20e"},
 
   /* Three registers of different lengths.  */
   {ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8),

Follow-Ups:
- Re: [Patch/ARM] Support v8.2 fp16 simd instructions
  - From: Ramana Radhakrishnan

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]