[Patch][binutils][arm] BFloat16 enablement [4/10]

Wed Nov 6 15:54:00 GMT 2019

Hi,

Small update that addresses Nick's comments from patch 0.
It fixes the compile-time warning/error and the regressions on
wince-pe, arm-symbianelf, and arm-nacl.

Regards,
Mihail

On 10/29/2019 03:20 PM, Mihail Ionescu wrote:
> 
> Hi,
> 
> This patch is part of a series that adds support for Armv8.6-A
> (Matrix Multiply and BFloat16 extensions) to binutils.
> 
> This patch introduces BFloat16 instructions to the arm backend.
> The following BFloat16 instructions are added: vdot, vfma{b/t},
> vmmla, vfmal{t/b}, vcvt, vcvt{t/b} [1].
> 
> [1]https://developer.arm.com/docs/ddi0597/latest/simd-and-floating-point-instructions-alphabetic-order
> 
> gas/ChangeLog:
> 
> 2019-10-29  Mihail Ionescu  <mihail.ionescu@arm.com>
> 2019-10-29  Matthew Malcomson  <matthew.malcomson@arm.com>
> 
> 	* config/tc-arm.c (arm_archs): Add armv8.6-a option.
> 	(cpu_arch_ver): Add TAG_CPU_ARCH_V8 tag for Armv8.6-a.
> 	* doc/c-arm.texi (-march): New armv8.6-a arch.
> 	* config/tc-arm.c (arm_ext_bf16): New feature set.
> 	(enum neon_el_type): Add NT_bfloat value.
> 	(B_MNEM_vfmat, B_MNEM_vfmab): New bfloat16 encoder
> 	helpers.
> 	(BAD_BF16): New message.
> 	(parse_neon_type): Add bf16 type specifier.
> 	(enum neon_type_mask): Add N_BF16 type.
> 	(type_chk_of_el_type): Account for NT_bfloat.
> 	(el_type_of_type_chk): Account for N_BF16.
> 	(neon_three_args): Split out from neon_three_same.
> 	(neon_three_same): Part split out into neon_three_args.
> 	(CVT_FLAVOUR_VAR): Add bf16_f32 cvt flavour.
> 	(do_neon_cvt_1): Account for vcvt.bf16.f32.
> 	(do_bfloat_vfma): New.
> 	(do_mve_vfma): New function to deal with the mnemonic clash between the BF16
> 	vfmat and the MVE vfma in a VPT block with a 't'rue condition.
> 	(do_neon_cvttb_1): Account for vcvt{t,b}.bf16.f32.
> 	(do_vdot): New.
> 	(do_vmmla): New.
> 	(insns): Add vdot, vmmla, vfmab and vfmat mnemonics.
> 	(arm_extensions): Add "bf16" extension.
> 	* doc/c-arm.texi: Document "bf16" extension.
> 	* testsuite/gas/arm/attr-march-armv8_6-a.d: New test.
> 	* testsuite/gas/arm/bfloat16-bad.d: New test.
> 	* testsuite/gas/arm/bfloat16-bad.l: New test.
> 	* testsuite/gas/arm/bfloat16-bad.s: New test.
> 	* testsuite/gas/arm/bfloat16-cmdline-bad-2.d: New test.
> 	* testsuite/gas/arm/bfloat16-cmdline-bad-3.d: New test.
> 	* testsuite/gas/arm/bfloat16-cmdline-bad.d: New test.
> 	* testsuite/gas/arm/bfloat16-neon.s: New test.
> 	* testsuite/gas/arm/bfloat16-non-neon.s: New test.
> 	* testsuite/gas/arm/bfloat16-thumb-bad.d: New test.
> 	* testsuite/gas/arm/bfloat16-thumb-bad.l: New test.
> 	* testsuite/gas/arm/bfloat16-thumb.d: New test.
> 	* testsuite/gas/arm/bfloat16-vfp.d: New test.
> 	* testsuite/gas/arm/bfloat16.d: New test.
> 	* testsuite/gas/arm/bfloat16.s: New test.
> 
> 	
> include/ChangeLog:
> 
> 2019-10-29  Mihail Ionescu  <mihail.ionescu@arm.com>
> 2019-10-29  Matthew Malcomson  <matthew.malcomson@arm.com>
> 
> 	* opcode/arm.h (ARM_EXT2_V8_6A, ARM_AEXT2_V8_6A,
> 	ARM_ARCH_V8_6A): New.
> 	* opcode/arm.h (ARM_EXT2_BF16): New feature macro.
> 	(ARM_AEXT2_V8_6A): Include above macro in definition.
> 
> opcodes/ChangeLog:
> 
> 2019-10-29  Mihail Ionescu  <mihail.ionescu@arm.com>
> 2019-10-29  Matthew Malcomson  <matthew.malcomson@arm.com>
> 
> 	* arm-dis.c (select_arm_features): Update bfd_march_arm_8 with
> 	Armv8.6-A.
> 	(coprocessor_opcodes): Add bfloat16 vcvt{t,b}.
> 	(neon_opcodes): Add bfloat SIMD instructions.
> 	(print_insn_coprocessor): Add new control character %b to print
> 	condition code without checking cp_num.
> 	(print_insn_neon): Account for BFloat16 instructions that have no
> 	special top-byte handling.
> 
> 
> Regression tested on arm-none-eabi.
> 
> Is it ok for trunk?
> 
> 
> Regards,
> Mihail
> 
> ###############     Attachment also inlined for ease of reply    ###############
> 
> 
> diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
> index 1f462307ed9129d8aca8a4bd371965c4b2f0c0ca..44502837b0dc29c46117d05a54d44541835e10b2 100644
> --- a/gas/config/tc-arm.c
> +++ b/gas/config/tc-arm.c
> @@ -275,6 +275,8 @@ static const arm_feature_set arm_ext_sb =
>     ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
>   static const arm_feature_set arm_ext_predres =
>     ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
> +static const arm_feature_set arm_ext_bf16 =
> +  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
>   
>   static const arm_feature_set arm_arch_any = ARM_ANY;
>   #ifdef OBJ_ELF
> @@ -446,6 +448,7 @@ enum neon_el_type
>     NT_float,
>     NT_poly,
>     NT_signed,
> +  NT_bfloat,
>     NT_unsigned
>   };
>   
> @@ -893,6 +896,7 @@ struct asm_opcode
>   	_("cannot use writeback with PC-relative addressing")
>   #define BAD_RANGE	_("branch out of range")
>   #define BAD_FP16	_("selected processor does not support fp16 instruction")
> +#define BAD_BF16	_("selected processor does not support bf16 instruction")
>   #define UNPRED_REG(R)	_("using " R " results in unpredictable behaviour")
>   #define THUMB1_RELOC_ONLY  _("relocation valid in thumb1 code only")
>   #define MVE_NOT_IT	_("Warning: instruction is UNPREDICTABLE in an IT " \
> @@ -1468,6 +1472,27 @@ parse_neon_type (struct neon_type *type, char **str)
>   	  thissize = 64;
>   	  ptr++;
>   	  goto done;
> +	case 'b':
> +	  thistype = NT_bfloat;
> +	  switch (TOLOWER (*(++ptr)))
> +	    {
> +	    case 'f':
> +	      ptr += 1;
> +	      thissize = strtoul (ptr, &ptr, 10);
> +	      if (thissize != 16)
> +		{
> +		  as_bad (_("bad size %d in type specifier"), thissize);
> +		  return FAIL;
> +		}
> +	      goto done;
> +	    case '0': case '1': case '2': case '3': case '4':
> +	    case '5': case '6': case '7': case '8': case '9':
> +	    case ' ': case '.':
> +	      as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
> +	      return FAIL;
> +	    default:
> +	      break;
> +	    }
>   	default:
>   	  as_bad (_("unexpected character `%c' in type specifier"), *ptr);
>   	  return FAIL;
> @@ -14505,6 +14530,10 @@ do_mve_scalar_shift (void)
>   #define M_MNEM_vqrshrunt    0xfe801fc0
>   #define M_MNEM_vqrshrunb    0xfe800fc0
>   
> +/* Bfloat16 instruction encoder helpers.  */
> +#define B_MNEM_vfmat 0xfc300850
> +#define B_MNEM_vfmab 0xfc300810
> +
>   /* Neon instruction encoder helpers.  */
>   
>   /* Encodings for the different types for various Neon opcodes.  */
> @@ -14850,6 +14879,7 @@ enum neon_type_mask
>     N_F32  = 0x0080000,
>     N_F64  = 0x0100000,
>     N_P64	 = 0x0200000,
> +  N_BF16 = 0x0400000,
>     N_KEY  = 0x1000000, /* Key element (main type specifier).  */
>     N_EQK  = 0x2000000, /* Given operand has the same type & size as the key.  */
>     N_VFP  = 0x4000000, /* VFP mode: operand size must match register width.  */
> @@ -15148,6 +15178,10 @@ type_chk_of_el_type (enum neon_el_type type, unsigned size)
>   	}
>         break;
>   
> +    case NT_bfloat:
> +      if (size == 16) return N_BF16;
> +      break;
> +
>       default: ;
>       }
>   
> @@ -15166,7 +15200,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
>   
>     if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
>       *size = 8;
> -  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16)) != 0)
> +  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16 | N_BF16))
> +	   != 0)
>       *size = 16;
>     else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
>       *size = 32;
> @@ -15187,6 +15222,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
>       *type = NT_poly;
>     else if ((mask & (N_F_ALL)) != 0)
>       *type = NT_float;
> +  else if ((mask & (N_BF16)) != 0)
> +    *type = NT_bfloat;
>     else
>       return FAIL;
>   
> @@ -16623,6 +16660,20 @@ mve_encode_rrqq (unsigned U, unsigned size)
>     inst.is_neon = 1;
>   }
>   
> +/* Helper function for neon_three_same handling the operands.  */
> +static void
> +neon_three_args (int isquad)
> +{
> +  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
> +  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
> +  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
> +  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
> +  inst.instruction |= LOW4 (inst.operands[2].reg);
> +  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
> +  inst.instruction |= (isquad != 0) << 6;
> +  inst.is_neon = 1;
> +}
> +
>   /* Encode insns with bit pattern:
>   
>     |28/24|23|22 |21 20|19 16|15 12|11    8|7|6|5|4|3  0|
> @@ -16634,13 +16685,7 @@ mve_encode_rrqq (unsigned U, unsigned size)
>   static void
>   neon_three_same (int isquad, int ubit, int size)
>   {
> -  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
> -  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
> -  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
> -  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
> -  inst.instruction |= LOW4 (inst.operands[2].reg);
> -  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
> -  inst.instruction |= (isquad != 0) << 6;
> +  neon_three_args (isquad);
>     inst.instruction |= (ubit != 0) << 24;
>     if (size != -1)
>       inst.instruction |= neon_logbits (size) << 20;
> @@ -17783,6 +17828,44 @@ do_neon_mac_maybe_scalar (void)
>   }
>   
>   static void
> +do_bfloat_vfma (void)
> +{
> +  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
> +  constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
> +  enum neon_shape rs;
> +  int t_bit = 0;
> +
> +  if (inst.instruction != B_MNEM_vfmab)
> +  {
> +      t_bit = 1;
> +      inst.instruction = B_MNEM_vfmat;
> +  }
> +
> +  if (inst.operands[2].isscalar)
> +    {
> +      rs = neon_select_shape (NS_QQS, NS_NULL);
> +      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
> +
> +      inst.instruction |= (1 << 25);
> +      int index = inst.operands[2].reg & 0xf;
> +      constraint (!(index < 4), _("index must be in the range 0 to 3"));
> +      inst.operands[2].reg >>= 4;
> +      constraint (!(inst.operands[2].reg < 8),
> +		  _("indexed register must be less than 8"));
> +      neon_three_args (t_bit);
> +      inst.instruction |= ((index & 1) << 3);
> +      inst.instruction |= ((index & 2) << 4);
> +    }
> +  else
> +    {
> +      rs = neon_select_shape (NS_QQQ, NS_NULL);
> +      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
> +      neon_three_args (t_bit);
> +    }
> +
> +}
> +
> +static void
>   do_neon_fmac (void)
>   {
>     if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_fma)
> @@ -17800,6 +17883,7 @@ do_neon_fmac (void)
>   
>         if (rs == NS_QQR)
>   	{
> +
>   	  if (inst.operands[2].reg == REG_SP)
>   	    as_tsktsk (MVE_BAD_SP);
>   	  else if (inst.operands[2].reg == REG_PC)
> @@ -17825,6 +17909,24 @@ do_neon_fmac (void)
>   }
>   
>   static void
> +do_mve_vfma (void)
> +{
> +  if (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_bf16) &&
> +      inst.cond == COND_ALWAYS)
> +    {
> +      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
> +      inst.instruction = N_MNEM_vfma;
> +      inst.pred_insn_type = INSIDE_VPT_INSN;
> +      inst.cond = 0xf;
> +      return do_neon_fmac();
> +    }
> +  else
> +    {
> +      do_bfloat_vfma();
> +    }
> +}
> +
> +static void
>   do_neon_tst (void)
>   {
>     enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
> @@ -18653,6 +18755,7 @@ do_neon_shll (void)
>     CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
>     CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
>     CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
> +  CVT_VAR (bf16_f32, N_BF16, N_F32, whole_reg,   NULL, NULL, NULL)	      \
>     /* VFP instructions.  */						      \
>     CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
>     CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
> @@ -19120,8 +19223,21 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
>   	  }
>   
>         if (rs == NS_DQ)
> -	inst.instruction = 0x3b60600;
> +	{
> +	  if (flavour == neon_cvt_flavour_bf16_f32)
> +	    {
> +	      if (vfp_or_neon_is_neon (NEON_CHECK_ARCH8) == FAIL)
> +		return;
> +	      constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
> +	      /* VCVT.bf16.f32.  */
> +	      inst.instruction = 0x11b60640;
> +	    }
> +	  else
> +	    /* VCVT.f16.f32.  */
> +	    inst.instruction = 0x3b60600;
> +	}
>         else
> +	/* VCVT.f32.f16.  */
>   	inst.instruction = 0x3b60700;
>   
>         inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
> @@ -19271,6 +19387,14 @@ do_neon_cvttb_1 (bfd_boolean t)
>         inst.error = NULL;
>         do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
>       }
> +  else if (neon_check_type (2, rs, N_BF16 | N_VFP, N_F32).type != NT_invtype)
> +    {
> +      constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
> +      inst.error = NULL;
> +      inst.instruction |= (1 << 8);
> +      inst.instruction &= ~(1 << 9);
> +      do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE);
> +    }
>     else
>       return;
>   }
> @@ -19522,16 +19646,6 @@ do_neon_fmac_maybe_scalar_long (int subtype)
>        0x2.  */
>     int size = -1;
>   
> -  if (inst.cond != COND_ALWAYS)
> -    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
> -	       "behaviour is UNPREDICTABLE"));
> -
> -  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
> -	      _(BAD_FP16));
> -
> -  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
> -	      _(BAD_FPU));
> -
>     /* vfmal/vfmsl are in three-same D/Q register format or the third operand can
>        be a scalar index register.  */
>     if (inst.operands[2].isscalar)
> @@ -19550,7 +19664,16 @@ do_neon_fmac_maybe_scalar_long (int subtype)
>         rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
>       }
>   
> -  neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
> +
> +  if (inst.cond != COND_ALWAYS)
> +    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
> +	       "behaviour is UNPREDICTABLE"));
> +
> +  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
> +	      _(BAD_FP16));
> +
> +  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
> +	      _(BAD_FPU));
>   
>     /* "opcode" from template has included "ubit", so simply pass 0 here.  Also,
>        the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
> @@ -21500,6 +21623,46 @@ do_vjcvt (void)
>     do_vfp_cond_or_thumb ();
>   }
>   
> +static void
> +do_vdot (void)
> +{
> +  enum neon_shape rs;
> +  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
> +  set_pred_insn_type (OUTSIDE_PRED_INSN);
> +  if (inst.operands[2].isscalar)
> +    {
> +      rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
> +      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
> +
> +      inst.instruction |= (1 << 25);
> +      int index = inst.operands[2].reg & 0xf;
> +      constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
> +      inst.operands[2].reg >>= 4;
> +      constraint (!(inst.operands[2].reg < 16),
> +		  _("indexed register must be less than 16"));
> +      neon_three_args (rs == NS_QQS);
> +      inst.instruction |= (index << 5);
> +    }
> +  else
> +    {
> +      rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
> +      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
> +      neon_three_args (rs == NS_QQQ);
> +    }
> +}
> +
> +static void
> +do_vmmla (void)
> +{
> +  enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
> +  neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
> +
> +  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
> +  set_pred_insn_type (OUTSIDE_PRED_INSN);
> +
> +  neon_three_args (1);
> +}
> +
>   

> 
> 
>   /* Overall per-instruction processing.	*/
>   
> @@ -24845,8 +25008,8 @@ static const struct asm_opcode insns[] =
>    NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
>   
>    /* New backported fma/fms instructions optional in v8.2.  */
> - NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
> - NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
> + NUF (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
> + NUF (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
>   
>   #undef  THUMB_VARIANT
>   #define THUMB_VARIANT  & fpu_neon_ext_v1
> @@ -25096,10 +25259,11 @@ static const struct asm_opcode insns[] =
>   #define ARM_VARIANT    & fpu_vfp_ext_fma
>   #undef  THUMB_VARIANT
>   #define THUMB_VARIANT  & fpu_vfp_ext_fma
> - /* Mnemonics shared by Neon, VFP and MVE.  These are included in the
> + /* Mnemonics shared by Neon, VFP, MVE and BF16.  These are included in the
>       VFP FMA variant; NEON and VFP FMA always includes the NEON
>       FMA instructions.  */
>    mnCEF(vfma,     _vfma,    3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_fmac),
> + TUF ("vfmat",    c300850,    fc300850,  3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), mve_vfma, mve_vfma),
>    mnCEF(vfms,     _vfms,    3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ),  neon_fmac),
>   
>    /* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas;
> @@ -25772,6 +25936,24 @@ static const struct asm_opcode insns[] =
>   #define	THUMB_VARIANT & arm_ext_v6t2_v8m
>    MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
>    MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
> +
> +#undef	ARM_VARIANT
> +#define ARM_VARIANT &arm_ext_bf16
> +#undef	THUMB_VARIANT
> +#define	THUMB_VARIANT &arm_ext_bf16
> + TUF ("vdot", c000d00, fc000d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vdot, vdot),
> + TUF ("vmmla", c000c40, fc000c40, 3, (RNQ, RNQ, RNQ), vmmla, vmmla),
> + TUF ("vfmab", c300810, fc300810, 3, (RNDQ, RNDQ, RNDQ_RNSC), bfloat_vfma, bfloat_vfma),
> +
> +#undef	ARM_VARIANT
> +#define ARM_VARIANT &arm_ext_i8mm
> +#undef	THUMB_VARIANT
> +#define	THUMB_VARIANT &arm_ext_i8mm
> + TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
> + TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
> + TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
> + TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
> + TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
>   };
>   #undef ARM_VARIANT
>   #undef THUMB_VARIANT
> @@ -30936,6 +31118,11 @@ static const struct arm_ext_table armv85a_ext_table[] =
>     { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
>   };
>   
> +static const struct arm_ext_table armv86a_ext_table[] =
> +{
> +  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
> +};
> +
>   static const struct arm_ext_table armv8m_main_ext_table[] =
>   {
>     ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
> @@ -31041,6 +31228,7 @@ static const struct arm_arch_option_table arm_archs[] =
>     ARM_ARCH_OPT2 ("armv8-r",	  ARM_ARCH_V8R,		FPU_ARCH_VFP, armv8r),
>     ARM_ARCH_OPT2 ("armv8.4-a",	  ARM_ARCH_V8_4A,	FPU_ARCH_VFP, armv84a),
>     ARM_ARCH_OPT2 ("armv8.5-a",	  ARM_ARCH_V8_5A,	FPU_ARCH_VFP, armv85a),
> +  ARM_ARCH_OPT2 ("armv8.6-a",	  ARM_ARCH_V8_6A,	FPU_ARCH_VFP, armv86a),
>     ARM_ARCH_OPT ("xscale",	  ARM_ARCH_XSCALE,	FPU_ARCH_VFP),
>     ARM_ARCH_OPT ("iwmmxt",	  ARM_ARCH_IWMMXT,	FPU_ARCH_VFP),
>     ARM_ARCH_OPT ("iwmmxt2",	  ARM_ARCH_IWMMXT2,	FPU_ARCH_VFP),
> @@ -31071,6 +31259,9 @@ struct arm_option_extension_value_table
>      use the context sensitive approach using arm_ext_table's.  */
>   static const struct arm_option_extension_value_table arm_extensions[] =
>   {
> +  ARM_EXT_OPT ("bf16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +			ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +			ARM_ARCH_V8_2A),
>     ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
>   			 ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
>     ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
> @@ -31821,7 +32012,8 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
>       {TAG_CPU_ARCH_V8,	      ARM_ARCH_V8_4A},
>       {TAG_CPU_ARCH_V8,	      ARM_ARCH_V8_5A},
>       {TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
> -    {-1,		      ARM_ARCH_NONE}
> +    {TAG_CPU_ARCH_V8,	    ARM_ARCH_V8_6A},
> +    {-1,		    ARM_ARCH_NONE}
>   };
>   
>   /* Set an attribute if it has not already been set by the user.  */
> diff --git a/gas/doc/c-arm.texi b/gas/doc/c-arm.texi
> index 175de8eae10db7d7ec6608ff2fe35f4392c62100..8afee70120f122e3724b27a71443d03b7c2ad719 100644
> --- a/gas/doc/c-arm.texi
> +++ b/gas/doc/c-arm.texi
> @@ -180,6 +180,7 @@ been added, again in ascending alphabetical order.  For example,
>   
>   
>   The following extensions are currently supported:
> +@code{bf16} (BFloat16 extensions for v8.6-A architecture),
>   @code{crc}
>   @code{crypto} (Cryptography Extensions for v8-A architecture, implies @code{fp+simd}),
>   @code{dotprod} (Dot Product Extensions for v8.2-A architecture, implies @code{fp+simd}),
> @@ -254,6 +255,7 @@ names are recognized:
>   @code{armv8-m.base},
>   @code{armv8-m.main},
>   @code{armv8.1-m.main},
> +@code{armv8.6-a},
>   @code{iwmmxt},
>   @code{iwmmxt2}
>   and
> diff --git a/gas/testsuite/gas/arm/attr-march-armv8_6-a.d b/gas/testsuite/gas/arm/attr-march-armv8_6-a.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..73bcbaf35e8096da24b31dd676ec5b62794f3a90
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/attr-march-armv8_6-a.d
> @@ -0,0 +1,17 @@
> +# name: attributes for -march=armv8.6-a
> +# source: blank.s
> +# as: -march=armv8.6-a
> +# readelf: -A
> +# This test is only valid on EABI based ports.
> +# target: *-*-*eabi* *-*-nacl*
> +
> +Attribute Section: aeabi
> +File Attributes
> +  Tag_CPU_name: "8.6-A"
> +  Tag_CPU_arch: v8
> +  Tag_CPU_arch_profile: Application
> +  Tag_ARM_ISA_use: Yes
> +  Tag_THUMB_ISA_use: Thumb-2
> +  Tag_Advanced_SIMD_arch: NEON for ARMv8.1
> +  Tag_MPextension_use: Allowed
> +  Tag_Virtualization_use: TrustZone and Virtualization Extensions
> diff --git a/gas/testsuite/gas/arm/bfloat16-bad.d b/gas/testsuite/gas/arm/bfloat16-bad.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..95f266dd324675e4a96a14e64d239f1ebdc2708c
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-bad.d
> @@ -0,0 +1,4 @@
> +#name: Bfloat 16 failure cases
> +#source: bfloat16-bad.s
> +#as: -mno-warn-deprecated -march=armv8.6-a+simd
> +#error_output: bfloat16-bad.l
> diff --git a/gas/testsuite/gas/arm/bfloat16-bad.l b/gas/testsuite/gas/arm/bfloat16-bad.l
> new file mode 100644
> index 0000000000000000000000000000000000000000..242e538b53d136284f54761b709bc4d82263ed19
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-bad.l
> @@ -0,0 +1,112 @@
> +[^ :]+: Assembler messages:
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vdot.b16 d0,d0,d0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vmmla q0.b16,q0,q0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vdot.bf32 d0,d0,d0\[1\]'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0.bf32,d0,d0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0.bf32,d0.bf16,d0.bf16'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne d0,d0,d0'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne d0,d0,d0\[1\]'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vmmlane q0,q0,q0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmatne.bf16 q0,d0,d0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmatne.bf16 q0,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vfmabne.bf16 q0,d0,d0'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vfmabne.bf16 q0,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vcvtne.bf16.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d0,d32'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0\[0\]'
> +[^ :]+:[0-9]+: Error: indexed register must be less than 16 -- `vdot d0,d0,d16\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvtne.bf16.f32 d32,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q0,q16'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q16,q0,q0'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q16,q0'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,q16'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q16,d0,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q16,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q0,q32,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q0,q32,d0\[0\]'
> +[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmab.bf16 q0,q0,d8\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q16,d0,d0'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q16,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q0,q32,d0'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q0,q32,d0\[0\]'
> +[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmat.bf16 q0,q0,d8\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvt.bf16.f32 d0,q16'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,q0,d5'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,d5,q0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot d5,q0,q0'
> +[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot q0,d5,q0\[0\]'
> +[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot d5,q0,q0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,d5'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,d5,q0'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla d5,q0,q0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab.bf16 d0,q0,d0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab.bf16 d0,q0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat.bf16 d0,q0,d0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat.bf16 d0,q0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: operand size must match register width
> +[^ :]+:[0-9]+: Error: invalid neon suffix for non neon instruction
> +[^ :]+:[0-9]+: Error: index must be 0 or 1 -- `vdot q0,q0,d0\[2\]'
> +[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmab.bf16 q0,d0,d0\[4\]'
> +[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmat.bf16 q0,d0,d0\[4\]'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtb.b16.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtb.bf32.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0.b16,s0.f32'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0.bf32,s0.f32'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb s0.f32,s0.bf16'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtt.b16.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtt.bf32.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0.b16,s0.f32'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0.bf32,s0.f32'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt s0.f32,s0.bf16'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vcvt.b16.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vcvt.bf32.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0.b16,q0.f32'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0.bf32,q0.f32'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvt d0.f32,q0.bf16'
> +[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtt.bf16.f32 s0,s0,#0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtt.bf16.f32 s0,s0,#1'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt.bf16.f32 d0,s0'
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt.bf16.f32 s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt.bf16.f32 s0,s0,s0,s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt.bf16.f32 s0,s0,s0'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt.bf16.f32 s0,s32'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt.bf16.f32 s32,s32'
> +[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtb.bf16.f32 s0,s0,#0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtb.bf16.f32 s0,s0,#1'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb.bf16.f32 d0,s0'
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb.bf16.f32 s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb.bf16.f32 s0,s0,s0,s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb.bf16.f32 s0,s0,s0'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb.bf16.f32 s0,s32'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb.bf16.f32 s32,s32'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vcvtne.bf16.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne.bf16 d0,d20,d11'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne.bf16 d0,d20,d11\[1\]'
> +[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vmmlane.bf16 q0,q0,q0'
> +[^ :]+:[0-9]+: Error: IT falling in the range of a previous IT block -- `ittt ne'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot.bf16 d0,d20,d11'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot.bf16 d0,d20,d11\[1\]'
> diff --git a/gas/testsuite/gas/arm/bfloat16-bad.s b/gas/testsuite/gas/arm/bfloat16-bad.s
> new file mode 100644
> index 0000000000000000000000000000000000000000..f6db1ffe37b7cf493319564d7df66e8841aac97d
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-bad.s
> @@ -0,0 +1,119 @@
> +.syntax unified
> +
> +// Test warnings about type specifier being incorrect.
> +vdot.b16  d0, d0, d0
> +vmmla  q0.b16, q0, q0
> +vdot.bf32 d0, d0, d0[1]
> +vdot d0.bf32, d0, d0
> +vdot d0.bf32, d0.bf16, d0.bf16
> +
> +// Test conditions are not allowed in ARM.
> +vdotne d0, d0, d0
> +vdotne d0, d0, d0[1]
> +vmmlane q0, q0, q0
> +vfmatne.bf16 q0, d0, d0
> +vfmatne.bf16 q0, d0, d0[0]
> +vfmabne.bf16 q0, d0, d0
> +vfmabne.bf16 q0, d0, d0[0]
> +vcvtne.bf16.f32 d0, q0
> +
> +// d register out of range
> +vdot d32, d0, d0
> +vdot d0, d32, d0
> +vdot d0, d0, d32
> +vdot d32, d0, d0[0]
> +vdot d0, d32, d0[0]
> +vdot d0, d0, d16[0]
> +vcvtne.bf16.f32 d32, q0
> +
> +// q register out of range
> +vdot q16, q0, q0
> +vdot q0, q16, q0
> +vdot q0, q0, q16
> +vdot q16, q0, d0[0]
> +vdot q0, q16, d0[0]
> +vmmla q16, q0, q0
> +vmmla q0, q16, q0
> +vmmla q0, q0, q16
> +vfmab.bf16 q16, d0, d0
> +vfmab.bf16 q16, d0, d0[0]
> +vfmab.bf16 q0, q32, d0
> +vfmab.bf16 q0, q32, d0[0]
> +vfmab.bf16 q0, q0, d8[0]
> +vfmat.bf16 q16, d0, d0
> +vfmat.bf16 q16, d0, d0[0]
> +vfmat.bf16 q0, q32, d0
> +vfmat.bf16 q0, q32, d0[0]
> +vfmat.bf16 q0, q0, d8[0]
> +vcvt.bf16.f32 d0, q16
> +
> +// Incorrect set of arguments
> +vdot q0, q0, d5
> +vdot q0, d5, q0
> +vdot d5, q0, q0
> +vdot q0, d5, q0[0]
> +vdot d5, q0, q0[0]
> +vmmla q0, q0, d5
> +vmmla q0, d5, q0
> +vmmla d5, q0, q0
> +vfmab.bf16 d0, q0, d0
> +vfmab.bf16 d0, q0, d0[0]
> +vfmat.bf16 d0, q0, d0
> +vfmat.bf16 d0, q0, d0[0]
> +vcvt.bf16.f32 q0, d0
> +
> +// vdot index out of range
> +vdot q0, q0, d0[2]
> +
> +// vfma<bt> index out of range
> +vfmab.bf16 q0, d0, d0[4]
> +vfmat.bf16 q0, d0, d0[4]
> +
> +// Non neon encodings (this file gets assembled more than once but with
> +// different flags, providing different error messages each time).
> +
> +// Type specifier warnings
> +.macro conversion_type_specifier_check insn, dest, source
> +\insn\().b16.f32 \dest, \source
> +\insn\().bf32.f32 \dest, \source
> +\insn \dest\().b16, \source\().f32
> +\insn \dest\().bf32, \source\().f32
> +\insn \dest\().f32, \source\().bf16
> +.endm
> +
> +conversion_type_specifier_check vcvtb, s0, s0
> +conversion_type_specifier_check vcvtt, s0, s0
> +conversion_type_specifier_check vcvt, d0, q0
> +
> +// Conditions allowed (and checked in the "Valid" source file).
> +
> +// Incorrect set of operands & registers out of range
> +.macro bad_args insn
> +\insn\().bf16.f32 s0, s0, #0
> +\insn\().bf16.f32 s0, s0, #1
> +\insn\().bf16.f32 d0, s0
> +\insn\().bf16.f32 s0
> +\insn\().bf16.f32 s0, s0, s0, s0
> +\insn\().bf16.f32 s0, s0, s0
> +\insn\().bf16.f32 s0, s32
> +\insn\().bf16.f32 s32, s32
> +.endm
> +bad_args vcvtt
> +bad_args vcvtb
> +
> +// Allowed in thumb mode but not allowed in arm mode.
> +it ne
> +vcvtne.bf16.f32 d0, q0
> +
> +// Ensure these instructions are not allowed to have a conditional suffix.
> +ittt ne
> +vdotne.bf16 d0, d20, d11
> +vdotne.bf16 d0, d20, d11[1]
> +vmmlane.bf16 q0, q0, q0
> +
> +// Ensure these instructions are rejected with an error when they appear
> +// (without a condition suffix) inside an IT block in thumb.
> +ittt ne
> +vdot.bf16 d0, d20, d11
> +vdot.bf16 d0, d20, d11[1]
> +vmmla.bf16 q0, q0, q0
> diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad-2.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-2.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..d13b864ab83a62c6083abe5b9280581a4fe92d29
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-2.d
> @@ -0,0 +1,4 @@
> +#name: Bfloat 16 bad processor
> +#source: bfloat16-non-neon.s
> +#as: -mno-warn-deprecated -march=armv8.5-a+simd
> +#error: .*Error: selected processor does not support bf16 instruction.*
> diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..5dfdeb4d6ccc6575e357835e10dcb2638c03de35
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d
> @@ -0,0 +1,4 @@
> +#name: Bfloat 16 bad extension
> +#source: bfloat16-non-neon.s
> +#as: -mno-warn-deprecated -march=armv8.1-a+bf16
> +#error: .*Error: extension does not apply to the base architecture.*
> diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..34b8a963f817e17a03cd59a2f1f4509a446bf1ca
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad.d
> @@ -0,0 +1,5 @@
> +#name: Bfloat 16 bad FPU
> +#source: bfloat16-neon.s
> +#as: -mno-warn-deprecated -mfpu=vfpxd -march=armv8.6-a
> +#error: .*Error: selected FPU does not support instruction.*
> +
> diff --git a/gas/testsuite/gas/arm/bfloat16-neon.s b/gas/testsuite/gas/arm/bfloat16-neon.s
> new file mode 100644
> index 0000000000000000000000000000000000000000..6f422650cde2e74f5c5948d3ea1a1690a62e2a60
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-neon.s
> @@ -0,0 +1,53 @@
> +.syntax unified
> +// Check argument encoding by having different arguments.
> +// We use 20 and 11 since their binary encoding is 10100 and 01011
> +// respectively which ensures that we distinguish between the D/M/N bit
> +// encoding the first or last bit of the argument.
> +// q registers are encoded as double their actual number.
> +vdot.bf16 d0, d20, d11
> +vdot d11.bf16, d0.bf16, d20.bf16
> +
> +.macro conversion_type_specifier_check insn, dest, source
> +\insn\().bf16.f32 \dest, \source
> +\insn \dest\().bf16, \source\().f32
> +\insn \dest\().bf16, \source\().f32
> +.endm
> +conversion_type_specifier_check vcvtt,s0,s0
> +conversion_type_specifier_check vcvtb,s0,s0
> +conversion_type_specifier_check vcvt,d0,q0
> +
> +
> +// Here we follow the same encoding sequence as above.
> +// Since the 'M' bit encodes the index and the last register is encoded in 4
> +// bits that argument has a different number.
> +vdot.bf16 d11, d0, d4[1]
> +vdot d0.bf16, d20.bf16, d11.bf16[0]
> +
> +// vmmla only works on q registers.
> +// These registers are encoded as double the number given in the mnemonic.
> +// Hence we choose different numbers to ensure a similar bit pattern as above.
> +// 10 & 5 produce the bit patterns 10100 & 01010
> +vmmla.bf16 q10, q5, q0
> +vmmla q5.bf16, q0.bf16, q10.bf16
> +
> +vfmat.bf16 q10, q11, q0
> +vfmat.bf16 q10, q11, d0[3]
> +vfmat.bf16 q10, q11, d0[0]
> +
> +vfmab.bf16 q10, q11, q0
> +vfmab.bf16 q10, q11, d0[3]
> +vfmab.bf16 q10, q11, d0[0]
> +
> +// vcvt
> +// - no condition allowed in arm
> +// - no condition allowed in thumb outside IT block
> +// - Condition *allowed* in thumb in IT block
> +// - different encoding between thumb and arm
> +vcvt.bf16.f32 d20, q5
> +vcvt.bf16.f32 d11, q10
> +
> +// Only works for thumb mode.
> +.ifdef COMPILING_FOR_THUMB
> +it ne
> +vcvtne.bf16.f32 d0, q0
> +.endif
> diff --git a/gas/testsuite/gas/arm/bfloat16-non-neon.s b/gas/testsuite/gas/arm/bfloat16-non-neon.s
> new file mode 100644
> index 0000000000000000000000000000000000000000..95e3c3b4a11d7fd7b3f12624cf7753f62f7164f3
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-non-neon.s
> @@ -0,0 +1,9 @@
> +.syntax unified
> +vcvtb.bf16.f32 s20, s11
> +it ne
> +vcvtbne.bf16.f32 s11, s20
> +vcvtbal.bf16.f32 s0, s0
> +vcvtt.bf16.f32 s20, s11
> +it ne
> +vcvttne.bf16.f32 s11, s20
> +vcvttal.bf16.f32 s0, s0
> diff --git a/gas/testsuite/gas/arm/bfloat16-thumb-bad.d b/gas/testsuite/gas/arm/bfloat16-thumb-bad.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..8322cf0343564197af1e64d3a5830d3e53ea9638
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-thumb-bad.d
> @@ -0,0 +1,4 @@
> +#name: Bfloat 16 Thumb failure cases
> +#source: bfloat16-bad.s
> +#as: -mno-warn-deprecated -mthumb -march=armv8.6-a+simd
> +#error_output: bfloat16-thumb-bad.l
> diff --git a/gas/testsuite/gas/arm/bfloat16-thumb-bad.l b/gas/testsuite/gas/arm/bfloat16-thumb-bad.l
> new file mode 100644
> index 0000000000000000000000000000000000000000..adfcf6fe71ea030a80d14d3acce2f8d8d2b1098e
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-thumb-bad.l
> @@ -0,0 +1,112 @@
> +[^ :]+: Assembler messages:
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vdot\.b16 d0,d0,d0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vmmla q0\.b16,q0,q0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vdot\.bf32 d0,d0,d0\[1\]'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0\.bf32,d0,d0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0\.bf32,d0\.bf16,d0\.bf16'
> +[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vdotne d0,d0,d0'
> +[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vdotne d0,d0,d0\[1\]'
> +[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vmmlane q0,q0,q0'
> +[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmatne\.bf16 q0,d0,d0'
> +[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmatne\.bf16 q0,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmabne\.bf16 q0,d0,d0'
> +[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmabne\.bf16 q0,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vcvtne\.bf16\.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d0,d32'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0\[0\]'
> +[^ :]+:[0-9]+: Error: indexed register must be less than 16 -- `vdot d0,d0,d16\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvtne\.bf16\.f32 d32,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,q0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q0,q16'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q16,q0,q0'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q16,q0'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,q16'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q16,d0,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q16,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q0,q32,d0'
> +[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q0,q32,d0\[0\]'
> +[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmab\.bf16 q0,q0,d8\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q16,d0,d0'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q16,d0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q0,q32,d0'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q0,q32,d0\[0\]'
> +[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmat\.bf16 q0,q0,d8\[0\]'
> +[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvt\.bf16\.f32 d0,q16'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,q0,d5'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,d5,q0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot d5,q0,q0'
> +[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot q0,d5,q0\[0\]'
> +[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot d5,q0,q0\[0\]'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,d5'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,d5,q0'
> +[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla d5,q0,q0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab\.bf16 d0,q0,d0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab\.bf16 d0,q0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat\.bf16 d0,q0,d0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat\.bf16 d0,q0,d0\[0\]'
> +[^ :]+:[0-9]+: Error: operand size must match register width
> +[^ :]+:[0-9]+: Error: invalid neon suffix for non neon instruction
> +[^ :]+:[0-9]+: Error: index must be 0 or 1 -- `vdot q0,q0,d0\[2\]'
> +[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmab\.bf16 q0,d0,d0\[4\]'
> +[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmat\.bf16 q0,d0,d0\[4\]'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtb\.b16\.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtb\.bf32\.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0\.b16,s0\.f32'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0\.bf32,s0\.f32'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb s0\.f32,s0\.bf16'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtt\.b16\.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vcvtt\.bf32\.f32 s0,s0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0\.b16,s0\.f32'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0\.bf32,s0\.f32'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt s0\.f32,s0\.bf16'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad instruction `vcvt\.b16\.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad instruction `vcvt\.bf32\.f32 d0,q0'
> +[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0\.b16,q0\.f32'
> +[^ :]+:[0-9]+: Error: bad size 32 in type specifier
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0\.bf32,q0\.f32'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvt d0\.f32,q0\.bf16'
> +[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtt\.bf16\.f32 s0,s0,#0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtt\.bf16\.f32 s0,s0,#1'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt\.bf16\.f32 d0,s0'
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt\.bf16\.f32 s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt\.bf16\.f32 s0,s0,s0,s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt\.bf16\.f32 s0,s0,s0'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt\.bf16\.f32 s0,s32'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt\.bf16\.f32 s32,s32'
> +[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtb\.bf16\.f32 s0,s0,#0'
> +[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtb\.bf16\.f32 s0,s0,#1'
> +[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb\.bf16\.f32 d0,s0'
> +[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb\.bf16\.f32 s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb\.bf16\.f32 s0,s0,s0,s0'
> +[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb\.bf16\.f32 s0,s0,s0'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb\.bf16\.f32 s0,s32'
> +[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb\.bf16\.f32 s32,s32'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdotne\.bf16 d0,d20,d11'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdotne\.bf16 d0,d20,d11\[1\]'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vmmlane\.bf16 q0,q0,q0'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot\.bf16 d0,d20,d11'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot\.bf16 d0,d20,d11\[1\]'
> +[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vmmla\.bf16 q0,q0,q0'
> +
> diff --git a/gas/testsuite/gas/arm/bfloat16-thumb.d b/gas/testsuite/gas/arm/bfloat16-thumb.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..7efdf9f15be34c969f750336d134fec698bb32bc
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-thumb.d
> @@ -0,0 +1,43 @@
> +#name: Bfloat 16 extension Thumb
> +#source: bfloat16.s
> +#as: -mno-warn-deprecated --defsym COMPILING_FOR_THUMB=1 -mthumb -march=armv8.6-a+simd -I$srcdir/$subdir
> +#objdump: -dr --show-raw-insn
> +
> +.*: +file format .*arm.*
> +
> +Disassembly of section .text:
> +
> +00000000 <\.text>:
> + *[0-9a-f]+:	fc04 0d8b 	vdot\.bf16	d0, d20, d11
> + *[0-9a-f]+:	fc00 bd24 	vdot\.bf16	d11, d0, d20
> + *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	ffb6 0640 	vcvt\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	ffb6 0640 	vcvt\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	ffb6 0640 	vcvt\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	fe00 bd24 	vdot\.bf16	d11, d0, d4\[1\]
> + *[0-9a-f]+:	fe04 0d8b 	vdot\.bf16	d0, d20, d11\[0\]
> + *[0-9a-f]+:	fc4a 4c40 	vmmla\.bf16	q10, q5, q0
> + *[0-9a-f]+:	fc00 ac64 	vmmla\.bf16	q5, q0, q10
> + *[0-9a-f]+:	fc76 48d0 	vfmat\.bf16	q10, q11, q0
> + *[0-9a-f]+:	fe76 48f8 	vfmat\.bf16	q10, q11, d0\[3\]
> + *[0-9a-f]+:	fe76 48d0 	vfmat\.bf16	q10, q11, d0\[0\]
> + *[0-9a-f]+:	fc76 4890 	vfmab\.bf16	q10, q11, q0
> + *[0-9a-f]+:	fe76 48b8 	vfmab\.bf16	q10, q11, d0\[3\]
> + *[0-9a-f]+:	fe76 4890 	vfmab\.bf16	q10, q11, d0\[0\]
> + *[0-9a-f]+:	fff6 464a 	vcvt\.bf16\.f32	d20, q5
> + *[0-9a-f]+:	ffb6 b664 	vcvt\.bf16\.f32	d11, q10
> + *[0-9a-f]+:	bf18      	it	ne
> + *[0-9a-f]+:	ffb6 0640 	vcvtne\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	eeb3 a965 	vcvtb\.bf16\.f32	s20, s11
> + *[0-9a-f]+:	bf18      	it	ne
> + *[0-9a-f]+:	eef3 594a 	vcvtbne\.bf16\.f32	s11, s20
> + *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3 a9e5 	vcvtt\.bf16\.f32	s20, s11
> + *[0-9a-f]+:	bf18      	it	ne
> + *[0-9a-f]+:	eef3 59ca 	vcvttne\.bf16\.f32	s11, s20
> + *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
> diff --git a/gas/testsuite/gas/arm/bfloat16-vfp.d b/gas/testsuite/gas/arm/bfloat16-vfp.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..487aa88e6ba62c275ae210b8fe52d3de45b6d709
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16-vfp.d
> @@ -0,0 +1,16 @@
> +#name: Bfloat 16 VFP
> +#source: bfloat16-non-neon.s
> +#as: -mno-warn-deprecated -mfpu=vfpxd -march=armv8.6-a -I$srcdir/$subdir
> +#objdump: -dr --show-raw-insn
> +
> +.*: +file format .*arm.*
> +
> +Disassembly of section .text:
> +
> +00000000 <.text>:
> + *[0-9a-f]*:	eeb3a965 	vcvtb.bf16.f32	s20, s11
> + *[0-9a-f]*:	1ef3594a 	vcvtbne.bf16.f32	s11, s20
> + *[0-9a-f]*:	eeb30940 	vcvtb.bf16.f32	s0, s0
> + *[0-9a-f]*:	eeb3a9e5 	vcvtt.bf16.f32	s20, s11
> + *[0-9a-f]*:	1ef359ca 	vcvttne.bf16.f32	s11, s20
> + *[0-9a-f]*:	eeb309c0 	vcvtt.bf16.f32	s0, s0
> diff --git a/gas/testsuite/gas/arm/bfloat16.d b/gas/testsuite/gas/arm/bfloat16.d
> new file mode 100644
> index 0000000000000000000000000000000000000000..b76c17faba6801ec5e07481af205c57efe0ef28b
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16.d
> @@ -0,0 +1,39 @@
> +#name: Bfloat 16 extension
> +#source: bfloat16.s
> +#as: -mno-warn-deprecated -march=armv8.6-a+simd -I$srcdir/$subdir
> +#objdump: -dr --show-raw-insn
> +
> +.*:     file format .*
> +
> +Disassembly of section \.text:
> +
> +00000000 <.text>:
> + *[0-9a-f]+:	fc040d8b 	vdot\.bf16	d0, d20, d11
> + *[0-9a-f]+:	fc00bd24 	vdot\.bf16	d11, d0, d20
> + *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	f3b60640 	vcvt\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	f3b60640 	vcvt\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	f3b60640 	vcvt\.bf16\.f32	d0, q0
> + *[0-9a-f]+:	fe00bd24 	vdot\.bf16	d11, d0, d4\[1\]
> + *[0-9a-f]+:	fe040d8b 	vdot\.bf16	d0, d20, d11\[0\]
> + *[0-9a-f]+:	fc4a4c40 	vmmla\.bf16	q10, q5, q0
> + *[0-9a-f]+:	fc00ac64 	vmmla\.bf16	q5, q0, q10
> + *[0-9a-f]*:	fc7648d0 	vfmat\.bf16	q10, q11, q0
> + *[0-9a-f]*:	fe7648f8 	vfmat\.bf16	q10, q11, d0\[3\]
> + *[0-9a-f]*:	fe7648d0 	vfmat\.bf16	q10, q11, d0\[0\]
> + *[0-9a-f]*:	fc764890 	vfmab\.bf16	q10, q11, q0
> + *[0-9a-f]*:	fe7648b8 	vfmab\.bf16	q10, q11, d0\[3\]
> + *[0-9a-f]*:	fe764890 	vfmab\.bf16	q10, q11, d0\[0\]
> + *[0-9a-f]+:	f3f6464a 	vcvt\.bf16\.f32	d20, q5
> + *[0-9a-f]+:	f3b6b664 	vcvt\.bf16\.f32	d11, q10
> + *[0-9a-f]+:	eeb3a965 	vcvtb\.bf16\.f32	s20, s11
> + *[0-9a-f]+:	1ef3594a 	vcvtbne\.bf16\.f32	s11, s20
> + *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
> + *[0-9a-f]+:	eeb3a9e5 	vcvtt\.bf16\.f32	s20, s11
> + *[0-9a-f]+:	1ef359ca 	vcvttne\.bf16\.f32	s11, s20
> + *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
> diff --git a/gas/testsuite/gas/arm/bfloat16.s b/gas/testsuite/gas/arm/bfloat16.s
> new file mode 100644
> index 0000000000000000000000000000000000000000..6016ed2eb060d5339bd948b9226b861416fe629e
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/bfloat16.s
> @@ -0,0 +1,2 @@
> +.include "bfloat16-neon.s"
> +.include "bfloat16-non-neon.s"
> diff --git a/include/opcode/arm.h b/include/opcode/arm.h
> index a870905907b38f001812f460e3cd816e9675f851..7aea4d6e56805731d8d91f9a908c1cca332f3ab9 100644
> --- a/include/opcode/arm.h
> +++ b/include/opcode/arm.h
> @@ -73,6 +73,8 @@
>   #define ARM_EXT2_SB	     0x00002000	/* Speculation Barrier instruction.  */
>   #define ARM_EXT2_PREDRES     0x00004000	/* Prediction Restriction insns.     */
>   #define ARM_EXT2_V8_1M_MAIN  0x00008000 /* ARMv8.1-M Mainline.		     */
> +#define ARM_EXT2_V8_6A	     0x00010000	/* ARM V8.6A.			     */
> +#define ARM_EXT2_BF16	     0x00020000 /* ARMv8 bfloat16.		     */
>   
>   /* Co-processor space extensions.  */
>   #define ARM_CEXT_XSCALE	     0x00000001	/* Allow MIA etc.	 	   */
> @@ -169,6 +171,7 @@
>   					   | ARM_EXT2_V8_4A)
>   #define ARM_AEXT2_V8_5A	(ARM_AEXT2_V8_4A   | ARM_EXT2_V8_5A | ARM_EXT2_SB     \
>   					   | ARM_EXT2_PREDRES)
> +#define ARM_AEXT2_V8_6A	(ARM_AEXT2_V8_5A   | ARM_EXT2_V8_6A | ARM_EXT2_BF16)
>   #define ARM_AEXT_V8M_BASE	(ARM_AEXT_V6SM	    | ARM_EXT_DIV)
>   #define ARM_AEXT_V8M_MAIN	 ARM_AEXT_V7M
>   #define ARM_AEXT_V8M_MAIN_DSP	 ARM_AEXT_V7EM
> @@ -352,6 +355,9 @@
>   #define ARM_ARCH_V8_5A	 ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_5A,	   \
>   				      CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA	   \
>   						    | FPU_NEON_EXT_DOTPROD)
> +#define ARM_ARCH_V8_6A	 ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_6A,	   \
> +				      CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA	   \
> +						    | FPU_NEON_EXT_DOTPROD)
>   #define ARM_ARCH_V8M_BASE      ARM_FEATURE_CORE (ARM_AEXT_V8M_BASE,	   \
>   						 ARM_AEXT2_V8M_BASE)
>   #define ARM_ARCH_V8M_MAIN      ARM_FEATURE_CORE (ARM_AEXT_V8M_MAIN,	   \
> diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
> index 85c573034ea1e149eecbbe73eb705945040448d9..50ae9576561477a7c6e50628ffb20e005d9e9e59 100644
> --- a/opcodes/arm-dis.c
> +++ b/opcodes/arm-dis.c
> @@ -396,6 +396,7 @@ struct opcode16
>      %%			%
>   
>      %c			print condition code (always bits 28-31 in ARM mode)
> +   %b			print condition code allowing cp_num == 9
>      %q			print shifter argument
>      %u			print condition code (unconditional in ARM mode,
>                             UNPREDICTABLE if not AL in Thumb)
> @@ -1207,11 +1208,15 @@ static const struct sopcode32 coprocessor_opcodes[] =
>     {ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
>       0xfea00800, 0xffa00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20?21%20?780"},
>   
> +  /* BFloat16 instructions.  */
> +  {ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0x0eb30940, 0x0fbf0f50, "vcvt%7?tb%b.bf16.f32\t%y1, %y0"},
> +
>     /* Dot Product instructions in the space of coprocessor 13.  */
>     {ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
>       0xfc200d00, 0xffb00f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3,5V"},
>     {ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
> -    0xfe000d00, 0xff000f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
> +    0xfe200d00, 0xff200f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
>   
>     /* ARMv8.2 FMAC Long instructions in the space of coprocessor 8.  */
>     {ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST | ARM_EXT2_V8_2A),
> @@ -1452,6 +1457,20 @@ static const struct opcode32 neon_opcodes[] =
>     {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
>       0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
>   
> +  /* BFloat16 instructions.  */
> +  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0xfc000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
> +  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0xfe000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
> +  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0xfc000c40, 0xffb00f50, "vmmla.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
> +  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0xf3b60640, 0xffbf0fd0, "vcvt%c.bf16.f32\t%12-15,22D, %0-3,5Q"},
> +  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0xfc300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
> +  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
> +    0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
> +
>     /* Two registers, miscellaneous.  */
>     {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
>       0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
> @@ -8159,6 +8178,8 @@ print_insn_coprocessor_1 (const struct sopcode32 *opcodes,
>   		  if (cond != COND_UNCOND && cp_num == 9)
>   		    is_unpredictable = TRUE;
>   
> +		  /* Fall through.  */
> +		case 'b':
>   		  func (stream, "%s", arm_conditional[cond]);
>   		  break;
>   
> @@ -8772,6 +8793,10 @@ print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
>   	}
>         else if ((given & 0xff000000) == 0xf9000000)
>   	given ^= 0xf9000000 ^ 0xf4000000;
> +      /* BFloat16 neon instructions without special top byte handling.  */
> +      else if ((given & 0xff000000) == 0xfe000000
> +	       || (given & 0xff000000) == 0xfc000000)
> +	;
>         /* vdup is also a valid neon instruction.  */
>         else if ((given & 0xff910f5f) != 0xee800b10)
>   	return FALSE;
> @@ -11625,11 +11650,11 @@ select_arm_features (unsigned long mach,
>       case bfd_mach_arm_7EM:	 ARM_SET_FEATURES (ARM_ARCH_V7EM); break;
>       case bfd_mach_arm_8:
>   	{
> -	  /* Add bits for extensions that Armv8.5-A recognizes.  */
> -	  arm_feature_set armv8_5_ext_fset
> +	  /* Add bits for extensions that Armv8.6-A recognizes.  */
> +	  arm_feature_set armv8_6_ext_fset
>   	    = ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
> -	  ARM_SET_FEATURES (ARM_ARCH_V8_5A);
> -	  ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_5_ext_fset);
> +	  ARM_SET_FEATURES (ARM_ARCH_V8_6A);
> +	  ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_6_ext_fset);
>   	  break;
>   	}
>       case bfd_mach_arm_8R:	 ARM_SET_FEATURES (ARM_ARCH_V8R); break;
> 
-------------- next part --------------

diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 1f462307ed9129d8aca8a4bd371965c4b2f0c0ca..fb10f0b510f84b92b7cf58ac9091bf9667607ad0 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -275,6 +275,8 @@ static const arm_feature_set arm_ext_sb =
   ARM_FEATURE_CORE_HIGH (ARM_EXT2_SB);
 static const arm_feature_set arm_ext_predres =
   ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
+static const arm_feature_set arm_ext_bf16 =
+  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
 
 static const arm_feature_set arm_arch_any = ARM_ANY;
 #ifdef OBJ_ELF
@@ -446,6 +448,7 @@ enum neon_el_type
   NT_float,
   NT_poly,
   NT_signed,
+  NT_bfloat,
   NT_unsigned
 };
 
@@ -893,6 +896,7 @@ struct asm_opcode
 	_("cannot use writeback with PC-relative addressing")
 #define BAD_RANGE	_("branch out of range")
 #define BAD_FP16	_("selected processor does not support fp16 instruction")
+#define BAD_BF16	_("selected processor does not support bf16 instruction")
 #define UNPRED_REG(R)	_("using " R " results in unpredictable behaviour")
 #define THUMB1_RELOC_ONLY  _("relocation valid in thumb1 code only")
 #define MVE_NOT_IT	_("Warning: instruction is UNPREDICTABLE in an IT " \
@@ -1468,6 +1472,28 @@ parse_neon_type (struct neon_type *type, char **str)
 	  thissize = 64;
 	  ptr++;
 	  goto done;
+	case 'b':
+	  thistype = NT_bfloat;
+	  switch (TOLOWER (*(++ptr)))
+	    {
+	    case 'f':
+	      ptr += 1;
+	      thissize = strtoul (ptr, &ptr, 10);
+	      if (thissize != 16)
+		{
+		  as_bad (_("bad size %d in type specifier"), thissize);
+		  return FAIL;
+		}
+	      goto done;
+	    case '0': case '1': case '2': case '3': case '4':
+	    case '5': case '6': case '7': case '8': case '9':
+	    case ' ': case '.':
+	      as_bad (_("unexpected type character `b' -- did you mean `bf'?"));
+	      return FAIL;
+	    default:
+	      break;
+	    }
+	  break;
 	default:
 	  as_bad (_("unexpected character `%c' in type specifier"), *ptr);
 	  return FAIL;
@@ -14505,6 +14531,10 @@ do_mve_scalar_shift (void)
 #define M_MNEM_vqrshrunt    0xfe801fc0
 #define M_MNEM_vqrshrunb    0xfe800fc0
 
+/* Bfloat16 instruction encoder helpers.  */
+#define B_MNEM_vfmat 0xfc300850
+#define B_MNEM_vfmab 0xfc300810
+
 /* Neon instruction encoder helpers.  */
 
 /* Encodings for the different types for various Neon opcodes.  */
@@ -14850,6 +14880,7 @@ enum neon_type_mask
   N_F32  = 0x0080000,
   N_F64  = 0x0100000,
   N_P64	 = 0x0200000,
+  N_BF16 = 0x0400000,
   N_KEY  = 0x1000000, /* Key element (main type specifier).  */
   N_EQK  = 0x2000000, /* Given operand has the same type & size as the key.  */
   N_VFP  = 0x4000000, /* VFP mode: operand size must match register width.  */
@@ -15148,6 +15179,10 @@ type_chk_of_el_type (enum neon_el_type type, unsigned size)
 	}
       break;
 
+    case NT_bfloat:
+      if (size == 16) return N_BF16;
+      break;
+
     default: ;
     }
 
@@ -15166,7 +15201,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
 
   if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
     *size = 8;
-  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16)) != 0)
+  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16 | N_BF16))
+	   != 0)
     *size = 16;
   else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
     *size = 32;
@@ -15187,6 +15223,8 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
     *type = NT_poly;
   else if ((mask & (N_F_ALL)) != 0)
     *type = NT_float;
+  else if ((mask & (N_BF16)) != 0)
+    *type = NT_bfloat;
   else
     return FAIL;
 
@@ -16623,6 +16661,20 @@ mve_encode_rrqq (unsigned U, unsigned size)
   inst.is_neon = 1;
 }
 
+/* Helper for neon_three_same: encode the three registers and bit 6.  */
+static void
+neon_three_args (int isquad)
+{
+  inst.instruction |= ((LOW4 (inst.operands[0].reg) << 12)
+		       | (HI1 (inst.operands[0].reg) << 22));
+  inst.instruction |= ((LOW4 (inst.operands[1].reg) << 16)
+		       | (HI1 (inst.operands[1].reg) << 7));
+  inst.instruction |= (LOW4 (inst.operands[2].reg)
+		       | (HI1 (inst.operands[2].reg) << 5));
+  inst.instruction |= (isquad != 0) << 6;
+  inst.is_neon = 1;
+}
+
 /* Encode insns with bit pattern:
 
   |28/24|23|22 |21 20|19 16|15 12|11    8|7|6|5|4|3  0|
@@ -16634,13 +16686,7 @@ mve_encode_rrqq (unsigned U, unsigned size)
 static void
 neon_three_same (int isquad, int ubit, int size)
 {
-  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
-  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
-  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
-  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
-  inst.instruction |= LOW4 (inst.operands[2].reg);
-  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
-  inst.instruction |= (isquad != 0) << 6;
+  neon_three_args (isquad);
   inst.instruction |= (ubit != 0) << 24;
   if (size != -1)
     inst.instruction |= neon_logbits (size) << 20;
@@ -17783,6 +17829,44 @@ do_neon_mac_maybe_scalar (void)
 }
 
 static void
+do_bfloat_vfma (void)
+{
+  enum neon_shape rs;
+  int index;
+  int t_bit;
+
+  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+  constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+
+  /* Anything other than the vfmab base opcode is encoded as vfmat, whose
+     extra bit is passed to neon_three_args in place of ISQUAD.  */
+  t_bit = inst.instruction != B_MNEM_vfmab;
+  if (t_bit)
+    inst.instruction = B_MNEM_vfmat;
+
+  if (!inst.operands[2].isscalar)
+    {
+      rs = neon_select_shape (NS_QQQ, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+      neon_three_args (t_bit);
+      return;
+    }
+
+  /* By-scalar form: the low 4 bits of operands[2].reg hold the index.  */
+  rs = neon_select_shape (NS_QQS, NS_NULL);
+  neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+  inst.instruction |= (1 << 25);
+  index = inst.operands[2].reg & 0xf;
+  constraint (index >= 4, _("index must be in the range 0 to 3"));
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 8,
+	      _("indexed register must be less than 8"));
+  neon_three_args (t_bit);
+  /* Index bit 0 goes to instruction bit 3, index bit 1 to bit 5.  */
+  inst.instruction |= ((index & 1) << 3) | ((index & 2) << 4);
+}
+
+static void
 do_neon_fmac (void)
 {
   if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_fma)
@@ -17800,6 +17884,7 @@ do_neon_fmac (void)
 
       if (rs == NS_QQR)
 	{
+
 	  if (inst.operands[2].reg == REG_SP)
 	    as_tsktsk (MVE_BAD_SP);
 	  else if (inst.operands[2].reg == REG_PC)
@@ -17825,6 +17910,24 @@ do_neon_fmac (void)
 }
 
 static void
+do_mve_vfma (void)
+{
+  /* "vfmat" clashes: it is either the BF16 VFMAT or the MVE VFMA with a 't'
+     predicate in a VPT block.  Without BF16 and no condition, assume MVE.  */
+  if (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_bf16)
+      && inst.cond == COND_ALWAYS)
+    {
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), BAD_FPU);
+      inst.instruction = N_MNEM_vfma;
+      inst.pred_insn_type = INSIDE_VPT_INSN;
+      inst.cond = 0xf;
+      do_neon_fmac ();
+    }
+  else
+    do_bfloat_vfma ();
+}
+
+static void
 do_neon_tst (void)
 {
   enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
@@ -18653,6 +18756,7 @@ do_neon_shll (void)
   CVT_VAR (f16_u32, N_F16 | N_KEY, N_U32, N_VFP, "fultos", "fuitos", NULL)    \
   CVT_VAR (u32_f16, N_U32, N_F16 | N_KEY, N_VFP, "ftouls", "ftouis", "ftouizs")\
   CVT_VAR (s32_f16, N_S32, N_F16 | N_KEY, N_VFP, "ftosls", "ftosis", "ftosizs")\
+  CVT_VAR (bf16_f32, N_BF16, N_F32, whole_reg,   NULL, NULL, NULL)	      \
   /* VFP instructions.  */						      \
   CVT_VAR (f32_f64, N_F32, N_F64, N_VFP,       NULL,     "fcvtsd", NULL)      \
   CVT_VAR (f64_f32, N_F64, N_F32, N_VFP,       NULL,     "fcvtds", NULL)      \
@@ -19120,8 +19224,21 @@ do_neon_cvt_1 (enum neon_cvt_mode mode)
 	  }
 
       if (rs == NS_DQ)
-	inst.instruction = 0x3b60600;
+	{
+	  if (flavour == neon_cvt_flavour_bf16_f32)
+	    {
+	      if (vfp_or_neon_is_neon (NEON_CHECK_ARCH8) == FAIL)
+		return;
+	      constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+	      /* VCVT.bf16.f32.  */
+	      inst.instruction = 0x11b60640;
+	    }
+	  else
+	    /* VCVT.f16.f32.  */
+	    inst.instruction = 0x3b60600;
+	}
       else
+	/* VCVT.f32.f16.  */
 	inst.instruction = 0x3b60700;
 
       inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
@@ -19271,6 +19388,14 @@ do_neon_cvttb_1 (bfd_boolean t)
       inst.error = NULL;
       do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
     }
+  else if (neon_check_type (2, rs, N_BF16 | N_VFP, N_F32).type != NT_invtype)
+    {
+      constraint (!mark_feature_used (&arm_ext_bf16), _(BAD_BF16));
+      inst.error = NULL;
+      inst.instruction |= (1 << 8);
+      inst.instruction &= ~(1 << 9);
+      do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE);
+    }
   else
     return;
 }
@@ -19522,16 +19647,6 @@ do_neon_fmac_maybe_scalar_long (int subtype)
      0x2.  */
   int size = -1;
 
-  if (inst.cond != COND_ALWAYS)
-    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
-	       "behaviour is UNPREDICTABLE"));
-
-  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
-	      _(BAD_FP16));
-
-  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
-	      _(BAD_FPU));
-
   /* vfmal/vfmsl are in three-same D/Q register format or the third operand can
      be a scalar index register.  */
   if (inst.operands[2].isscalar)
@@ -19550,7 +19665,16 @@ do_neon_fmac_maybe_scalar_long (int subtype)
       rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
     }
 
-  neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
+
+  if (inst.cond != COND_ALWAYS)
+    as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
+	       "behaviour is UNPREDICTABLE"));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
+	      _(BAD_FP16));
+
+  constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+	      _(BAD_FPU));
 
   /* "opcode" from template has included "ubit", so simply pass 0 here.  Also,
      the "S" bit in size field has been reused to differentiate vfmal and vfmsl,
@@ -21500,6 +21624,46 @@ do_vjcvt (void)
   do_vfp_cond_or_thumb ();
 }
 
+static void
+do_vdot (void)
+{
+  enum neon_shape rs;
+
+  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  if (!inst.operands[2].isscalar)
+    {
+      /* Vector form: all three operands share a D or Q shape.  */
+      rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+      neon_three_args (rs == NS_QQQ);
+      return;
+    }
+  /* By-scalar form: the low 4 bits of operands[2].reg hold the index.  */
+  rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+  inst.instruction |= (1 << 25);
+  int index = inst.operands[2].reg & 0xf;
+  constraint (index != 0 && index != 1, _("index must be 0 or 1"));
+  inst.operands[2].reg >>= 4;
+  constraint (inst.operands[2].reg >= 16,
+	      _("indexed register must be less than 16"));
+  neon_three_args (rs == NS_QQS);
+  inst.instruction |= (index << 5);
+}
+
+static void
+do_vmmla (void)
+{
+  /* Only the all-quad (QQQ) shape with a BF16 key type is accepted.  */
+  enum neon_shape rs;
+
+  rs = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, rs, N_EQK, N_EQK, N_BF16 | N_KEY);
+  constraint (!mark_feature_used (&fpu_neon_ext_armv8), _(BAD_FPU));
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+  neon_three_args (1);  /* Always sets bit 6.  */
+}
+
 

 /* Overall per-instruction processing.	*/
 
@@ -24845,8 +25009,8 @@ static const struct asm_opcode insns[] =
  NCE (vins,      eb00ac0,       2, (RVS, RVS), neon_movhf),
 
  /* New backported fma/fms instructions optional in v8.2.  */
- NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
- NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+ NUF (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+ NUF (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
 
 #undef  THUMB_VARIANT
 #define THUMB_VARIANT  & fpu_neon_ext_v1
@@ -25096,10 +25260,11 @@ static const struct asm_opcode insns[] =
 #define ARM_VARIANT    & fpu_vfp_ext_fma
 #undef  THUMB_VARIANT
 #define THUMB_VARIANT  & fpu_vfp_ext_fma
- /* Mnemonics shared by Neon, VFP and MVE.  These are included in the
+ /* Mnemonics shared by Neon, VFP, MVE and BF16.  These are included in the
     VFP FMA variant; NEON and VFP FMA always includes the NEON
     FMA instructions.  */
  mnCEF(vfma,     _vfma,    3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_fmac),
+ TUF ("vfmat",    c300850,    fc300850,  3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ_RR), mve_vfma, mve_vfma),
  mnCEF(vfms,     _vfms,    3, (RNSDQMQ, oRNSDQMQ, RNSDQMQ),  neon_fmac),
 
  /* ffmas/ffmad/ffmss/ffmsd are dummy mnemonics to satisfy gas;
@@ -25772,6 +25937,24 @@ static const struct asm_opcode insns[] =
 #define	THUMB_VARIANT & arm_ext_v6t2_v8m
  MNUF (vcadd, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ, EXPi), vcadd),
  MNUF (vcmla, 0, 4, (RNDQMQ, RNDQMQ, RNDQMQ_RNSC, EXPi), vcmla),
+
+#undef	ARM_VARIANT
+#define ARM_VARIANT &arm_ext_bf16
+#undef	THUMB_VARIANT
+#define	THUMB_VARIANT &arm_ext_bf16
+ TUF ("vdot", c000d00, fc000d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vdot, vdot),
+ TUF ("vmmla", c000c40, fc000c40, 3, (RNQ, RNQ, RNQ), vmmla, vmmla),
+ TUF ("vfmab", c300810, fc300810, 3, (RNDQ, RNDQ, RNDQ_RNSC), bfloat_vfma, bfloat_vfma),
+
+#undef	ARM_VARIANT
+#define ARM_VARIANT &arm_ext_i8mm
+#undef	THUMB_VARIANT
+#define	THUMB_VARIANT &arm_ext_i8mm
+ TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
+ TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
+ TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
 };
 #undef ARM_VARIANT
 #undef THUMB_VARIANT
@@ -30936,6 +31119,11 @@ static const struct arm_ext_table armv85a_ext_table[] =
   { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
 };
 
+static const struct arm_ext_table armv86a_ext_table[] =
+{
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+};
+
 static const struct arm_ext_table armv8m_main_ext_table[] =
 {
   ARM_EXT ("dsp", ARM_FEATURE_CORE_LOW (ARM_EXT_V5ExP | ARM_EXT_V6_DSP),
@@ -31041,6 +31229,7 @@ static const struct arm_arch_option_table arm_archs[] =
   ARM_ARCH_OPT2 ("armv8-r",	  ARM_ARCH_V8R,		FPU_ARCH_VFP, armv8r),
   ARM_ARCH_OPT2 ("armv8.4-a",	  ARM_ARCH_V8_4A,	FPU_ARCH_VFP, armv84a),
   ARM_ARCH_OPT2 ("armv8.5-a",	  ARM_ARCH_V8_5A,	FPU_ARCH_VFP, armv85a),
+  ARM_ARCH_OPT2 ("armv8.6-a",	  ARM_ARCH_V8_6A,	FPU_ARCH_VFP, armv86a),
   ARM_ARCH_OPT ("xscale",	  ARM_ARCH_XSCALE,	FPU_ARCH_VFP),
   ARM_ARCH_OPT ("iwmmxt",	  ARM_ARCH_IWMMXT,	FPU_ARCH_VFP),
   ARM_ARCH_OPT ("iwmmxt2",	  ARM_ARCH_IWMMXT2,	FPU_ARCH_VFP),
@@ -31071,6 +31260,9 @@ struct arm_option_extension_value_table
    use the context sensitive approach using arm_ext_table's.  */
 static const struct arm_option_extension_value_table arm_extensions[] =
 {
+  ARM_EXT_OPT ("bf16",  ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+			ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+			ARM_ARCH_V8_2A),
   ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
 			 ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
   ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
@@ -31821,7 +32013,8 @@ static const cpu_arch_ver_table cpu_arch_ver[] =
     {TAG_CPU_ARCH_V8,	      ARM_ARCH_V8_4A},
     {TAG_CPU_ARCH_V8,	      ARM_ARCH_V8_5A},
     {TAG_CPU_ARCH_V8_1M_MAIN, ARM_ARCH_V8_1M_MAIN},
-    {-1,		      ARM_ARCH_NONE}
+    {TAG_CPU_ARCH_V8,	    ARM_ARCH_V8_6A},
+    {-1,		    ARM_ARCH_NONE}
 };
 
 /* Set an attribute if it has not already been set by the user.  */
diff --git a/gas/doc/c-arm.texi b/gas/doc/c-arm.texi
index 175de8eae10db7d7ec6608ff2fe35f4392c62100..8afee70120f122e3724b27a71443d03b7c2ad719 100644
--- a/gas/doc/c-arm.texi
+++ b/gas/doc/c-arm.texi
@@ -180,6 +180,7 @@ been added, again in ascending alphabetical order.  For example,
 
 
 The following extensions are currently supported:
+@code{bf16} (BFloat16 extensions for v8.6-A architecture),
 @code{crc}
 @code{crypto} (Cryptography Extensions for v8-A architecture, implies @code{fp+simd}),
 @code{dotprod} (Dot Product Extensions for v8.2-A architecture, implies @code{fp+simd}),
@@ -254,6 +255,7 @@ names are recognized:
 @code{armv8-m.base},
 @code{armv8-m.main},
 @code{armv8.1-m.main},
+@code{armv8.6-a},
 @code{iwmmxt},
 @code{iwmmxt2}
 and
diff --git a/gas/testsuite/gas/arm/attr-march-armv8_6-a.d b/gas/testsuite/gas/arm/attr-march-armv8_6-a.d
new file mode 100644
index 0000000000000000000000000000000000000000..73bcbaf35e8096da24b31dd676ec5b62794f3a90
--- /dev/null
+++ b/gas/testsuite/gas/arm/attr-march-armv8_6-a.d
@@ -0,0 +1,17 @@
+# name: attributes for -march=armv8.6-a
+# source: blank.s
+# as: -march=armv8.6-a
+# readelf: -A
+# This test is only valid on EABI based ports.
+# target: *-*-*eabi* *-*-nacl*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_CPU_name: "8.6-A"
+  Tag_CPU_arch: v8
+  Tag_CPU_arch_profile: Application
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-2
+  Tag_Advanced_SIMD_arch: NEON for ARMv8.1
+  Tag_MPextension_use: Allowed
+  Tag_Virtualization_use: TrustZone and Virtualization Extensions
diff --git a/gas/testsuite/gas/arm/bfloat16-bad.d b/gas/testsuite/gas/arm/bfloat16-bad.d
new file mode 100644
index 0000000000000000000000000000000000000000..95f266dd324675e4a96a14e64d239f1ebdc2708c
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-bad.d
@@ -0,0 +1,4 @@
+#name: Bfloat 16 failure cases
+#source: bfloat16-bad.s
+#as: -mno-warn-deprecated -march=armv8.6-a+simd
+#error_output: bfloat16-bad.l
diff --git a/gas/testsuite/gas/arm/bfloat16-bad.l b/gas/testsuite/gas/arm/bfloat16-bad.l
new file mode 100644
index 0000000000000000000000000000000000000000..242e538b53d136284f54761b709bc4d82263ed19
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-bad.l
@@ -0,0 +1,112 @@
+[^ :]+: Assembler messages:
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vdot.b16 d0,d0,d0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vmmla q0.b16,q0,q0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vdot.bf32 d0,d0,d0\[1\]'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0.bf32,d0,d0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0.bf32,d0.bf16,d0.bf16'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne d0,d0,d0'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne d0,d0,d0\[1\]'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vmmlane q0,q0,q0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmatne.bf16 q0,d0,d0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmatne.bf16 q0,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vfmabne.bf16 q0,d0,d0'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vfmabne.bf16 q0,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vcvtne.bf16.f32 d0,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d0,d32'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0\[0\]'
+[^ :]+:[0-9]+: Error: indexed register must be less than 16 -- `vdot d0,d0,d16\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvtne.bf16.f32 d32,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q0,q16'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q16,q0,q0'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q16,q0'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,q16'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q16,d0,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q16,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q0,q32,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab.bf16 q0,q32,d0\[0\]'
+[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmab.bf16 q0,q0,d8\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q16,d0,d0'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q16,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q0,q32,d0'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat.bf16 q0,q32,d0\[0\]'
+[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmat.bf16 q0,q0,d8\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvt.bf16.f32 d0,q16'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,q0,d5'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,d5,q0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot d5,q0,q0'
+[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot q0,d5,q0\[0\]'
+[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot d5,q0,q0\[0\]'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,d5'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,d5,q0'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla d5,q0,q0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab.bf16 d0,q0,d0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab.bf16 d0,q0,d0\[0\]'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat.bf16 d0,q0,d0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat.bf16 d0,q0,d0\[0\]'
+[^ :]+:[0-9]+: Error: operand size must match register width
+[^ :]+:[0-9]+: Error: invalid neon suffix for non neon instruction
+[^ :]+:[0-9]+: Error: index must be 0 or 1 -- `vdot q0,q0,d0\[2\]'
+[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmab.bf16 q0,d0,d0\[4\]'
+[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmat.bf16 q0,d0,d0\[4\]'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vcvtb.b16.f32 s0,s0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vcvtb.bf32.f32 s0,s0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0.b16,s0.f32'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0.bf32,s0.f32'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb s0.f32,s0.bf16'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vcvtt.b16.f32 s0,s0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vcvtt.bf32.f32 s0,s0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0.b16,s0.f32'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0.bf32,s0.f32'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt s0.f32,s0.bf16'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vcvt.b16.f32 d0,q0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vcvt.bf32.f32 d0,q0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0.b16,q0.f32'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0.bf32,q0.f32'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvt d0.f32,q0.bf16'
+[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtt.bf16.f32 s0,s0,#0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtt.bf16.f32 s0,s0,#1'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt.bf16.f32 d0,s0'
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt.bf16.f32 s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt.bf16.f32 s0,s0,s0,s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt.bf16.f32 s0,s0,s0'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt.bf16.f32 s0,s32'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt.bf16.f32 s32,s32'
+[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtb.bf16.f32 s0,s0,#0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtb.bf16.f32 s0,s0,#1'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb.bf16.f32 d0,s0'
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb.bf16.f32 s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb.bf16.f32 s0,s0,s0,s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb.bf16.f32 s0,s0,s0'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb.bf16.f32 s0,s32'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb.bf16.f32 s32,s32'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vcvtne.bf16.f32 d0,q0'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne.bf16 d0,d20,d11'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vdotne.bf16 d0,d20,d11\[1\]'
+[^ :]+:[0-9]+: Error: instruction cannot be conditional -- `vmmlane.bf16 q0,q0,q0'
+[^ :]+:[0-9]+: Error: IT falling in the range of a previous IT block -- `ittt ne'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot.bf16 d0,d20,d11'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot.bf16 d0,d20,d11\[1\]'
diff --git a/gas/testsuite/gas/arm/bfloat16-bad.s b/gas/testsuite/gas/arm/bfloat16-bad.s
new file mode 100644
index 0000000000000000000000000000000000000000..f6db1ffe37b7cf493319564d7df66e8841aac97d
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-bad.s
@@ -0,0 +1,119 @@
+.syntax unified
+
+// Test that incorrect type specifiers are rejected with an error.
+vdot.b16  d0, d0, d0
+vmmla  q0.b16, q0, q0
+vdot.bf32 d0, d0, d0[1]
+vdot d0.bf32, d0, d0
+vdot d0.bf32, d0.bf16, d0.bf16
+
+// Test conditions are not allowed in ARM.
+vdotne d0, d0, d0
+vdotne d0, d0, d0[1]
+vmmlane q0, q0, q0
+vfmatne.bf16 q0, d0, d0
+vfmatne.bf16 q0, d0, d0[0]
+vfmabne.bf16 q0, d0, d0
+vfmabne.bf16 q0, d0, d0[0]
+vcvtne.bf16.f32 d0, q0
+
+// d register out of range
+vdot d32, d0, d0
+vdot d0, d32, d0
+vdot d0, d0, d32
+vdot d32, d0, d0[0]
+vdot d0, d32, d0[0]
+vdot d0, d0, d16[0]
+vcvtne.bf16.f32 d32, q0
+
+// q register out of range
+vdot q16, q0, q0
+vdot q0, q16, q0
+vdot q0, q0, q16
+vdot q16, q0, d0[0]
+vdot q0, q16, d0[0]
+vmmla q16, q0, q0
+vmmla q0, q16, q0
+vmmla q0, q0, q16
+vfmab.bf16 q16, d0, d0
+vfmab.bf16 q16, d0, d0[0]
+vfmab.bf16 q0, q32, d0
+vfmab.bf16 q0, q32, d0[0]
+vfmab.bf16 q0, q0, d8[0]
+vfmat.bf16 q16, d0, d0
+vfmat.bf16 q16, d0, d0[0]
+vfmat.bf16 q0, q32, d0
+vfmat.bf16 q0, q32, d0[0]
+vfmat.bf16 q0, q0, d8[0]
+vcvt.bf16.f32 d0, q16
+
+// Incorrect set of arguments
+vdot q0, q0, d5
+vdot q0, d5, q0
+vdot d5, q0, q0
+vdot q0, d5, q0[0]
+vdot d5, q0, q0[0]
+vmmla q0, q0, d5
+vmmla q0, d5, q0
+vmmla d5, q0, q0
+vfmab.bf16 d0, q0, d0
+vfmab.bf16 d0, q0, d0[0]
+vfmat.bf16 d0, q0, d0
+vfmat.bf16 d0, q0, d0[0]
+vcvt.bf16.f32 q0, d0
+
+// vdot index out of range
+vdot q0, q0, d0[2]
+
+// vfma<bt> index out of range
+vfmab.bf16 q0, d0, d0[4]
+vfmat.bf16 q0, d0, d0[4]
+
+// Non neon encodings (this file gets assembled more than once but with
+// different flags, providing different error messages each time).
+
+// Type specifier warnings
+.macro conversion_type_specifier_check insn, dest, source
+\insn\().b16.f32 \dest, \source
+\insn\().bf32.f32 \dest, \source
+\insn \dest\().b16, \source\().f32
+\insn \dest\().bf32, \source\().f32
+\insn \dest\().f32, \source\().bf16
+.endm
+
+conversion_type_specifier_check vcvtb, s0, s0
+conversion_type_specifier_check vcvtt, s0, s0
+conversion_type_specifier_check vcvt, d0, q0
+
+// Conditions allowed (and checked in the "Valid" source file).
+
+// Incorrect set of operands & registers out of range
+.macro bad_args insn
+\insn\().bf16.f32 s0, s0, #0
+\insn\().bf16.f32 s0, s0, #1
+\insn\().bf16.f32 d0, s0
+\insn\().bf16.f32 s0
+\insn\().bf16.f32 s0, s0, s0, s0
+\insn\().bf16.f32 s0, s0, s0
+\insn\().bf16.f32 s0, s32
+\insn\().bf16.f32 s32, s32
+.endm
+bad_args vcvtt
+bad_args vcvtb
+
+// Allowed in thumb mode but not allowed in arm mode.
+it ne
+vcvtne.bf16.f32 d0, q0
+
+// Ensure these instructions are not allowed to have a conditional suffix.
+ittt ne
+vdotne.bf16 d0, d20, d11
+vdotne.bf16 d0, d20, d11[1]
+vmmlane.bf16 q0, q0, q0
+
+// Ensure we are warned these instructions are UNPREDICTABLE in an IT block in
+// thumb.
+ittt ne
+vdot.bf16 d0, d20, d11
+vdot.bf16 d0, d20, d11[1]
+vmmla.bf16 q0, q0, q0
diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad-2.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-2.d
new file mode 100644
index 0000000000000000000000000000000000000000..d13b864ab83a62c6083abe5b9280581a4fe92d29
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-2.d
@@ -0,0 +1,4 @@
+#name: Bfloat 16 bad processor
+#source: bfloat16-non-neon.s
+#as: -mno-warn-deprecated -march=armv8.5-a+simd
+#error: .*Error: selected processor does not support bf16 instruction.*
diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d
new file mode 100644
index 0000000000000000000000000000000000000000..5dfdeb4d6ccc6575e357835e10dcb2638c03de35
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad-3.d
@@ -0,0 +1,4 @@
+#name: Bfloat 16 bad extension
+#source: bfloat16-non-neon.s
+#as: -mno-warn-deprecated -march=armv8.1-a+bf16
+#error: .*Error: extension does not apply to the base architecture.*
diff --git a/gas/testsuite/gas/arm/bfloat16-cmdline-bad.d b/gas/testsuite/gas/arm/bfloat16-cmdline-bad.d
new file mode 100644
index 0000000000000000000000000000000000000000..34b8a963f817e17a03cd59a2f1f4509a446bf1ca
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-cmdline-bad.d
@@ -0,0 +1,5 @@
+#name: Bfloat 16 bad FPU
+#source: bfloat16-neon.s
+#as: -mno-warn-deprecated -mfpu=vfpxd -march=armv8.6-a
+#error: .*Error: selected FPU does not support instruction.*
+
diff --git a/gas/testsuite/gas/arm/bfloat16-neon.s b/gas/testsuite/gas/arm/bfloat16-neon.s
new file mode 100644
index 0000000000000000000000000000000000000000..6f422650cde2e74f5c5948d3ea1a1690a62e2a60
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-neon.s
@@ -0,0 +1,53 @@
+.syntax unified
+// Check argument encoding by having different arguments.
+// We use 20 and 11 since their binary encoding is 10100 and 01011
+// respectively which ensures that we distinguish between the D/M/N bit
+// encoding the first or last bit of the argument.
+// q registers are encoded as double their actual number.
+vdot.bf16 d0, d20, d11
+vdot d11.bf16, d0.bf16, d20.bf16
+
+.macro conversion_type_specifier_check insn, dest, source
+\insn\().bf16.f32 \dest, \source
+\insn \dest\().bf16, \source\().f32
+\insn \dest\().bf16, \source\().f32
+.endm
+conversion_type_specifier_check vcvtt,s0,s0
+conversion_type_specifier_check vcvtb,s0,s0
+conversion_type_specifier_check vcvt,d0,q0
+
+
+// Here we follow the same encoding sequence as above.
+// Since the 'M' bit encodes the index and the last register is encoded in 4
+// bits that argument has a different number.
+vdot.bf16 d11, d0, d4[1]
+vdot d0.bf16, d20.bf16, d11.bf16[0]
+
+// vmmla only works on q registers.
+// These registers are encoded as double the number given in the mnemonic.
+// Hence we choose different numbers to ensure a similar bit pattern as above.
+// 10 & 5 produce the bit patterns 10100 & 01010
+vmmla.bf16 q10, q5, q0
+vmmla q5.bf16, q0.bf16, q10.bf16
+
+vfmat.bf16 q10, q11, q0
+vfmat.bf16 q10, q11, d0[3]
+vfmat.bf16 q10, q11, d0[0]
+
+vfmab.bf16 q10, q11, q0
+vfmab.bf16 q10, q11, d0[3]
+vfmab.bf16 q10, q11, d0[0]
+
+// vcvt
+// - no condition allowed in arm
+// - no condition allowed in thumb outside IT block
+// - Condition *allowed* in thumb in IT block
+// - different encoding between thumb and arm
+vcvt.bf16.f32 d20, q5
+vcvt.bf16.f32 d11, q10
+
+// Only works for thumb mode.
+.ifdef COMPILING_FOR_THUMB
+it ne
+vcvtne.bf16.f32 d0, q0
+.endif
diff --git a/gas/testsuite/gas/arm/bfloat16-non-neon.s b/gas/testsuite/gas/arm/bfloat16-non-neon.s
new file mode 100644
index 0000000000000000000000000000000000000000..95e3c3b4a11d7fd7b3f12624cf7753f62f7164f3
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-non-neon.s
@@ -0,0 +1,9 @@
+.syntax unified
+vcvtb.bf16.f32 s20, s11
+it ne
+vcvtbne.bf16.f32 s11, s20
+vcvtbal.bf16.f32 s0, s0
+vcvtt.bf16.f32 s20, s11
+it ne
+vcvttne.bf16.f32 s11, s20
+vcvttal.bf16.f32 s0, s0
diff --git a/gas/testsuite/gas/arm/bfloat16-thumb-bad.d b/gas/testsuite/gas/arm/bfloat16-thumb-bad.d
new file mode 100644
index 0000000000000000000000000000000000000000..8322cf0343564197af1e64d3a5830d3e53ea9638
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-thumb-bad.d
@@ -0,0 +1,4 @@
+#name: Bfloat 16 Thumb failure cases
+#source: bfloat16-bad.s
+#as: -mno-warn-deprecated -mthumb -march=armv8.6-a+simd
+#error_output: bfloat16-thumb-bad.l
diff --git a/gas/testsuite/gas/arm/bfloat16-thumb-bad.l b/gas/testsuite/gas/arm/bfloat16-thumb-bad.l
new file mode 100644
index 0000000000000000000000000000000000000000..adfcf6fe71ea030a80d14d3acce2f8d8d2b1098e
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-thumb-bad.l
@@ -0,0 +1,112 @@
+[^ :]+: Assembler messages:
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vdot\.b16 d0,d0,d0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vmmla q0\.b16,q0,q0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vdot\.bf32 d0,d0,d0\[1\]'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0\.bf32,d0,d0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vdot d0\.bf32,d0\.bf16,d0\.bf16'
+[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vdotne d0,d0,d0'
+[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vdotne d0,d0,d0\[1\]'
+[^ :]+:[0-9]+: Error: operand types can't be inferred -- `vmmlane q0,q0,q0'
+[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmatne\.bf16 q0,d0,d0'
+[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmatne\.bf16 q0,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmabne\.bf16 q0,d0,d0'
+[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vfmabne\.bf16 q0,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: thumb conditional instruction should be in IT block -- `vcvtne\.bf16\.f32 d0,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d0,d32'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d32,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot d0,d32,d0\[0\]'
+[^ :]+:[0-9]+: Error: indexed register must be less than 16 -- `vdot d0,d0,d16\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvtne\.bf16\.f32 d32,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,q0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q0,q16'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q16,q0,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vdot q0,q16,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q16,q0,q0'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q16,q0'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,q16'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q16,d0,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q16,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q0,q32,d0'
+[^ :]+:[0-9]+: Error: Neon double or quad precision register expected -- `vfmab\.bf16 q0,q32,d0\[0\]'
+[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmab\.bf16 q0,q0,d8\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q16,d0,d0'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q16,d0,d0\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q0,q32,d0'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vfmat\.bf16 q0,q32,d0\[0\]'
+[^ :]+:[0-9]+: Error: indexed register must be less than 8 -- `vfmat\.bf16 q0,q0,d8\[0\]'
+[^ :]+:[0-9]+: Error: VFP single, double or Neon quad precision register expected -- `vcvt\.bf16\.f32 d0,q16'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,q0,d5'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot q0,d5,q0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vdot d5,q0,q0'
+[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot q0,d5,q0\[0\]'
+[^ :]+:[0-9]+: Error: only D registers may be indexed -- `vdot d5,q0,q0\[0\]'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,q0,d5'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla q0,d5,q0'
+[^ :]+:[0-9]+: Error: Neon quad precision register expected -- `vmmla d5,q0,q0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab\.bf16 d0,q0,d0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmab\.bf16 d0,q0,d0\[0\]'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat\.bf16 d0,q0,d0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vfmat\.bf16 d0,q0,d0\[0\]'
+[^ :]+:[0-9]+: Error: operand size must match register width
+[^ :]+:[0-9]+: Error: invalid neon suffix for non neon instruction
+[^ :]+:[0-9]+: Error: index must be 0 or 1 -- `vdot q0,q0,d0\[2\]'
+[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmab\.bf16 q0,d0,d0\[4\]'
+[^ :]+:[0-9]+: Error: index must be in the range 0 to 3 -- `vfmat\.bf16 q0,d0,d0\[4\]'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vcvtb\.b16\.f32 s0,s0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vcvtb\.bf32\.f32 s0,s0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0\.b16,s0\.f32'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb s0\.bf32,s0\.f32'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb s0\.f32,s0\.bf16'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vcvtt\.b16\.f32 s0,s0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vcvtt\.bf32\.f32 s0,s0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0\.b16,s0\.f32'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt s0\.bf32,s0\.f32'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt s0\.f32,s0\.bf16'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad instruction `vcvt\.b16\.f32 d0,q0'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad instruction `vcvt\.bf32\.f32 d0,q0'
+[^ :]+:[0-9]+: Error: unexpected type character `b' -- did you mean `bf'\?
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0\.b16,q0\.f32'
+[^ :]+:[0-9]+: Error: bad size 32 in type specifier
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvt d0\.bf32,q0\.f32'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvt d0\.f32,q0\.bf16'
+[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtt\.bf16\.f32 s0,s0,#0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtt\.bf16\.f32 s0,s0,#1'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtt\.bf16\.f32 d0,s0'
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtt\.bf16\.f32 s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt\.bf16\.f32 s0,s0,s0,s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtt\.bf16\.f32 s0,s0,s0'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt\.bf16\.f32 s0,s32'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtt\.bf16\.f32 s32,s32'
+[^ :]+:[0-9]+: Error: immediate value out of range -- `vcvtb\.bf16\.f32 s0,s0,#0'
+[^ :]+:[0-9]+: Error: invalid instruction shape -- `vcvtb\.bf16\.f32 s0,s0,#1'
+[^ :]+:[0-9]+: Error: bad type in SIMD instruction -- `vcvtb\.bf16\.f32 d0,s0'
+[^ :]+:[0-9]+: Error: bad arguments to instruction -- `vcvtb\.bf16\.f32 s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb\.bf16\.f32 s0,s0,s0,s0'
+[^ :]+:[0-9]+: Error: constant expression required -- `vcvtb\.bf16\.f32 s0,s0,s0'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb\.bf16\.f32 s0,s32'
+[^ :]+:[0-9]+: Error: VFP single or double precision register expected -- `vcvtb\.bf16\.f32 s32,s32'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdotne\.bf16 d0,d20,d11'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdotne\.bf16 d0,d20,d11\[1\]'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vmmlane\.bf16 q0,q0,q0'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot\.bf16 d0,d20,d11'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vdot\.bf16 d0,d20,d11\[1\]'
+[^ :]+:[0-9]+: Error: instruction not allowed in IT block -- `vmmla\.bf16 q0,q0,q0'
+
diff --git a/gas/testsuite/gas/arm/bfloat16-thumb.d b/gas/testsuite/gas/arm/bfloat16-thumb.d
new file mode 100644
index 0000000000000000000000000000000000000000..cf70d1619a5b6f6adebb13042d75b8057e9498bd
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-thumb.d
@@ -0,0 +1,44 @@
+#name: Bfloat 16 extension Thumb
+#source: bfloat16.s
+#as: -mno-warn-deprecated --defsym COMPILING_FOR_THUMB=1 -mthumb -march=armv8.6-a+simd -I$srcdir/$subdir
+#objdump: -dr --show-raw-insn
+#skip: *-*-pe *-*-wince
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <\.text>:
+ *[0-9a-f]+:	fc04 0d8b 	vdot\.bf16	d0, d20, d11
+ *[0-9a-f]+:	fc00 bd24 	vdot\.bf16	d11, d0, d20
+ *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	ffb6 0640 	vcvt\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	ffb6 0640 	vcvt\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	ffb6 0640 	vcvt\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	fe00 bd24 	vdot\.bf16	d11, d0, d4\[1\]
+ *[0-9a-f]+:	fe04 0d8b 	vdot\.bf16	d0, d20, d11\[0\]
+ *[0-9a-f]+:	fc4a 4c40 	vmmla\.bf16	q10, q5, q0
+ *[0-9a-f]+:	fc00 ac64 	vmmla\.bf16	q5, q0, q10
+ *[0-9a-f]+:	fc76 48d0 	vfmat\.bf16	q10, q11, q0
+ *[0-9a-f]+:	fe76 48f8 	vfmat\.bf16	q10, q11, d0\[3\]
+ *[0-9a-f]+:	fe76 48d0 	vfmat\.bf16	q10, q11, d0\[0\]
+ *[0-9a-f]+:	fc76 4890 	vfmab\.bf16	q10, q11, q0
+ *[0-9a-f]+:	fe76 48b8 	vfmab\.bf16	q10, q11, d0\[3\]
+ *[0-9a-f]+:	fe76 4890 	vfmab\.bf16	q10, q11, d0\[0\]
+ *[0-9a-f]+:	fff6 464a 	vcvt\.bf16\.f32	d20, q5
+ *[0-9a-f]+:	ffb6 b664 	vcvt\.bf16\.f32	d11, q10
+ *[0-9a-f]+:	bf18      	it	ne
+ *[0-9a-f]+:	ffb6 0640 	vcvtne\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	eeb3 a965 	vcvtb\.bf16\.f32	s20, s11
+ *[0-9a-f]+:	bf18      	it	ne
+ *[0-9a-f]+:	eef3 594a 	vcvtbne\.bf16\.f32	s11, s20
+ *[0-9a-f]+:	eeb3 0940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3 a9e5 	vcvtt\.bf16\.f32	s20, s11
+ *[0-9a-f]+:	bf18      	it	ne
+ *[0-9a-f]+:	eef3 59ca 	vcvttne\.bf16\.f32	s11, s20
+ *[0-9a-f]+:	eeb3 09c0 	vcvtt\.bf16\.f32	s0, s0
diff --git a/gas/testsuite/gas/arm/bfloat16-vfp.d b/gas/testsuite/gas/arm/bfloat16-vfp.d
new file mode 100644
index 0000000000000000000000000000000000000000..487aa88e6ba62c275ae210b8fe52d3de45b6d709
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16-vfp.d
@@ -0,0 +1,16 @@
+#name: Bfloat 16 VFP
+#source: bfloat16-non-neon.s
+#as: -mno-warn-deprecated -mfpu=vfpxd -march=armv8.6-a -I$srcdir/$subdir
+#objdump: -dr --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+
+00000000 <\.text>:
+ *[0-9a-f]*:	eeb3a965 	vcvtb\.bf16\.f32	s20, s11
+ *[0-9a-f]*:	1ef3594a 	vcvtbne\.bf16\.f32	s11, s20
+ *[0-9a-f]*:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]*:	eeb3a9e5 	vcvtt\.bf16\.f32	s20, s11
+ *[0-9a-f]*:	1ef359ca 	vcvttne\.bf16\.f32	s11, s20
+ *[0-9a-f]*:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
diff --git a/gas/testsuite/gas/arm/bfloat16.d b/gas/testsuite/gas/arm/bfloat16.d
new file mode 100644
index 0000000000000000000000000000000000000000..b76c17faba6801ec5e07481af205c57efe0ef28b
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16.d
@@ -0,0 +1,39 @@
+#name: Bfloat 16 extension
+#source: bfloat16.s
+#as: -mno-warn-deprecated -march=armv8.6-a+simd -I$srcdir/$subdir
+#objdump: -dr --show-raw-insn
+
+.*:     file format .*
+
+Disassembly of section \.text:
+
+00000000 <.text>:
+ *[0-9a-f]+:	fc040d8b 	vdot\.bf16	d0, d20, d11
+ *[0-9a-f]+:	fc00bd24 	vdot\.bf16	d11, d0, d20
+ *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	f3b60640 	vcvt\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	f3b60640 	vcvt\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	f3b60640 	vcvt\.bf16\.f32	d0, q0
+ *[0-9a-f]+:	fe00bd24 	vdot\.bf16	d11, d0, d4\[1\]
+ *[0-9a-f]+:	fe040d8b 	vdot\.bf16	d0, d20, d11\[0\]
+ *[0-9a-f]+:	fc4a4c40 	vmmla\.bf16	q10, q5, q0
+ *[0-9a-f]+:	fc00ac64 	vmmla\.bf16	q5, q0, q10
+ *[0-9a-f]*:	fc7648d0 	vfmat\.bf16	q10, q11, q0
+ *[0-9a-f]*:	fe7648f8 	vfmat\.bf16	q10, q11, d0\[3\]
+ *[0-9a-f]*:	fe7648d0 	vfmat\.bf16	q10, q11, d0\[0\]
+ *[0-9a-f]*:	fc764890 	vfmab\.bf16	q10, q11, q0
+ *[0-9a-f]*:	fe7648b8 	vfmab\.bf16	q10, q11, d0\[3\]
+ *[0-9a-f]*:	fe764890 	vfmab\.bf16	q10, q11, d0\[0\]
+ *[0-9a-f]+:	f3f6464a 	vcvt\.bf16\.f32	d20, q5
+ *[0-9a-f]+:	f3b6b664 	vcvt\.bf16\.f32	d11, q10
+ *[0-9a-f]+:	eeb3a965 	vcvtb\.bf16\.f32	s20, s11
+ *[0-9a-f]+:	1ef3594a 	vcvtbne\.bf16\.f32	s11, s20
+ *[0-9a-f]+:	eeb30940 	vcvtb\.bf16\.f32	s0, s0
+ *[0-9a-f]+:	eeb3a9e5 	vcvtt\.bf16\.f32	s20, s11
+ *[0-9a-f]+:	1ef359ca 	vcvttne\.bf16\.f32	s11, s20
+ *[0-9a-f]+:	eeb309c0 	vcvtt\.bf16\.f32	s0, s0
diff --git a/gas/testsuite/gas/arm/bfloat16.s b/gas/testsuite/gas/arm/bfloat16.s
new file mode 100644
index 0000000000000000000000000000000000000000..6016ed2eb060d5339bd948b9226b861416fe629e
--- /dev/null
+++ b/gas/testsuite/gas/arm/bfloat16.s
@@ -0,0 +1,2 @@
+.include "bfloat16-neon.s"
+.include "bfloat16-non-neon.s"
diff --git a/include/opcode/arm.h b/include/opcode/arm.h
index a870905907b38f001812f460e3cd816e9675f851..7aea4d6e56805731d8d91f9a908c1cca332f3ab9 100644
--- a/include/opcode/arm.h
+++ b/include/opcode/arm.h
@@ -73,6 +73,8 @@
 #define ARM_EXT2_SB	     0x00002000	/* Speculation Barrier instruction.  */
 #define ARM_EXT2_PREDRES     0x00004000	/* Prediction Restriction insns.     */
 #define ARM_EXT2_V8_1M_MAIN  0x00008000 /* ARMv8.1-M Mainline.		     */
+#define ARM_EXT2_V8_6A	     0x00010000	/* ARM V8.6A.			     */
+#define ARM_EXT2_BF16	     0x00020000 /* ARMv8 bfloat16.		     */
 
 /* Co-processor space extensions.  */
 #define ARM_CEXT_XSCALE	     0x00000001	/* Allow MIA etc.	 	   */
@@ -169,6 +171,7 @@
 					   | ARM_EXT2_V8_4A)
 #define ARM_AEXT2_V8_5A	(ARM_AEXT2_V8_4A   | ARM_EXT2_V8_5A | ARM_EXT2_SB     \
 					   | ARM_EXT2_PREDRES)
+#define ARM_AEXT2_V8_6A	(ARM_AEXT2_V8_5A   | ARM_EXT2_V8_6A | ARM_EXT2_BF16)
 #define ARM_AEXT_V8M_BASE	(ARM_AEXT_V6SM	    | ARM_EXT_DIV)
 #define ARM_AEXT_V8M_MAIN	 ARM_AEXT_V7M
 #define ARM_AEXT_V8M_MAIN_DSP	 ARM_AEXT_V7EM
@@ -352,6 +355,9 @@
 #define ARM_ARCH_V8_5A	 ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_5A,	   \
 				      CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA	   \
 						    | FPU_NEON_EXT_DOTPROD)
+#define ARM_ARCH_V8_6A	 ARM_FEATURE (ARM_AEXT_V8A, ARM_AEXT2_V8_6A,	   \
+				      CRC_EXT_ARMV8 | FPU_NEON_EXT_RDMA	   \
+						    | FPU_NEON_EXT_DOTPROD)
 #define ARM_ARCH_V8M_BASE      ARM_FEATURE_CORE (ARM_AEXT_V8M_BASE,	   \
 						 ARM_AEXT2_V8M_BASE)
 #define ARM_ARCH_V8M_MAIN      ARM_FEATURE_CORE (ARM_AEXT_V8M_MAIN,	   \
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index 85c573034ea1e149eecbbe73eb705945040448d9..50ae9576561477a7c6e50628ffb20e005d9e9e59 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -396,6 +396,7 @@ struct opcode16
    %%			%
 
    %c			print condition code (always bits 28-31 in ARM mode)
+   %b			print condition code; cp_num == 9 is not UNPREDICTABLE
    %q			print shifter argument
    %u			print condition code (unconditional in ARM mode,
                           UNPREDICTABLE if not AL in Thumb)
@@ -1207,11 +1208,15 @@ static const struct sopcode32 coprocessor_opcodes[] =
   {ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A),
     0xfea00800, 0xffa00f10, "vcmla%c.f32\t%12-15,22V, %16-19,7V, %0-3,5D[0], #%20?21%20?780"},
 
+  /* BFloat16 instructions.  */
+  {ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0x0eb30940, 0x0fbf0f50, "vcvt%7?tb%b.bf16.f32\t%y1, %y0"},
+
   /* Dot Product instructions in the space of coprocessor 13.  */
   {ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
     0xfc200d00, 0xffb00f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3,5V"},
   {ANY, ARM_FEATURE_COPROC (FPU_NEON_EXT_DOTPROD),
-    0xfe000d00, 0xff000f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
+    0xfe200d00, 0xff200f00, "v%4?usdot.%4?us8\t%12-15,22V, %16-19,7V, %0-3D[%5?10]"},
 
   /* ARMv8.2 FMAC Long instructions in the space of coprocessor 8.  */
   {ANY, ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST | ARM_EXT2_V8_2A),
@@ -1452,6 +1457,20 @@ static const struct opcode32 neon_opcodes[] =
   {ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
     0xf2300c10, 0xffb00f10, "vfms%c.f16\t%12-15,22R, %16-19,7R, %0-3,5R"},
 
+  /* BFloat16 instructions.  */
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0xfc000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0xfe000d00, 0xffb00f10, "vdot.bf16\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0xfc000c40, 0xffb00f50, "vmmla.bf16\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0xf3b60640, 0xffbf0fd0, "vcvt%c.bf16.f32\t%12-15,22D, %0-3,5Q"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0xfc300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-3,5Q"},
+  {ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
+    0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
+
   /* Two registers, miscellaneous.  */
   {ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
     0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},
@@ -8159,6 +8178,8 @@ print_insn_coprocessor_1 (const struct sopcode32 *opcodes,
 		  if (cond != COND_UNCOND && cp_num == 9)
 		    is_unpredictable = TRUE;
 
+		  /* Fall through.  */
+		case 'b':
 		  func (stream, "%s", arm_conditional[cond]);
 		  break;
 
@@ -8772,6 +8793,10 @@ print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
 	}
       else if ((given & 0xff000000) == 0xf9000000)
 	given ^= 0xf9000000 ^ 0xf4000000;
+      /* BFloat16 neon instructions without special top byte handling.  */
+      else if ((given & 0xff000000) == 0xfe000000
+	       || (given & 0xff000000) == 0xfc000000)
+	;
       /* vdup is also a valid neon instruction.  */
       else if ((given & 0xff910f5f) != 0xee800b10)
 	return FALSE;
@@ -11625,11 +11650,11 @@ select_arm_features (unsigned long mach,
     case bfd_mach_arm_7EM:	 ARM_SET_FEATURES (ARM_ARCH_V7EM); break;
     case bfd_mach_arm_8:
 	{
-	  /* Add bits for extensions that Armv8.5-A recognizes.  */
-	  arm_feature_set armv8_5_ext_fset
+	  /* Add bits for extensions that Armv8.6-A recognizes.  */
+	  arm_feature_set armv8_6_ext_fset
 	    = ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
-	  ARM_SET_FEATURES (ARM_ARCH_V8_5A);
-	  ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_5_ext_fset);
+	  ARM_SET_FEATURES (ARM_ARCH_V8_6A);
+	  ARM_MERGE_FEATURE_SETS (arch_fset, arch_fset, armv8_6_ext_fset);
 	  break;
 	}
     case bfd_mach_arm_8R:	 ARM_SET_FEATURES (ARM_ARCH_V8R); break;