This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[RFA/ARM 17/21] Add support for 16-/64-bit Floating Point conversions.

From: Matthew Gretton-Dann <matthew dot gretton-dann at arm dot com>
To: binutils at sourceware dot org
Date: Thu, 23 Aug 2012 16:04:24 +0100
Subject: [RFA/ARM 17/21] Add support for 16-/64-bit Floating Point conversions.
References: <cover.1345733341.git.matthew.gretton-dann@arm.com>

ARMv8 extends VCVT[TB] to support converting to/from double precision
floats.

This patch adds support for this; in the process, it also fixes bugs in
the support for half-precision <-> single-precision conversions.

gas/ChangeLog:
2012-08-23  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	* config/tc-arm.c (el_type_of_type_chk): Add handling for 16-bit
	floating point types.
	(do_neon_cvttb_2): New function.
	(do_neon_cvttb_1): Likewise.
	(do_neon_cvtb): Refactor to use do_neon_cvttb_1.
	(do_neon_cvtt): Likewise.

gas/testsuite/ChangeLog:
2012-08-23  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	* gas/arm/armv8-a+fp.d: Update testcase.
	* gas/arm/armv8-a+fp.s: Likewise.
	* gas/arm/half-prec-vfpv3.s: Likewise.

opcodes/ChangeLog:
2012-08-23  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	* arm-dis.c (coprocessor_opcodes): Add support for HP/DP
	conversions.

diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 3c8bd60..6d8d18f 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -12832,7 +12832,7 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
 
   if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
     *size = 8;
-  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_P16)) != 0)
+  else if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_F16 | N_P16)) != 0)
     *size = 16;
   else if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
     *size = 32;
@@ -12851,7 +12851,7 @@ el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
     *type = NT_untyped;
   else if ((mask & (N_P8 | N_P16)) != 0)
     *type = NT_poly;
-  else if ((mask & (N_F32 | N_F64)) != 0)
+  else if ((mask & (N_F16 | N_F32 | N_F64)) != 0)
     *type = NT_float;
   else
     return FAIL;
@@ -14888,31 +14888,63 @@ do_neon_cvtm (void)
 }
 
 static void
-do_neon_cvtb (void)
+do_neon_cvttb_2 (bfd_boolean t, bfd_boolean to, bfd_boolean is_double)
 {
-  inst.instruction = 0xeb20a40;
+  if (is_double)
+    mark_feature_used (&fpu_vfp_ext_armv8);
 
-  /* The sizes are attached to the mnemonic.  */
-  if (inst.vectype.el[0].type != NT_invtype
-      && inst.vectype.el[0].size == 16)
-    inst.instruction |= 0x00010000;
+  encode_arm_vfp_reg (inst.operands[0].reg,
+		      (is_double && !to) ? VFP_REG_Dd : VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[1].reg,
+		      (is_double && to) ? VFP_REG_Dm : VFP_REG_Sm);
+  inst.instruction |= to ? 0x10000 : 0;
+  inst.instruction |= t ? 0x80 : 0;
+  inst.instruction |= is_double ? 0x100 : 0;
+  do_vfp_cond_or_thumb ();
+}
 
-  /* Programmer's syntax: the sizes are attached to the operands.  */
-  else if (inst.operands[0].vectype.type != NT_invtype
-	   && inst.operands[0].vectype.size == 16)
-    inst.instruction |= 0x00010000;
+static void
+do_neon_cvttb_1 (bfd_boolean t)
+{
+  enum neon_shape rs = neon_select_shape (NS_FF, NS_FD, NS_DF, NS_NULL);
 
-  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
-  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm);
-  do_vfp_cond_or_thumb ();
+  if (rs == NS_NULL)
+    return;
+  else if (neon_check_type (2, rs, N_F16, N_F32 | N_VFP).type != NT_invtype)
+    {
+      inst.error = NULL;
+      do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/FALSE);
+    }
+  else if (neon_check_type (2, rs, N_F32 | N_VFP, N_F16).type != NT_invtype)
+    {
+      inst.error = NULL;
+      do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/FALSE);
+    }
+  else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype)
+    {
+      inst.error = NULL;
+      do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE);
+    }
+  else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype)
+    {
+      inst.error = NULL;
+      do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
+    }
+  else
+    return;
+}
+
+static void
+do_neon_cvtb (void)
+{
+  do_neon_cvttb_1 (FALSE);
 }
 
 
 static void
 do_neon_cvtt (void)
 {
-  do_neon_cvtb ();
-  inst.instruction |= 0x80;
+  do_neon_cvttb_1 (TRUE);
 }
 
 static void
@@ -18958,8 +18990,8 @@ static const struct asm_opcode insns[] =
 
  nCEF(vcvt,     _vcvt,   3, (RNSDQ, RNSDQ, oI32z), neon_cvt),
  nCEF(vcvtr,    _vcvt,   2, (RNSDQ, RNSDQ), neon_cvtr),
- nCEF(vcvtb,	_vcvt,	 2, (RVS, RVS), neon_cvtb),
- nCEF(vcvtt,	_vcvt,	 2, (RVS, RVS), neon_cvtt),
+ NCEF(vcvtb,	eb20a40, 2, (RVSD, RVSD), neon_cvtb),
+ NCEF(vcvtt,	eb20a40, 2, (RVSD, RVSD), neon_cvtt),
 
 
   /* NOTE: All VMOV encoding is special-cased!  */
diff --git a/gas/testsuite/gas/arm/armv8-a+fp.d b/gas/testsuite/gas/arm/armv8-a+fp.d
index e478411..bb52e0a 100644
--- a/gas/testsuite/gas/arm/armv8-a+fp.d
+++ b/gas/testsuite/gas/arm/armv8-a+fp.d
@@ -50,6 +50,14 @@ Disassembly of section .text:
 0[0-9a-f]+ <[^>]+> feb91b41 	vrintn.f64.f64	d1, d1
 0[0-9a-f]+ <[^>]+> fefaeb6e 	vrintp.f64.f64	d30, d30
 0[0-9a-f]+ <[^>]+> fefbfb6f 	vrintm.f64.f64	d31, d31
+0[0-9a-f]+ <[^>]+> eeb30bc0 	vcvtt.f16.f64	s0, d0
+0[0-9a-f]+ <[^>]+> eef30b60 	vcvtb.f16.f64	s1, d16
+0[0-9a-f]+ <[^>]+> eeb3fbcf 	vcvtt.f16.f64	s30, d15
+0[0-9a-f]+ <[^>]+> eef3fb6f 	vcvtb.f16.f64	s31, d31
+0[0-9a-f]+ <[^>]+> eeb20bc0 	vcvtt.f64.f16	d0, s0
+0[0-9a-f]+ <[^>]+> eef20b60 	vcvtb.f64.f16	d16, s1
+0[0-9a-f]+ <[^>]+> eeb2fbcf 	vcvtt.f64.f16	d15, s30
+0[0-9a-f]+ <[^>]+> eef2fb6f 	vcvtb.f64.f16	d31, s31
 0[0-9a-f]+ <[^>]+> fe00 0a00 	vseleq.f32	s0, s0, s0
 0[0-9a-f]+ <[^>]+> fe50 0aa0 	vselvs.f32	s1, s1, s1
 0[0-9a-f]+ <[^>]+> fe2f fa0f 	vselge.f32	s30, s30, s30
@@ -96,3 +104,11 @@ Disassembly of section .text:
 0[0-9a-f]+ <[^>]+> feb9 1b41 	vrintn.f64.f64	d1, d1
 0[0-9a-f]+ <[^>]+> fefa eb6e 	vrintp.f64.f64	d30, d30
 0[0-9a-f]+ <[^>]+> fefb fb6f 	vrintm.f64.f64	d31, d31
+0[0-9a-f]+ <[^>]+> eeb3 0bc0 	vcvtt.f16.f64	s0, d0
+0[0-9a-f]+ <[^>]+> eef3 0b60 	vcvtb.f16.f64	s1, d16
+0[0-9a-f]+ <[^>]+> eeb3 fbcf 	vcvtt.f16.f64	s30, d15
+0[0-9a-f]+ <[^>]+> eef3 fb6f 	vcvtb.f16.f64	s31, d31
+0[0-9a-f]+ <[^>]+> eeb2 0bc0 	vcvtt.f64.f16	d0, s0
+0[0-9a-f]+ <[^>]+> eef2 0b60 	vcvtb.f64.f16	d16, s1
+0[0-9a-f]+ <[^>]+> eeb2 fbcf 	vcvtt.f64.f16	d15, s30
+0[0-9a-f]+ <[^>]+> eef2 fb6f 	vcvtb.f64.f16	d31, s31
diff --git a/gas/testsuite/gas/arm/armv8-a+fp.s b/gas/testsuite/gas/arm/armv8-a+fp.s
index 10a391a..f99302f 100644
--- a/gas/testsuite/gas/arm/armv8-a+fp.s
+++ b/gas/testsuite/gas/arm/armv8-a+fp.s
@@ -50,6 +50,14 @@
 	vrintn.f64.f64	d1, d1
 	vrintp.f64.f64	d30, d30
 	vrintm.f64.f64	d31, d31
+	vcvtt.f16.f64	s0, d0
+	vcvtb.f16.f64	s1, d16
+	vcvtt.f16.f64	s30, d15
+	vcvtb.f16.f64	s31, d31
+	vcvtt.f64.f16	d0, s0
+	vcvtb.f64.f16	d16, s1
+	vcvtt.f64.f16	d15, s30
+	vcvtb.f64.f16	d31, s31
 
 	.thumb
 	vseleq.f32	s0, s0, s0
@@ -98,3 +106,11 @@
 	vrintn.f64.f64	d1, d1
 	vrintp.f64.f64	d30, d30
 	vrintm.f64.f64	d31, d31
+	vcvtt.f16.f64	s0, d0
+	vcvtb.f16.f64	s1, d16
+	vcvtt.f16.f64	s30, d15
+	vcvtb.f16.f64	s31, d31
+	vcvtt.f64.f16	d0, s0
+	vcvtb.f64.f16	d16, s1
+	vcvtt.f64.f16	d15, s30
+	vcvtb.f64.f16	d31, s31
diff --git a/gas/testsuite/gas/arm/half-prec-vfpv3.s b/gas/testsuite/gas/arm/half-prec-vfpv3.s
index acd1508..d658807 100644
--- a/gas/testsuite/gas/arm/half-prec-vfpv3.s
+++ b/gas/testsuite/gas/arm/half-prec-vfpv3.s
@@ -1,20 +1,20 @@
 	.text
-	vcvtt.f32.f32		s0, s1
-	vcvtteq.f32.f32		s2, s3
-	vcvttne.f32.f32		s2, s3
-	vcvttcs.f32.f32		s2, s3
-	vcvttcc.f32.f32		s2, s3
-	vcvttmi.f32.f32		s2, s3
-	vcvttpl.f32.f32		s2, s3
-	vcvttvs.f32.f32		s2, s3
-	vcvttvc.f32.f32		s2, s3
-	vcvtthi.f32.f32		s2, s3
-	vcvttls.f32.f32		s2, s3
-	vcvttge.f32.f32		s2, s3
-	vcvttlt.f32.f32		s2, s3
-	vcvttgt.f32.f32		s2, s3
-	vcvttle.f32.f32		s2, s3
-	vcvttal.f32.f32		s2, s3
+	vcvtt.f32.f16		s0, s1
+	vcvtteq.f32.f16		s2, s3
+	vcvttne.f32.f16		s2, s3
+	vcvttcs.f32.f16		s2, s3
+	vcvttcc.f32.f16		s2, s3
+	vcvttmi.f32.f16		s2, s3
+	vcvttpl.f32.f16		s2, s3
+	vcvttvs.f32.f16		s2, s3
+	vcvttvc.f32.f16		s2, s3
+	vcvtthi.f32.f16		s2, s3
+	vcvttls.f32.f16		s2, s3
+	vcvttge.f32.f16		s2, s3
+	vcvttlt.f32.f16		s2, s3
+	vcvttgt.f32.f16		s2, s3
+	vcvttle.f32.f16		s2, s3
+	vcvttal.f32.f16		s2, s3
 
 	vcvtt.f16.f32		s0, s1
 	vcvtteq.f16.f32		s2, s3
@@ -33,22 +33,22 @@
 	vcvttle.f16.f32		s2, s3
 	vcvttal.f16.f32		s2, s3
 
-	vcvtb.f32.f32		s0, s1
-	vcvtbeq.f32.f32		s2, s3
-	vcvtbne.f32.f32		s2, s3
-	vcvtbcs.f32.f32		s2, s3
-	vcvtbcc.f32.f32		s2, s3
-	vcvtbmi.f32.f32		s2, s3
-	vcvtbpl.f32.f32		s2, s3
-	vcvtbvs.f32.f32		s2, s3
-	vcvtbvc.f32.f32		s2, s3
-	vcvtbhi.f32.f32		s2, s3
-	vcvtbls.f32.f32		s2, s3
-	vcvtbge.f32.f32		s2, s3
-	vcvtblt.f32.f32		s2, s3
-	vcvtbgt.f32.f32		s2, s3
-	vcvtble.f32.f32		s2, s3
-	vcvtbal.f32.f32		s2, s3
+	vcvtb.f32.f16		s0, s1
+	vcvtbeq.f32.f16		s2, s3
+	vcvtbne.f32.f16		s2, s3
+	vcvtbcs.f32.f16		s2, s3
+	vcvtbcc.f32.f16		s2, s3
+	vcvtbmi.f32.f16		s2, s3
+	vcvtbpl.f32.f16		s2, s3
+	vcvtbvs.f32.f16		s2, s3
+	vcvtbvc.f32.f16		s2, s3
+	vcvtbhi.f32.f16		s2, s3
+	vcvtbls.f32.f16		s2, s3
+	vcvtbge.f32.f16		s2, s3
+	vcvtblt.f32.f16		s2, s3
+	vcvtbgt.f32.f16		s2, s3
+	vcvtble.f32.f16		s2, s3
+	vcvtbal.f32.f16		s2, s3
 
 	vcvtb.f16.f32		s0, s1
 	vcvtbeq.f16.f32		s2, s3
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index 60463d1..c6deac9 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -319,6 +319,8 @@ static const struct opcode32 coprocessor_opcodes[] =
   {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"},
   {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"},
   /* Half-precision conversion instructions.  */
+  {FPU_VFP_EXT_ARMV8, 0x0eb20b40, 0x0fbf0f50, "vcvt%7?tb%c.f64.f16\t%z1, %y0"},
+  {FPU_VFP_EXT_ARMV8, 0x0eb30b40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f64\t%y1, %z0"},
   {FPU_VFP_EXT_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
   {FPU_VFP_EXT_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},

Follow-Ups:
- Re: [RFA/ARM 17/21] Add support for 16-/64-bit Floating Point conversions.
  - From: Richard Earnshaw

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]