This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
Re: [Patch]Option support to ARM MCU Cortex-M7 and related FPU
- From: Richard Earnshaw <rearnsha at arm dot com>
- To: Terry Guo <terry dot guo at arm dot com>, "binutils at sourceware dot org" <binutils at sourceware dot org>
- Date: Tue, 30 Sep 2014 16:56:15 +0100
- Subject: Re: [Patch]Option support to ARM MCU Cortex-M7 and related FPU
- Authentication-results: sourceware.org; auth=none
- References: <000001cfd7ac$fab50db0$f01f2910$ at arm dot com>
On 24/09/14 05:06, Terry Guo wrote:
> Hi there,
>
> The attached patch implements option support in Binutils for the new ARM
> MCU announced at:
> http://www.arm.com/about/newsroom/arm-supercharges-mcu-market-with-high-performance-cortex-m7-processor.php
> http://www.arm.com/products/processors/cortex-m/cortex-m7-processor.php
>
> Tested with the Binutils regression tests; there are no new regressions.
> Is it OK for trunk?
>
> BR,
> Terry
>
> gas/ChangeLog:
> 2014-09-24 Terry Guo <terry.guo@arm.com>
>
> * config/tc-arm.c (arm_cpus): Support cortex-m7.
> (arm_fpus): Support fpv5-sp-d16 and fpv5-d16.
> (do_vfp_nsyn_cvt_fpv8): Generate an error when a D register is used
> on an S-register-only target like FPv5-SP-D16.
> (do_neon_cvttb_1): Likewise.
> (do_vfp_nsyn_fpv8): Likewise.
> (do_vrint_1): Likewise.
> * doc/c-arm.texi: Document new cpu and fpu names for cortex-m7.
>
> gas/testsuite/ChangeLog:
> 2014-09-24 Terry Guo <terry.guo@arm.com>
>
> * gas/arm/armv7e-m+fpv5-d16.s: New.
> * gas/arm/armv7e-m+fpv5-d16.d: Likewise.
> * gas/arm/armv7e-m+fpv5-sp-d16.s: Likewise.
> * gas/arm/armv7e-m+fpv5-sp-d16.d: Likewise.
>
>
> option-support-cortex-m7-v1.txt
>
>
> diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
> index 5077f87..2fe3126 100644
> --- a/gas/config/tc-arm.c
> +++ b/gas/config/tc-arm.c
> @@ -15032,6 +15032,13 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
> int sz, op;
> int rm;
>
> + /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
> + D register operands. */
> + if (flavour == neon_cvt_flavour_s32_f64
> + || flavour == neon_cvt_flavour_u32_f64)
> + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
> + _(BAD_FPU));
> +
This feels like an abuse of the feature testing (what has vfp_ext_v1
really got to do with this restriction?). Why don't you create
fpu_vfp_ext_v5xd and fpu_vfp_ext_armv8 and then use these in the tests?
Similarly for the cases below.
> set_it_insn_type (OUTSIDE_IT_INSN);
>
> switch (flavour)
> @@ -15296,11 +15303,21 @@ do_neon_cvttb_1 (bfd_boolean t)
> }
> else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype)
> {
> + /* The VCVTB and VCVTT instructions with D-register operands
> + don't work for SP only targets. */
> + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
> + _(BAD_FPU));
> +
> inst.error = NULL;
> do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE);
> }
> else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype)
> {
> + /* The VCVTB and VCVTT instructions with D-register operands
> + don't work for SP only targets. */
> + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
> + _(BAD_FPU));
> +
> inst.error = NULL;
> do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
> }
> @@ -16427,6 +16444,12 @@ do_neon_ldx_stx (void)
> static void
> do_vfp_nsyn_fpv8 (enum neon_shape rs)
> {
> + /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
> + D register operands. */
> + if (neon_shape_class[rs] == SC_DOUBLE)
> + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
> + _(BAD_FPU));
> +
> NEON_ENCODE (FPV8, inst);
>
> if (rs == NS_FFF)
> @@ -16472,6 +16495,12 @@ do_vrint_1 (enum neon_cvt_mode mode)
> if (rs == NS_NULL)
> return;
>
> + /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
> + D register operands. */
> + if (neon_shape_class[rs] == SC_DOUBLE)
> + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1),
> + _(BAD_FPU));
> +
> et = neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
> if (et.type != NT_invtype)
> {
> @@ -24406,6 +24435,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
> ARM_CPU_OPT ("cortex-r7", ARM_ARCH_V7R_IDIV,
> FPU_ARCH_VFP_V3D16,
> "Cortex-R7"),
> + ARM_CPU_OPT ("cortex-m7", ARM_ARCH_V7EM, FPU_NONE, "Cortex-M7"),
> ARM_CPU_OPT ("cortex-m4", ARM_ARCH_V7EM, FPU_NONE, "Cortex-M4"),
> ARM_CPU_OPT ("cortex-m3", ARM_ARCH_V7M, FPU_NONE, "Cortex-M3"),
> ARM_CPU_OPT ("cortex-m1", ARM_ARCH_V6SM, FPU_NONE, "Cortex-M1"),
> @@ -24574,6 +24604,8 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
> {"vfpv4", FPU_ARCH_VFP_V4},
> {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
> {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16},
> + {"fpv5-d16", FPU_ARCH_VFP_V5D16},
> + {"fpv5-sp-d16", FPU_ARCH_VFP_V5_SP_D16},
> {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
> {"fp-armv8", FPU_ARCH_VFP_ARMV8},
> {"neon-fp-armv8", FPU_ARCH_NEON_VFP_ARMV8},
> diff --git a/gas/doc/c-arm.texi b/gas/doc/c-arm.texi
> index 7bcce94..12952a7 100644
> --- a/gas/doc/c-arm.texi
> +++ b/gas/doc/c-arm.texi
> @@ -123,6 +123,7 @@ recognized:
> @code{cortex-r4f},
> @code{cortex-r5},
> @code{cortex-r7},
> +@code{cortex-m7},
> @code{cortex-m4},
> @code{cortex-m3},
> @code{cortex-m1},
> @@ -247,6 +248,8 @@ The following format options are recognized:
> @code{vfpv4},
> @code{vfpv4-d16},
> @code{fpv4-sp-d16},
> +@code{fpv5-sp-d16},
> +@code{fpv5-d16},
> @code{fp-armv8},
> @code{arm1020t},
> @code{arm1020e},
> diff --git a/gas/testsuite/gas/arm/armv7e-m+fpv5-d16.d b/gas/testsuite/gas/arm/armv7e-m+fpv5-d16.d
> new file mode 100644
> index 0000000..2951b1b
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/armv7e-m+fpv5-d16.d
> @@ -0,0 +1,60 @@
> +#name: Valid v7e-m+fpv5-d16
> +#objdump: -dr --prefix-addresses --show-raw-insn
> +
> +.*: +file format .*arm.*
> +
> +Disassembly of section .text:
> +0[0-9a-f]+ <[^>]+> fe00 0a00 vseleq.f32 s0, s0, s0
> +0[0-9a-f]+ <[^>]+> fe50 0aa0 vselvs.f32 s1, s1, s1
> +0[0-9a-f]+ <[^>]+> fe2f fa0f vselge.f32 s30, s30, s30
> +0[0-9a-f]+ <[^>]+> fe7f faaf vselgt.f32 s31, s31, s31
> +0[0-9a-f]+ <[^>]+> fe00 0b00 vseleq.f64 d0, d0, d0
> +0[0-9a-f]+ <[^>]+> fe18 8b08 vselvs.f64 d8, d8, d8
> +0[0-9a-f]+ <[^>]+> fe2f fb0f vselge.f64 d15, d15, d15
> +0[0-9a-f]+ <[^>]+> fe3a ab0a vselgt.f64 d10, d10, d10
> +0[0-9a-f]+ <[^>]+> fe80 0a00 vmaxnm.f32 s0, s0, s0
> +0[0-9a-f]+ <[^>]+> fec0 0aa0 vmaxnm.f32 s1, s1, s1
> +0[0-9a-f]+ <[^>]+> fe8f fa0f vmaxnm.f32 s30, s30, s30
> +0[0-9a-f]+ <[^>]+> fecf faaf vmaxnm.f32 s31, s31, s31
> +0[0-9a-f]+ <[^>]+> fe80 0b00 vmaxnm.f64 d0, d0, d0
> +0[0-9a-f]+ <[^>]+> fe88 8b08 vmaxnm.f64 d8, d8, d8
> +0[0-9a-f]+ <[^>]+> fe8f fb0f vmaxnm.f64 d15, d15, d15
> +0[0-9a-f]+ <[^>]+> fe8a ab0a vmaxnm.f64 d10, d10, d10
> +0[0-9a-f]+ <[^>]+> fe80 0a40 vminnm.f32 s0, s0, s0
> +0[0-9a-f]+ <[^>]+> fec0 0ae0 vminnm.f32 s1, s1, s1
> +0[0-9a-f]+ <[^>]+> fe8f fa4f vminnm.f32 s30, s30, s30
> +0[0-9a-f]+ <[^>]+> fecf faef vminnm.f32 s31, s31, s31
> +0[0-9a-f]+ <[^>]+> fe80 0b40 vminnm.f64 d0, d0, d0
> +0[0-9a-f]+ <[^>]+> fe88 8b48 vminnm.f64 d8, d8, d8
> +0[0-9a-f]+ <[^>]+> fe8f fb4f vminnm.f64 d15, d15, d15
> +0[0-9a-f]+ <[^>]+> fe8a ab4a vminnm.f64 d10, d10, d10
> +0[0-9a-f]+ <[^>]+> febc 0ac0 vcvta.s32.f32 s0, s0
> +0[0-9a-f]+ <[^>]+> fefd 0ae0 vcvtn.s32.f32 s1, s1
> +0[0-9a-f]+ <[^>]+> febe fa4f vcvtp.u32.f32 s30, s30
> +0[0-9a-f]+ <[^>]+> feff fa6f vcvtm.u32.f32 s31, s31
> +0[0-9a-f]+ <[^>]+> febc 0bc0 vcvta.s32.f64 s0, d0
> +0[0-9a-f]+ <[^>]+> fefd 0bc8 vcvtn.s32.f64 s1, d8
> +0[0-9a-f]+ <[^>]+> febe fb4f vcvtp.u32.f64 s30, d15
> +0[0-9a-f]+ <[^>]+> feff fb4a vcvtm.u32.f64 s31, d10
> +0[0-9a-f]+ <[^>]+> eeb6 0ac0 vrintz.f32 s0, s0
> +0[0-9a-f]+ <[^>]+> eef7 0a60 vrintx.f32 s1, s1
> +0[0-9a-f]+ <[^>]+> eeb6 fa4f vrintr.f32 s30, s30
> +0[0-9a-f]+ <[^>]+> feb8 0a40 vrinta.f32 s0, s0
> +0[0-9a-f]+ <[^>]+> fef9 0a60 vrintn.f32 s1, s1
> +0[0-9a-f]+ <[^>]+> feba fa4f vrintp.f32 s30, s30
> +0[0-9a-f]+ <[^>]+> fefb fa6f vrintm.f32 s31, s31
> +0[0-9a-f]+ <[^>]+> eeb6 0bc0 vrintz.f64 d0, d0
> +0[0-9a-f]+ <[^>]+> eeb7 1b41 vrintx.f64 d1, d1
> +0[0-9a-f]+ <[^>]+> eeb6 ab4a vrintr.f64 d10, d10
> +0[0-9a-f]+ <[^>]+> feb8 0b40 vrinta.f64 d0, d0
> +0[0-9a-f]+ <[^>]+> feb9 1b41 vrintn.f64 d1, d1
> +0[0-9a-f]+ <[^>]+> feba ab4a vrintp.f64 d10, d10
> +0[0-9a-f]+ <[^>]+> febb ab4a vrintm.f64 d10, d10
> +0[0-9a-f]+ <[^>]+> eeb3 0bc0 vcvtt.f16.f64 s0, d0
> +0[0-9a-f]+ <[^>]+> eef3 0b48 vcvtb.f16.f64 s1, d8
> +0[0-9a-f]+ <[^>]+> eeb3 fbcf vcvtt.f16.f64 s30, d15
> +0[0-9a-f]+ <[^>]+> eef3 fb4a vcvtb.f16.f64 s31, d10
> +0[0-9a-f]+ <[^>]+> eeb2 0bc0 vcvtt.f64.f16 d0, s0
> +0[0-9a-f]+ <[^>]+> eeb2 8b60 vcvtb.f64.f16 d8, s1
> +0[0-9a-f]+ <[^>]+> eeb2 fbcf vcvtt.f64.f16 d15, s30
> +0[0-9a-f]+ <[^>]+> eeb2 ab6f vcvtb.f64.f16 d10, s31
> diff --git a/gas/testsuite/gas/arm/armv7e-m+fpv5-d16.s b/gas/testsuite/gas/arm/armv7e-m+fpv5-d16.s
> new file mode 100644
> index 0000000..06fba06
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/armv7e-m+fpv5-d16.s
> @@ -0,0 +1,60 @@
> + .syntax unified
> + .text
> + .arch armv7e-m
> + .fpu fpv5-d16
> +
> + .thumb
> + vseleq.f32 s0, s0, s0
> + vselvs.f32 s1, s1, s1
> + vselge.f32 s30, s30, s30
> + vselgt.f32 s31, s31, s31
> + vseleq.f64 d0, d0, d0
> + vselvs.f64 d8, d8, d8
> + vselge.f64 d15, d15, d15
> + vselgt.f64 d10, d10, d10
> + vmaxnm.f32 s0, s0, s0
> + vmaxnm.f32 s1, s1, s1
> + vmaxnm.f32 s30, s30, s30
> + vmaxnm.f32 s31, s31, s31
> + vmaxnm.f64 d0, d0, d0
> + vmaxnm.f64 d8, d8, d8
> + vmaxnm.f64 d15, d15, d15
> + vmaxnm.f64 d10, d10, d10
> + vminnm.f32 s0, s0, s0
> + vminnm.f32 s1, s1, s1
> + vminnm.f32 s30, s30, s30
> + vminnm.f32 s31, s31, s31
> + vminnm.f64 d0, d0, d0
> + vminnm.f64 d8, d8, d8
> + vminnm.f64 d15, d15, d15
> + vminnm.f64 d10, d10, d10
> + vcvta.s32.f32 s0, s0
> + vcvtn.s32.f32 s1, s1
> + vcvtp.u32.f32 s30, s30
> + vcvtm.u32.f32 s31, s31
> + vcvta.s32.f64 s0, d0
> + vcvtn.s32.f64 s1, d8
> + vcvtp.u32.f64 s30, d15
> + vcvtm.u32.f64 s31, d10
> + vrintz.f32 s0, s0
> + vrintx.f32 s1, s1
> + vrintr.f32 s30, s30
> + vrinta.f32 s0, s0
> + vrintn.f32 s1, s1
> + vrintp.f32 s30, s30
> + vrintm.f32 s31, s31
> + vrintz.f64 d0, d0
> + vrintx.f64 d1, d1
> + vrintr.f64 d10, d10
> + vrinta.f64 d0, d0
> + vrintn.f64 d1, d1
> + vrintp.f64 d10, d10
> + vrintm.f64 d10, d10
> + vcvtt.f16.f64 s0, d0
> + vcvtb.f16.f64 s1, d8
> + vcvtt.f16.f64 s30, d15
> + vcvtb.f16.f64 s31, d10
> + vcvtt.f64.f16 d0, s0
> + vcvtb.f64.f16 d8, s1
> + vcvtt.f64.f16 d15, s30
> + vcvtb.f64.f16 d10, s31
> diff --git a/gas/testsuite/gas/arm/armv7e-m+fpv5-sp-d16.d b/gas/testsuite/gas/arm/armv7e-m+fpv5-sp-d16.d
> new file mode 100644
> index 0000000..84ed3b0
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/armv7e-m+fpv5-sp-d16.d
> @@ -0,0 +1,28 @@
> +#objdump: -dr --prefix-addresses --show-raw-insn
> +
> +.*: +file format .*arm.*
> +
> +Disassembly of section .text:
> +0[0-9a-f]+ <[^>]+> fe00 0a00 vseleq.f32 s0, s0, s0
> +0[0-9a-f]+ <[^>]+> fe50 0aa0 vselvs.f32 s1, s1, s1
> +0[0-9a-f]+ <[^>]+> fe2f fa0f vselge.f32 s30, s30, s30
> +0[0-9a-f]+ <[^>]+> fe7f faaf vselgt.f32 s31, s31, s31
> +0[0-9a-f]+ <[^>]+> fe80 0a00 vmaxnm.f32 s0, s0, s0
> +0[0-9a-f]+ <[^>]+> fec0 0aa0 vmaxnm.f32 s1, s1, s1
> +0[0-9a-f]+ <[^>]+> fe8f fa0f vmaxnm.f32 s30, s30, s30
> +0[0-9a-f]+ <[^>]+> fecf faaf vmaxnm.f32 s31, s31, s31
> +0[0-9a-f]+ <[^>]+> fe80 0a40 vminnm.f32 s0, s0, s0
> +0[0-9a-f]+ <[^>]+> fec0 0ae0 vminnm.f32 s1, s1, s1
> +0[0-9a-f]+ <[^>]+> fe8f fa4f vminnm.f32 s30, s30, s30
> +0[0-9a-f]+ <[^>]+> fecf faef vminnm.f32 s31, s31, s31
> +0[0-9a-f]+ <[^>]+> febc 0ac0 vcvta.s32.f32 s0, s0
> +0[0-9a-f]+ <[^>]+> fefd 0ae0 vcvtn.s32.f32 s1, s1
> +0[0-9a-f]+ <[^>]+> febe fa4f vcvtp.u32.f32 s30, s30
> +0[0-9a-f]+ <[^>]+> feff fa6f vcvtm.u32.f32 s31, s31
> +0[0-9a-f]+ <[^>]+> eeb6 0ac0 vrintz.f32 s0, s0
> +0[0-9a-f]+ <[^>]+> eef7 0a60 vrintx.f32 s1, s1
> +0[0-9a-f]+ <[^>]+> eeb6 fa4f vrintr.f32 s30, s30
> +0[0-9a-f]+ <[^>]+> feb8 0a40 vrinta.f32 s0, s0
> +0[0-9a-f]+ <[^>]+> fef9 0a60 vrintn.f32 s1, s1
> +0[0-9a-f]+ <[^>]+> feba fa4f vrintp.f32 s30, s30
> +0[0-9a-f]+ <[^>]+> fefb fa6f vrintm.f32 s31, s31
> diff --git a/gas/testsuite/gas/arm/armv7e-m+fpv5-sp-d16.s b/gas/testsuite/gas/arm/armv7e-m+fpv5-sp-d16.s
> new file mode 100644
> index 0000000..0fee290
> --- /dev/null
> +++ b/gas/testsuite/gas/arm/armv7e-m+fpv5-sp-d16.s
> @@ -0,0 +1,29 @@
> + .syntax unified
> + .text
> + .arch armv7e-m
> + .fpu fpv5-sp-d16
> +
> + .thumb
> + vseleq.f32 s0, s0, s0
> + vselvs.f32 s1, s1, s1
> + vselge.f32 s30, s30, s30
> + vselgt.f32 s31, s31, s31
> + vmaxnm.f32 s0, s0, s0
> + vmaxnm.f32 s1, s1, s1
> + vmaxnm.f32 s30, s30, s30
> + vmaxnm.f32 s31, s31, s31
> + vminnm.f32 s0, s0, s0
> + vminnm.f32 s1, s1, s1
> + vminnm.f32 s30, s30, s30
> + vminnm.f32 s31, s31, s31
> + vcvta.s32.f32 s0, s0
> + vcvtn.s32.f32 s1, s1
> + vcvtp.u32.f32 s30, s30
> + vcvtm.u32.f32 s31, s31
> + vrintz.f32 s0, s0
> + vrintx.f32 s1, s1
> + vrintr.f32 s30, s30
> + vrinta.f32 s0, s0
> + vrintn.f32 s1, s1
> + vrintp.f32 s30, s30
> + vrintm.f32 s31, s31
> diff --git a/include/opcode/arm.h b/include/opcode/arm.h
> index 6d4825a..34d7195 100644
> --- a/include/opcode/arm.h
> +++ b/include/opcode/arm.h
> @@ -153,6 +153,8 @@
> #define FPU_VFP_V4D16 (FPU_VFP_V3D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
> #define FPU_VFP_V4 (FPU_VFP_V3 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
> #define FPU_VFP_V4_SP_D16 (FPU_VFP_V3xD | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
> +#define FPU_VFP_V5D16 (FPU_VFP_V4D16 | FPU_VFP_EXT_ARMV8)
> +#define FPU_VFP_V5_SP_D16 (FPU_VFP_V4_SP_D16 | FPU_VFP_EXT_ARMV8)
> #define FPU_VFP_ARMV8 (FPU_VFP_V4 | FPU_VFP_EXT_ARMV8)
> #define FPU_NEON_ARMV8 (FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA | FPU_NEON_EXT_ARMV8)
> #define FPU_CRYPTO_ARMV8 (FPU_CRYPTO_EXT_ARMV8)
> @@ -186,6 +188,8 @@
> #define FPU_ARCH_VFP_V4 ARM_FEATURE(0, FPU_VFP_V4)
> #define FPU_ARCH_VFP_V4D16 ARM_FEATURE(0, FPU_VFP_V4D16)
> #define FPU_ARCH_VFP_V4_SP_D16 ARM_FEATURE(0, FPU_VFP_V4_SP_D16)
> +#define FPU_ARCH_VFP_V5D16 ARM_FEATURE(0, FPU_VFP_V5D16)
> +#define FPU_ARCH_VFP_V5_SP_D16 ARM_FEATURE(0, FPU_VFP_V5_SP_D16)
> #define FPU_ARCH_NEON_VFP_V4 \
> ARM_FEATURE(0, FPU_VFP_V4 | FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA)
> #define FPU_ARCH_VFP_ARMV8 ARM_FEATURE(0, FPU_VFP_ARMV8)
>