[PATCH] aarch64: Check for register aliases before mnemonics

Richard Earnshaw Richard.Earnshaw@foss.arm.com
Tue Nov 30 12:25:19 GMT 2021



On 30/11/2021 12:16, Richard Sandiford via Binutils wrote:
> Previously we would not accept:
> 
> 	A .req B
> 
> if A happened to be the name of an instruction.  Adding new
> instructions could therefore invalidate existing register aliases.
> 
> I noticed this with a test that used "zero" as a register alias
> for "xzr", where "zero" is now also the name of an SME instruction.
> I don't have any evidence that "real" code is doing this, but it
> seems at least plausible.
> 
> This patch switches things so that we check for register aliases
> first.  It might slow down parsing slightly, but the difference
> is unlikely to be noticeable.
> 
> Things like:
> 
> 	b	.req + 0
> 
> still work, since create_register_alias checks for " .req ",
> and with the input scrubber, we'll only keep whitespace after
> .req if it's followed by another name.  If there's some valid
> expression that I haven't thought about that is scrubbed to
> " .req ", users could avoid the ambiguity by wrapping .req
> in parentheses.
> 
> The new test for invalid aliases already passed.  I just wanted
> something to exercise the !dot condition.
> 
> I can't find a way of exercising the (existing) p == base condition,
> but I'm not brave enough to say that it can never happen.  If it does
> happen, get_mnemonic_name would return an empty string.
> 

We inherited the `a .req b` syntax from the aarch32 assembler, which in 
turn inherited it from the syntax supported by the original Acorn 
assembler for RISC iX.  However, it's always been a bit of an anomaly.

I wonder if it would be wise to add a more conventional directive

   .req a b

and then start the process of deprecating the old form?

> Tested on aarch64-linux-gnu.  OK to install?

OK.

R.

> 
> Richard
> 
> 
> gas/
> 	* config/tc-aarch64.c (opcode_lookup): Move mnemonic extraction
> 	code to...
> 	(md_assemble): ...here.  Check for register aliases first.
> 	* testsuite/gas/aarch64/register_aliases.d,
> 	testsuite/gas/aarch64/register_aliases.s: Test for a register
> 	alias called "zero".
> 	* testsuite/gas/aarch64/register_aliases_invalid.d,
> 	testsuite/gas/aarch64/register_aliases_invalid.l,
> 	testsuite/gas/aarch64/register_aliases_invalid.s: New test.
> ---
>   gas/config/tc-aarch64.c                       | 62 +++++++++----------
>   gas/testsuite/gas/aarch64/register_aliases.d  |  1 +
>   gas/testsuite/gas/aarch64/register_aliases.s  |  3 +-
>   .../gas/aarch64/register_aliases_invalid.d    |  1 +
>   .../gas/aarch64/register_aliases_invalid.l    |  3 +
>   .../gas/aarch64/register_aliases_invalid.s    |  2 +
>   6 files changed, 38 insertions(+), 34 deletions(-)
>   create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.d
>   create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.l
>   create mode 100644 gas/testsuite/gas/aarch64/register_aliases_invalid.s
> 
> diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
> index 9fc61f70c61..b6ed80e6d3a 100644
> --- a/gas/config/tc-aarch64.c
> +++ b/gas/config/tc-aarch64.c
> @@ -5738,25 +5738,18 @@ lookup_mnemonic (const char *start, int len)
>   }
>   
>   /* Subroutine of md_assemble, responsible for looking up the primary
> -   opcode from the mnemonic the user wrote.  STR points to the
> -   beginning of the mnemonic. */
> +   opcode from the mnemonic the user wrote.  BASE points to the beginning
> +   of the mnemonic, DOT points to the first '.' within the mnemonic
> +   (if any) and END points to the end of the mnemonic.  */
>   
>   static templates *
> -opcode_lookup (char **str)
> +opcode_lookup (char *base, char *dot, char *end)
>   {
> -  char *end, *base, *dot;
>     const aarch64_cond *cond;
>     char condname[16];
>     int len;
>   
> -  /* Scan up to the end of the mnemonic, which must end in white space,
> -     '.', or end of string.  */
> -  dot = 0;
> -  for (base = end = *str; is_part_of_name(*end); end++)
> -    if (*end == '.' && !dot)
> -      dot = end;
> -
> -  if (end == base || dot == base)
> +  if (dot == end)
>       return 0;
>   
>     inst.cond = COND_ALWAYS;
> @@ -5765,23 +5758,13 @@ opcode_lookup (char **str)
>     if (dot)
>       {
>         cond = str_hash_find_n (aarch64_cond_hsh, dot + 1, end - dot - 1);
> -      if (cond)
> -	{
> -	  inst.cond = cond->value;
> -	  *str = end;
> -	}
> -      else
> -	{
> -	  *str = dot;
> -	  return 0;
> -	}
> +      if (!cond)
> +	return 0;
> +      inst.cond = cond->value;
>         len = dot - base;
>       }
>     else
> -    {
> -      *str = end;
> -      len = end - base;
> -    }
> +    len = end - base;
>   
>     if (inst.cond == COND_ALWAYS)
>       {
> @@ -7870,7 +7853,6 @@ dump_opcode_operands (const aarch64_opcode *opcode)
>   void
>   md_assemble (char *str)
>   {
> -  char *p = str;
>     templates *template;
>     const aarch64_opcode *opcode;
>     aarch64_inst *inst_base;
> @@ -7893,14 +7875,28 @@ md_assemble (char *str)
>     DEBUG_TRACE ("==============================");
>     DEBUG_TRACE ("Enter md_assemble with %s", str);
>   
> -  template = opcode_lookup (&p);
> +  /* Scan up to the end of the mnemonic, which must end in whitespace,
> +     '.', or end of string.  */
> +  char *p = str;
> +  char *dot = 0;
> +  for (; is_part_of_name (*p); p++)
> +    if (*p == '.' && !dot)
> +      dot = p;
> +
> +  if (p == str)
> +    {
> +      as_bad (_("unknown mnemonic -- `%s'"), str);
> +      return;
> +    }
> +
> +  if (!dot && create_register_alias (str, p))
> +    return;
> +
> +  template = opcode_lookup (str, dot, p);
>     if (!template)
>       {
> -      /* It wasn't an instruction, but it might be a register alias of
> -         the form alias .req reg directive.  */
> -      if (!create_register_alias (str, p))
> -	as_bad (_("unknown mnemonic `%s' -- `%s'"), get_mnemonic_name (str),
> -		str);
> +      as_bad (_("unknown mnemonic `%s' -- `%s'"), get_mnemonic_name (str),
> +	      str);
>         return;
>       }
>   
> diff --git a/gas/testsuite/gas/aarch64/register_aliases.d b/gas/testsuite/gas/aarch64/register_aliases.d
> index eab63870dee..8d614b47606 100644
> --- a/gas/testsuite/gas/aarch64/register_aliases.d
> +++ b/gas/testsuite/gas/aarch64/register_aliases.d
> @@ -10,3 +10,4 @@ Disassembly of section \.text:
>      8:	f94003b1 	ldr	x17, \[x29\]
>      c:	f90003b0 	str	x16, \[x29\]
>     10:	f94003b1 	ldr	x17, \[x29\]
> +  14:	f900001f 	str	xzr, \[x0\]
> diff --git a/gas/testsuite/gas/aarch64/register_aliases.s b/gas/testsuite/gas/aarch64/register_aliases.s
> index fcd065078bb..856be5699ce 100644
> --- a/gas/testsuite/gas/aarch64/register_aliases.s
> +++ b/gas/testsuite/gas/aarch64/register_aliases.s
> @@ -3,9 +3,10 @@
>   	fp 	.req 	x29
>   	ip0	.req 	x16
>   	ip1	.req 	x17
> +	zero	.req	xzr
>   	add	ip0, ip0, lr
>   	str 	ip0, [fp]
>   	ldr	ip1, [fp]
>   	str 	IP0, [fp]
>   	ldr	IP1, [fp]
> -
> +	str	zero, [x0]
> diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.d b/gas/testsuite/gas/aarch64/register_aliases_invalid.d
> new file mode 100644
> index 00000000000..7c453ce02b8
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.d
> @@ -0,0 +1 @@
> +#error_output: register_aliases_invalid.l
> diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.l b/gas/testsuite/gas/aarch64/register_aliases_invalid.l
> new file mode 100644
> index 00000000000..6350049df74
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.l
> @@ -0,0 +1,3 @@
> +.*:
> +.*: Error: unknown mnemonic `lr\.req' -- `lr\.req x30'
> +.*: Error: unknown mnemonic `lr\.a' -- `lr\.a .req x30'
> diff --git a/gas/testsuite/gas/aarch64/register_aliases_invalid.s b/gas/testsuite/gas/aarch64/register_aliases_invalid.s
> new file mode 100644
> index 00000000000..2df2eaab4d6
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/register_aliases_invalid.s
> @@ -0,0 +1,2 @@
> +lr.req 	x30
> +lr.a	.req	x30
> 


More information about the Binutils mailing list