[PATCH v3 8/9] gas: have scrubber retain more whitespace

Wed Jul 31 12:04:36 GMT 2024

According to the description of the state machine, the expectation
appears to be that (leaving aside labels) any insn mnemonic or
directive would be followed by a comma separated list of operands. That
may have been true very long ago, but at the latest with the advent of more
elaborate macros this isn't the case anymore. Neither macro parameters
in macro definitions nor macro arguments in macro invocations are
required to be separated by commas. Hence whitespace serves a crucial
role there. Plus even without "real" macros issues exist, in e.g.

	.irp n, ...
	insn\n\(suffix)	operand1, operand2
	.endr

Whitespace following the closing parenthesis would have been removed
(ahead of even processing the .irp), as the "opcode" was deemed to have
ended earlier already.

Therefore, squash the distinction between "opcode" and operands, i.e.
fold state 10 back into state 3. Also drop most of the distinction
between "symbol chars" and "relatively normal" ones. Not entirely
unexpectedly this results in the need to skip whitespace in a few more
places in arch-specific code (and quite likely more changes are needed
for insn forms not covered by the testsuite).

As a result the D10V special case is no longer necessary.

In config/tc-sparc.c also move a comment to be next to the code being
commented.

In opcodes/cgen-asm.in some further cleanup is done, following the local
var adjustments.
---
Diffs of updates to generated files (CGEN) omitted here.
---
In config/tc-sparc.c the second of the "else if" touched looks
suspicious: Without looking at s1[-2], how can s1[-3] be reliably of the
expected meaning? Is there perhaps ISDIGIT(s1[-2]) missing?
---
v3: Add NEWS entry.
v2: Further target-specific adjustments. However, drop the earlier x86
    adjustment as no longer necessary.

--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,12 @@
 -*- text -*-
 
+* The scrubber (pre-processor) now leaves in place more whitespace, to permit
+  various constructs not fitting the basic "insn opnd1,opnd2[,...]" scheme to
+  work.  This, however, means that macro invocations like "m 1 + 2", i.e. not
+  using double quotes or parentheses around the (apparently) sole argument,
+  will now be treated as passing three arguments.  Such lack of quotation /
+  parenthesization was never reliable to use.
+
 Changes in 2.43:
 
 * The MIPS '--trap' command-line option now causes GAS to dynamically
--- a/gas/app.c
+++ b/gas/app.c
@@ -472,16 +472,18 @@ do_scrub_chars (size_t (*get) (char *, s
 
   /*State 0: beginning of normal line
 	  1: After first whitespace on line (flush more white)
-	  2: After first non-white (opcode) on line (keep 1white)
-	  3: after second white on line (into operands) (flush white)
+	  2: After first non-white (opcode or maybe label when they're followed
+	     by colons) on line (keep 1white)
+	  3: after subsequent white on line (typically into operands)
+	     (flush more white)
 	  4: after putting out a .linefile, put out digits
 	  5: parsing a string, then go to old-state
 	  6: putting out \ escape in a "d string.
 	  7: no longer used
 	  8: no longer used
-	  9: After seeing symbol char in state 3 (keep 1white after symchar)
-	 10: After seeing whitespace in state 9 (keep white before symchar)
-	 11: After seeing a symbol character in state 0 (eg a label definition)
+	  9: After seeing non-white in state 3 (keep 1white)
+	 10: no longer used
+	 11: After seeing a non-white character in state 0 (eg a label definition)
 	 -1: output string in out_string and go to the state in old_state
 	 12: no longer used
 #ifdef DOUBLEBAR_PARALLEL
@@ -944,7 +946,11 @@ do_scrub_chars (size_t (*get) (char *, s
 	          && (state < 1 || strchr (tc_comment_chars, ch)))
 	      || IS_NEWLINE (ch)
 	      || IS_LINE_SEPARATOR (ch)
-	      || IS_PARALLEL_SEPARATOR (ch))
+	      || IS_PARALLEL_SEPARATOR (ch)
+	      /* See comma related comment near the bottom of the function.
+		 Reasoning equally applies to whitespace preceding a comma in
+		 most cases.  */
+	      || (ch == ',' && state > 2 && state != 11))
 	    {
 	      if (scrub_m68k_mri)
 		{
@@ -987,6 +993,7 @@ do_scrub_chars (size_t (*get) (char *, s
 		 character at the beginning of a line.  */
 	      goto recycle;
 	    case 2:
+	    case 9:
 	      state = 3;
 	      if (to + 1 < toend)
 		{
@@ -1010,20 +1017,6 @@ do_scrub_chars (size_t (*get) (char *, s
 		  break;
 		}
 	      goto recycle;	/* Sp in operands */
-	    case 9:
-	    case 10:
-#ifndef TC_KEEP_OPERAND_SPACES
-	      if (scrub_m68k_mri)
-#endif
-		{
-		  /* In MRI mode, we keep these spaces.  */
-		  state = 3;
-		  UNGET (ch);
-		  PUT (' ');
-		  break;
-		}
-	      state = 10;	/* Sp after symbol char */
-	      goto recycle;
 	    case 11:
 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
 		state = 1;
@@ -1094,27 +1087,17 @@ do_scrub_chars (size_t (*get) (char *, s
 	    {
 	      if (ch2 != EOF)
 		UNGET (ch2);
-	      if (state == 9 || state == 10)
-		state = 3;
+	      if (state == 1)
+		state = 2;
+	      else if (state == 3)
+		state = 9;
 	      PUT (ch);
 	    }
 	  break;
 
 	case LEX_IS_STRINGQUOTE:
 	  quotechar = ch;
-	  if (state == 10)
-	    {
-	      /* Preserve the whitespace in foo "bar".  */
-	      UNGET (ch);
-	      state = 3;
-	      PUT (' ');
-
-	      /* PUT didn't jump out.  We could just break, but we
-		 know what will happen, so optimize a bit.  */
-	      ch = GET ();
-	      old_state = 9;
-	    }
-	  else if (state == 3)
+	  if (state == 3)
 	    old_state = 9;
 	  else if (state == 0)
 	    old_state = 11; /* Now seeing label definition.  */
@@ -1135,14 +1118,6 @@ do_scrub_chars (size_t (*get) (char *, s
 	      UNGET (c);
 	    }
 #endif
-	  if (state == 10)
-	    {
-	      /* Preserve the whitespace in foo 'b'.  */
-	      UNGET (ch);
-	      state = 3;
-	      PUT (' ');
-	      break;
-	    }
 	  ch = GET ();
 	  if (ch == EOF)
 	    {
@@ -1177,10 +1152,7 @@ do_scrub_chars (size_t (*get) (char *, s
 	      PUT (out_buf[0]);
 	      break;
 	    }
-	  if (state == 9)
-	    old_state = 3;
-	  else
-	    old_state = state;
+	  old_state = state;
 	  state = -1;
 	  out_string = out_buf;
 	  PUT (*out_string++);
@@ -1190,10 +1162,10 @@ do_scrub_chars (size_t (*get) (char *, s
 #ifdef KEEP_WHITE_AROUND_COLON
 	  state = 9;
 #else
-	  if (state == 9 || state == 10)
-	    state = 3;
-	  else if (state != 3)
+	  if (state == 2 || state == 11)
 	    state = 1;
+	  else
+	    state = 9;
 #endif
 	  PUT (ch);
 	  break;
@@ -1318,20 +1290,6 @@ do_scrub_chars (size_t (*get) (char *, s
 	      break;
 	    }
 
-#ifdef TC_D10V
-	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
-	     Trap is the only short insn that has a first operand that is
-	     neither register nor label.
-	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
-	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
-	     already LEX_IS_LINE_COMMENT_START.  However, it is the
-	     only character in line_comment_chars for d10v, hence we
-	     can recognize it as such.  */
-	  /* An alternative approach would be to reset the state to 1 when
-	     we see '||', '<'- or '->', but that seems to be overkill.  */
-	  if (state == 10)
-	    PUT (' ');
-#endif
 	  /* We have a line comment character which is not at the
 	     start of a line.  If this is also a normal comment
 	     character, fall through.  Otherwise treat it as a default
@@ -1395,17 +1353,6 @@ do_scrub_chars (size_t (*get) (char *, s
 	  /* Fall through.  */
 
 	case LEX_IS_SYMBOL_COMPONENT:
-	  if (state == 10)
-	    {
-	      /* This is a symbol character following another symbol
-		 character, with whitespace in between.  We skipped
-		 the whitespace earlier, so output it now.  */
-	      UNGET (ch);
-	      state = 3;
-	      PUT (' ');
-	      break;
-	    }
-
 #ifdef TC_Z80
 	  /* "af'" is a symbol containing '\''.  */
 	  if (state == 3 && (ch == 'a' || ch == 'A'))
@@ -1431,7 +1378,16 @@ do_scrub_chars (size_t (*get) (char *, s
 		}
 	    }
 #endif
-	  if (state == 3)
+
+	  /* Fall through.  */
+	default:
+	de_fault:
+	  /* Some relatively `normal' character.  */
+	  if (state == 0)
+	    state = 11;	/* Now seeing label definition.  */
+	  else if (state == 1)
+	    state = 2;	/* Ditto.  */
+	  else if (state == 3)
 	    state = 9;
 
 	  /* This is a common case.  Quickly copy CH and all the
@@ -1441,6 +1397,10 @@ do_scrub_chars (size_t (*get) (char *, s
 #if defined TC_ARM && defined OBJ_ELF
 	      && symver_state == NULL
 #endif
+#ifdef TC_Z80
+	      /* See comma related comment below.  */
+	      && ch != ','
+#endif
 	      )
 	    {
 	      char *s;
@@ -1455,6 +1415,12 @@ do_scrub_chars (size_t (*get) (char *, s
 		  if (type != 0
 		      && type != LEX_IS_SYMBOL_COMPONENT)
 		    break;
+#ifdef TC_Z80
+		  /* Need to split at commas, to be able to enter state 16
+		     when needed.  */
+		  if (ch2 == ',')
+		    break;
+#endif
 		}
 
 	      if (s > from)
@@ -1479,52 +1445,15 @@ do_scrub_chars (size_t (*get) (char *, s
 		}
 	    }
 
-	  /* Fall through.  */
-	default:
-	de_fault:
-	  /* Some relatively `normal' character.  */
-	  if (state == 0)
-	    {
-	      state = 11;	/* Now seeing label definition.  */
-	    }
-	  else if (state == 1)
-	    {
-	      state = 2;	/* Ditto.  */
-	    }
-	  else if (state == 9)
-	    {
-	      if (!IS_SYMBOL_COMPONENT (ch))
-		state = 3;
-	    }
-	  else if (state == 10)
-	    {
-	      if (ch == '\\')
-		{
-		  /* Special handling for backslash: a backslash may
-		     be the beginning of a formal parameter (of a
-		     macro) following another symbol character, with
-		     whitespace in between.  If that is the case, we
-		     output a space before the parameter.  Strictly
-		     speaking, correct handling depends upon what the
-		     macro parameter expands into; if the parameter
-		     expands into something which does not start with
-		     an operand character, then we don't want to keep
-		     the space.  We don't have enough information to
-		     make the right choice, so here we are making the
-		     choice which is more likely to be correct.  */
-		  if (to + 1 >= toend)
-		    {
-		      /* If we're near the end of the buffer, save the
-		         character for the next time round.  Otherwise
-		         we'll lose our state.  */
-		      UNGET (ch);
-		      goto tofull;
-		    }
-		  *to++ = ' ';
-		}
+	  /* As a special case, to limit the delta to previous behavior, e.g.
+	     also affecting listings, go straight to state 3 when seeing a
+	     comma. Commas are special: While they can be used to separate
+	     macro parameters or arguments, they cannot (on their own, i.e.
+	     without quoting) be arguments (or parameter default values).
+	     Hence successive whitespace is not meaningful there.  */
+	  if (ch == ',' && state == 9)
+	    state = 3;
 
-	      state = 3;
-	    }
 	  PUT (ch);
 	  break;
 	}
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -643,6 +643,7 @@ const char FLT_CHARS[] = "rRsSfFdDxXeEpP
 static inline bool
 skip_past_char (char **str, char c)
 {
+  skip_whitespace (*str);
   if (**str == c)
     {
       (*str)++;
@@ -893,6 +894,7 @@ parse_reg (char **ccp)
   start++;
 #endif
 
+  skip_whitespace (start);
   p = start;
   if (!ISALPHA (*p) || !is_name_beginner (*p))
     return NULL;
@@ -1202,13 +1204,17 @@ parse_typed_reg (char **ccp, aarch64_reg
 		 struct vector_type_el *typeinfo, unsigned int flags)
 {
   char *str = *ccp;
-  bool is_alpha = ISALPHA (*str);
-  const reg_entry *reg = parse_reg (&str);
+  bool is_alpha;
+  const reg_entry *reg;
   struct vector_type_el atype;
   struct vector_type_el parsetype;
   bool is_typed_vecreg = false;
   unsigned int err_flags = (flags & PTR_IN_REGLIST) ? SEF_IN_REGLIST : 0;
 
+  skip_whitespace (str);
+  is_alpha = ISALPHA (*str);
+  reg = parse_reg (&str);
+
   atype.defined = 0;
   atype.type = NT_invtype;
   atype.width = -1;
@@ -1429,10 +1435,7 @@ parse_vector_reg_list (char **ccp, aarch
   do
     {
       if (in_range)
-	{
-	  str++;		/* skip over '-' */
-	  val_range = val;
-	}
+	val_range = val;
 
       const reg_entry *reg;
       if (has_qualifier)
@@ -1494,7 +1497,8 @@ parse_vector_reg_list (char **ccp, aarch
       in_range = 0;
       ptr_flags |= PTR_GOOD_MATCH;
     }
-  while (skip_past_comma (&str) || (in_range = 1, *str == '-'));
+  while (skip_past_comma (&str)
+	 || (in_range = 1, skip_past_char (&str, '-')));
 
   skip_whitespace (str);
   if (*str != '}')
@@ -8289,6 +8293,7 @@ parse_operands (char *str, const aarch64
     }
 
   /* Check if we have parsed all the operands.  */
+  skip_whitespace (str);
   if (*str != '\0' && ! error_p ())
     {
       /* Set I to the index of the last present operand; this is
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1148,6 +1148,8 @@ my_get_expression (expressionS * ep, cha
     prefix_mode = (prefix_mode == GE_OPT_PREFIX_BIG) ? prefix_mode
 		  : GE_OPT_PREFIX;
 
+  skip_whitespace (*str);
+
   switch (prefix_mode)
     {
     case GE_NO_PREFIX: break;
--- a/gas/config/tc-crx.c
+++ b/gas/config/tc-crx.c
@@ -1723,9 +1723,13 @@ preprocess_reglist (char *param, int *al
 
   while (*paramP != '}')
     {
-      regP = paramP;
       memset (&reg_name, '\0', sizeof (reg_name));
 
+      while (ISSPACE (*paramP))
+	paramP++;
+
+      regP = paramP;
+
       while (ISALNUM (*paramP))
 	paramP++;
 
--- a/gas/config/tc-csky.c
+++ b/gas/config/tc-csky.c
@@ -2409,10 +2409,18 @@ parse_rt (char *s,
     /* Indicate nothing there.  */
     ep->X_op = O_absent;
 
+  /* Skip whitespace.  */
+  while (ISSPACE (*s))
+    ++s;
+
   if (*s == '[')
     {
       s = parse_exp (s + 1, &e);
 
+      /* Skip whitespace.  */
+      while (ISSPACE (*s))
+	++s;
+
       if (*s == ']')
 	s++;
       else
@@ -2935,6 +2943,11 @@ is_reg_lshift_illegal (char **oper, int
     }
 
   *oper += len;
+
+  /* Skip whitespace.  */
+  while (ISSPACE (**oper))
+    ++*oper;
+
   if ((*oper)[0] != '<' || (*oper)[1] != '<')
     {
       SET_ERROR_STRING (ERROR_UNDEFINE,
@@ -3461,6 +3474,9 @@ get_operand_value (struct csky_opcode_in
 	  return false;
 	}
 
+      while (ISSPACE (**oper))
+	++*oper;
+
       if (!get_operand_value (op, oper, &soprnd->subs[0]))
 	{
 	  *s = rc;
@@ -3481,7 +3497,7 @@ get_operand_value (struct csky_opcode_in
 	}
 
       *s = rc;
-      *oper += 1;
+      *oper = s + 1;
       return true;
     }
 
@@ -4277,11 +4293,16 @@ get_operand_value (struct csky_opcode_in
     case OPRND_TYPE_VREG_WITH_INDEX:
       if (parse_type_freg (oper, 0))
 	{
+	  /* Skip whitespace.  */
+	  while (ISSPACE (**oper))
+	    ++*oper;
 	  if (**oper == '[')
 	    {
 	      (*oper)++;
 	      if (is_imm_within_range (oper, 0, 0xf))
 		{
+		  while (ISSPACE (**oper))
+		    ++*oper;
 		  if (**oper == ']')
 		    {
 		      unsigned int idx = --csky_insn.idx;
--- a/gas/config/tc-pru.c
+++ b/gas/config/tc-pru.c
@@ -1399,7 +1399,6 @@ pru_parse_args (pru_insn_infoS *insn ATT
 		  const char *parsestr, char **parsed_args)
 {
   char *p;
-  char *end = NULL;
   int i;
   p = argstr;
   i = 0;
@@ -1426,14 +1425,7 @@ pru_parse_args (pru_insn_infoS *insn ATT
       else
 	{
 	  /* Check that the argument string has no trailing arguments.  */
-	  /* If we've got a %pmem relocation, we've zapped the parens with
-	     spaces.  */
-	  if (strprefix (p, "%pmem") || strprefix (p, "%label"))
-	    end = strpbrk (p, ",");
-	  else
-	    end = strpbrk (p, " ,");
-
-	  if (end != NULL)
+	  if (strpbrk (p, ",") != NULL)
 	    as_bad (_("too many arguments"));
 	}
 
--- a/gas/config/tc-sparc.c
+++ b/gas/config/tc-sparc.c
@@ -1778,6 +1778,9 @@ sparc_ip (char *str, const struct sparc_
          operands match.  */
       for (args = insn->args;; ++args)
 	{
+	  if (*s == ' ' && *args != ' ')
+	    ++s;
+
 	  switch (*args)
 	    {
 	    case 'K':
@@ -2717,11 +2720,6 @@ sparc_ip (char *str, const struct sparc_
 		   'symbols' in the input string.  Try not to create U
 		   symbols for registers, etc.  */
 
-		/* This stuff checks to see if the expression ends in
-		   +%reg.  If it does, it removes the register from
-		   the expression, and re-sets 's' to point to the
-		   right place.  */
-
 		if (op_arg)
 		  {
 		    int npar = 0;
@@ -2751,6 +2749,8 @@ sparc_ip (char *str, const struct sparc_
 			return special_case;
 		      }
 		    s = s1 + 1;
+		    if (*s == ' ')
+		      s++;
 		    if (*s == ',' || *s == ']' || !*s)
 		      continue;
 		    if (*s != '+' && *s != '-')
@@ -2764,17 +2764,45 @@ sparc_ip (char *str, const struct sparc_
 		    memset (&the_insn.exp, 0, sizeof (the_insn.exp));
 		  }
 
+		/* This stuff checks to see if the expression ends in
+		   +%reg.  If it does, it removes the register from
+		   the expression, and re-sets 's' to point to the
+		   right place.  */
+
 		for (s1 = s; *s1 && *s1 != ',' && *s1 != ']'; s1++)
 		  ;
 
+		if (s1 != s && s1[-1] == ' ')
+		  --s1;
 		if (s1 != s && ISDIGIT (s1[-1]))
 		  {
 		    if (s1[-2] == '%' && s1[-3] == '+')
-		      s1 -= 3;
-		    else if (strchr ("golir0123456789", s1[-2]) && s1[-3] == '%' && s1[-4] == '+')
-		      s1 -= 4;
-		    else if (s1[-3] == 'r' && s1[-4] == '%' && s1[-5] == '+')
-		      s1 -= 5;
+		      {
+			if (s1[-3] == '+')
+			  s1 -= 3;
+			else if (s1[-3] == ' ' && s1[-4] == '+')
+			  s1 -= 4;
+			else
+			  s1 = NULL;
+		      }
+		    else if (strchr ("golir0123456789", s1[-2]) && s1[-3] == '%')
+		      {
+			if (s1[-4] == '+')
+			  s1 -= 4;
+			else if (s1[-4] == ' ' && s1[-5] == '+')
+			  s1 -= 5;
+			else
+			  s1 = NULL;
+		      }
+		    else if (s1[-3] == 'r' && s1[-4] == '%')
+		      {
+			if (s1[-5] == '+')
+			  s1 -= 5;
+			else if (s1[-5] == ' ' && s1[-6] == '+')
+			  s1 -= 6;
+			else
+			  s1 = NULL;
+		      }
 		    else
 		      s1 = NULL;
 		    if (s1)
--- a/gas/config/tc-v850.c
+++ b/gas/config/tc-v850.c
@@ -1456,6 +1456,8 @@ parse_register_list (unsigned long *insn
 	    }
 	}
 
+      skip_white_space ();
+
       if (*input_line_pointer == '}')
 	{
 	  input_line_pointer++;
@@ -1475,6 +1477,8 @@ parse_register_list (unsigned long *insn
 	  /* Skip the dash.  */
 	  ++input_line_pointer;
 
+	  skip_white_space ();
+
 	  /* Get the second register in the range.  */
 	  if (! register_name (&exp2))
 	    {
--- a/gas/testsuite/gas/all/macro.l
+++ b/gas/testsuite/gas/all/macro.l
@@ -22,4 +22,14 @@
 [ 	]*[1-9][0-9]*[ 	]+.... 0+70*[ 	]+>  .byte 7
 [ 	]*[1-9][0-9]*[ 	]+.... 0+80*[ 	]+>  .byte 8
 [ 	]*[1-9][0-9]*[ 	]+m[ 	]+""[ 	]+""[ 	]+""
+[ 	]*[1-9][0-9]*[ 	]+
+[ 	]*[1-9][0-9]*[ 	]+m[ 	]+1[ 	]+\+2
+[ 	]*[1-9][0-9]*[ 	]+.... 0+10*[ 	]+>  .byte 1
+[ 	]*[1-9][0-9]*[ 	]+.... 0+20*[ 	]+>  .byte \+2
+[ 	]*[1-9][0-9]*[ 	]+m[ 	]+\(3\)[ 	]+\+4
+[ 	]*[1-9][0-9]*[ 	]+.... 0+30*[ 	]+>  .byte \(3\)
+[ 	]*[1-9][0-9]*[ 	]+.... 0+40*[ 	]+>  .byte \+4
+[ 	]*[1-9][0-9]*[ 	]+m[ 	]+\(5\)[ 	]+\(6\)
+[ 	]*[1-9][0-9]*[ 	]+.... 0+50*[ 	]+>  .byte \(5\)
+[ 	]*[1-9][0-9]*[ 	]+.... 0+60*[ 	]+>  .byte \(6\)
 #pass
--- a/gas/testsuite/gas/all/macro.s
+++ b/gas/testsuite/gas/all/macro.s
@@ -9,8 +9,8 @@
 	m "7" "8"
 	m "" "" ""
 
-	.if 0
 	m 1 +2
 	m (3) +4
 	m (5) (6)
-	.endif
+
+	.byte -1
--- a/gas/testsuite/gas/i386/x86-64-apx-nf.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-nf.s
@@ -1390,13 +1390,13 @@ optimize:
 	{nf}	\op	$128, %ecx, %edx
 	{nf}	\op	$128, %r9
 	{nf}	\op	$128, %r9, %r31
-	{nf}	\op\()b	$128, (%rax)
+	{nf}	\op\(b)	$128, (%rax)
 	{nf}	\op	$128, (%rax), %bl
-	{nf}	\op\()w	$128, (%rax)
+	{nf}	\op\(w)	$128, (%rax)
 	{nf}	\op	$128, (%rax), %dx
-	{nf}	\op\()l	$128, (%rax)
+	{nf}	\op\(l)	$128, (%rax)
 	{nf}	\op	$128, (%rax), %ecx
-	{nf}	\op\()q	$128, (%rax)
+	{nf}	\op\(q)	$128, (%rax)
 	{nf}	\op	$128, (%rax), %r9
 
 	{nf}	\op	$1, %bl
@@ -1407,13 +1407,13 @@ optimize:
 	{nf}	\op	$1, %ecx, %edx
 	{nf}	\op	$1, %r9
 	{nf}	\op	$1, %r9, %r31
-	{nf}	\op\()b	$1, (%rax)
+	{nf}	\op\(b)	$1, (%rax)
 	{nf}	\op	$1, (%rax), %bl
-	{nf}	\op\()w	$1, (%rax)
+	{nf}	\op\(w)	$1, (%rax)
 	{nf}	\op	$1, (%rax), %dx
-	{nf}	\op\()l	$1, (%rax)
+	{nf}	\op\(l)	$1, (%rax)
 	{nf}	\op	$1, (%rax), %ecx
-	{nf}	\op\()q	$1, (%rax)
+	{nf}	\op\(q)	$1, (%rax)
 	{nf}	\op	$1, (%rax), %r9
 
 	{nf}	\op	$0xff, %bl
@@ -1424,13 +1424,13 @@ optimize:
 	{nf}	\op	$-1, %ecx, %edx
 	{nf}	\op	$-1, %r9
 	{nf}	\op	$-1, %r9, %r31
-	{nf}	\op\()b	$0xff, (%rax)
+	{nf}	\op\(b)	$0xff, (%rax)
 	{nf}	\op	$-1, (%rax), %bl
-	{nf}	\op\()w	$0xffff, (%rax)
+	{nf}	\op\(w)	$0xffff, (%rax)
 	{nf}	\op	$-1, (%rax), %dx
-	{nf}	\op\()l	$0xffffffff, (%rax)
+	{nf}	\op\(l)	$0xffffffff, (%rax)
 	{nf}	\op	$-1, (%rax), %ecx
-	{nf}	\op\()q	$-1, (%rax)
+	{nf}	\op\(q)	$-1, (%rax)
 	{nf}	\op	$-1, (%rax), %r9
 	.endr
 
@@ -1444,13 +1444,13 @@ optimize:
 	{nf}	ro\dir	$63, %rdx
 	{nf}	ro\dir	$63, %rdx, %rax
 
-	{nf}	ro\dir\()b	$7, (%rdx)
+	{nf}	ro\dir\(b)	$7, (%rdx)
 	{nf}	ro\dir		$7, (%rdx), %al
-	{nf}	ro\dir\()w	$15, (%rdx)
+	{nf}	ro\dir\(w)	$15, (%rdx)
 	{nf}	ro\dir		$15, (%rdx), %ax
-	{nf}	ro\dir\()l	$31, (%rdx)
+	{nf}	ro\dir\(l)	$31, (%rdx)
 	{nf}	ro\dir		$31, (%rdx), %eax
-	{nf}	ro\dir\()q	$63, (%rdx)
+	{nf}	ro\dir\(q)	$63, (%rdx)
 	{nf}	ro\dir		$63, (%rdx), %rax
 	.endr
 
@@ -1476,10 +1476,10 @@ optimize:
 	# Note: 2-6 want leaving alone with -Os.
 	.irp n, 1, 2, 6, 7
 	# Note: 16-bit 3-operand src!=dst non-ZU form needs leaving alone.
-	{nf} imul $1<<\n, %\r\()dx, %\r\()cx
-	{nf} imul $1<<\n, (%rdx), %\r\()cx
-	{nf} imul $1<<\n, %\r\()cx, %\r\()cx
-	{nf} imul $1<<\n, %\r\()cx
+	{nf} imul $1<<\n, %\r\(dx), %\r\(cx)
+	{nf} imul $1<<\n, (%rdx), %\r\(cx)
+	{nf} imul $1<<\n, %\r\(cx), %\r\(cx)
+	{nf} imul $1<<\n, %\r\(cx)
 
 	.ifeqs "\r",""
 	{nf} imulzu $1<<\n, %dx, %cx
--- a/opcodes/cgen-asm.in
+++ b/opcodes/cgen-asm.in
@@ -68,6 +68,7 @@ char *
   char rxbuf[CGEN_MAX_RX_ELEMENTS];
   char *rx = rxbuf;
   const CGEN_SYNTAX_CHAR_TYPE *syn;
+  char prev_syntax_char = 0;
   int reg_err;
 
   syn = CGEN_SYNTAX_STRING (CGEN_OPCODE_SYNTAX (opc));
@@ -105,6 +106,15 @@ char *
 	{
 	  char c = CGEN_SYNTAX_CHAR (* syn);
 
+	  /* See whitespace related comments in parse_insn_normal().  */
+	  if (c != ' ' && prev_syntax_char != ' '
+	      && (!ISALNUM (c) || !ISALNUM (prev_syntax_char)))
+	    {
+	      *rx++ = ' ';
+	      *rx++ = '*';
+	    }
+	  prev_syntax_char = c;
+
 	  switch (c)
 	    {
 	      /* Escape any regex metacharacters in the syntax.  */
@@ -138,6 +148,7 @@ char *
 	  /* Replace non-syntax fields with globs.  */
 	  *rx++ = '.';
 	  *rx++ = '*';
+	  prev_syntax_char = 0;
 	}
     }
 
@@ -195,10 +206,8 @@ parse_insn_normal (CGEN_CPU_DESC cd,
   const char *errmsg;
   const char *p;
   const CGEN_SYNTAX_CHAR_TYPE * syn;
-#ifdef CGEN_MNEMONIC_OPERANDS
-  /* FIXME: wip */
-  int past_opcode_p;
-#endif
+  char prev_syntax_char = 0;
+  bool past_opcode_p;
 
   /* For now we assume the mnemonic is first (there are no leading operands).
      We can parse it without needing to set up operand parsing.
@@ -214,13 +223,13 @@ parse_insn_normal (CGEN_CPU_DESC cd,
 #ifndef CGEN_MNEMONIC_OPERANDS
   if (* str && ! ISSPACE (* str))
     return _("unrecognized instruction");
+  past_opcode_p = true;
+#else
+  past_opcode_p = false;
 #endif
 
   CGEN_INIT_PARSE (cd);
   cgen_init_parse_operand (cd);
-#ifdef CGEN_MNEMONIC_OPERANDS
-  past_opcode_p = 0;
-#endif
 
   /* We don't check for (*str != '\0') here because we want to parse
      any trailing fake arguments in the syntax string.  */
@@ -234,18 +243,28 @@ parse_insn_normal (CGEN_CPU_DESC cd,
 
   while (* syn != 0)
     {
+      char c = CGEN_SYNTAX_CHAR_P (*syn) ? CGEN_SYNTAX_CHAR (*syn) : 0;
+
+      /* FIXME: Despite this check we may still take inappropriate advantage of
+	 the fact that GAS's input scrubber will remove extraneous whitespace.
+	 We may also be a little too lax with this now, yet being more strict
+	 would require targets to indicate where whitespace is permissible.  */
+      if (past_opcode_p && c != ' ' && ISSPACE (*str)
+	  /* No whitespace between consecutive alphanumeric syntax elements.  */
+	  && (!ISALNUM (c) || !ISALNUM (prev_syntax_char)))
+	++str;
+      prev_syntax_char = c;
+
       /* Non operand chars must match exactly.  */
-      if (CGEN_SYNTAX_CHAR_P (* syn))
+      if (c != 0)
 	{
 	  /* FIXME: While we allow for non-GAS callers above, we assume the
 	     first char after the mnemonic part is a space.  */
-	  /* FIXME: We also take inappropriate advantage of the fact that
-	     GAS's input scrubber will remove extraneous blanks.  */
-	  if (TOLOWER (*str) == TOLOWER (CGEN_SYNTAX_CHAR (* syn)))
+	  if (TOLOWER (*str) == TOLOWER (c))
 	    {
 #ifdef CGEN_MNEMONIC_OPERANDS
-	      if (CGEN_SYNTAX_CHAR(* syn) == ' ')
-		past_opcode_p = 1;
+	      if (c == ' ')
+		past_opcode_p = true;
 #endif
 	      ++ syn;
 	      ++ str;
@@ -257,7 +276,7 @@ parse_insn_normal (CGEN_CPU_DESC cd,
 
 	      /* xgettext:c-format */
 	      sprintf (msg, _("syntax error (expected char `%c', found `%c')"),
-		       CGEN_SYNTAX_CHAR(*syn), *str);
+		       c, *str);
 	      return msg;
 	    }
 	  else
@@ -267,15 +286,12 @@ parse_insn_normal (CGEN_CPU_DESC cd,
 
 	      /* xgettext:c-format */
 	      sprintf (msg, _("syntax error (expected char `%c', found end of instruction)"),
-		       CGEN_SYNTAX_CHAR(*syn));
+		       c);
 	      return msg;
 	    }
 	  continue;
 	}
 
-#ifdef CGEN_MNEMONIC_OPERANDS
-      (void) past_opcode_p;
-#endif
       /* We have an operand of some sort.  */
       errmsg = cd->parse_operand (cd, CGEN_SYNTAX_FIELD (*syn), &str, fields);
       if (errmsg)
--- a/opcodes/nds32-asm.c
+++ b/opcodes/nds32-asm.c
@@ -2486,6 +2486,9 @@ parse_insn (nds32_asm_desc_t *pdesc, nds
 
       while (*plex)
 	{
+	  if (ISSPACE (*p))
+	    ++p;
+
 	  if (IS_LEX_CHAR (*plex))
 	    {
 	      /* If it's a plain char, just compare it.  */
@@ -2530,6 +2533,8 @@ parse_insn (nds32_asm_desc_t *pdesc, nds
 	}
 
       /* Check whether this syntax is accepted.  */
+      if (ISSPACE (*p))
+	++p;
       if (*plex == 0 && (*p == '\0' || *p == '!' || *p == '#'))
 	return 1;