[PATCH 7/8] x86: derive mandatory prefix attribute from base opcode

Jan Beulich jbeulich@suse.com
Mon Mar 22 16:46:14 GMT 2021


Just like is already done for legacy encoded insns, record the mandatory
prefix information in the respective opcode modifier field. Do this
without changing the source table, but rather by deriving the values from
their existing source representation.

gas/
2021-03-XX  Jan Beulich  <jbeulich@suse.com>

	* config/tc-i386.c (md_begin): Add assertion.
	(build_vex_prefix): Drop implied prefix calculation.
	(build_evex_prefix): Likewise.
	(optimize_encoding): Adjust opcode checks.
	(load_insn_p): Also check opcodeprefix.
	(match_template): Also check opcodespace.
	(process_suffix): Likewise.
	(process_operands): Likewise.
	(output_insn): Likewise. Also check isprefix when discaring
	standalone LOCK.
	* config/tc-i386-intel.c (i386_intel_operand): Also check
	opcodespace.

opcodes/
2021-03-XX  Jan Beulich  <jbeulich@suse.com>

	* i386-gen.c (process_i386_opcode_modifier): Return void. New
	parameter "prefix". Drop local variable "regular_encoding".
	Record prefix setting / check for consistency.
	(output_i386_opcode): Parse opcode_length and base_opcode
	earlier. Derive prefix encoding. Drop no longer applicable
	consistency checking. Adjust process_i386_opcode_modifier()
	invocation.
	(process_i386_opcodes): Adjust process_i386_opcode_modifier()
	invocation.
	* i386-tbl.h: Re-generate.
---
Things are being done this way because I generally think we may want to
revert that aspect of the earlier change introducing the opcode modifier
representation. This is because I believe the opcode table source is
easier to read and maintain that way. I'm similarly considering to put
the encoding space into the base opcode as well for VEX/XOP/EVEX
templates, such that the templates overall (and the SSE2AVX ones in
particular) would become more similar with one another.

--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -642,7 +642,8 @@ i386_intel_operand (char *operand_string
     return 0;
 
   if (intel_state.op_modifier != O_absent
-      && current_templates->start->base_opcode != 0x8d /* lea */)
+      && (current_templates->start->opcode_modifier.opcodespace != SPACE_BASE
+          || current_templates->start->base_opcode != 0x8d /* lea */))
     {
       i.types[this_operand].bitfield.unspecified = 0;
 
@@ -666,7 +667,8 @@ i386_intel_operand (char *operand_string
 	  if ((current_templates->start->name[0] == 'l'
 	       && current_templates->start->name[2] == 's'
 	       && current_templates->start->name[3] == 0)
-	      || current_templates->start->base_opcode == 0x62 /* bound */)
+	      || (current_templates->start->opcode_modifier.opcodespace == SPACE_BASE
+		  && current_templates->start->base_opcode == 0x62 /* bound */))
 	    suffix = WORD_MNEM_SUFFIX;
 	  else if (flag_code != CODE_32BIT
 		   && (current_templates->start->opcode_modifier.jump == JUMP
@@ -696,7 +698,8 @@ i386_intel_operand (char *operand_string
 
 	case O_qword_ptr: /* O_mmword_ptr */
 	  i.types[this_operand].bitfield.qword = 1;
-	  if (current_templates->start->base_opcode == 0x62 /* bound */
+	  if ((current_templates->start->opcode_modifier.opcodespace == SPACE_BASE
+	       && current_templates->start->base_opcode == 0x62 /* bound */)
 	      || got_a_float == 1)	/* "f..." */
 	    suffix = LONG_MNEM_SUFFIX;
 	  else
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -3065,6 +3065,8 @@ md_begin (void)
 
     while (1)
       {
+	gas_assert (optab->opcode_length == 4
+		    || !(optab->base_opcode >> (8 * optab->opcode_length)));
 	++optab;
 	if (optab->name == NULL
 	    || strcmp (optab->name, (optab - 1)->name) != 0)
@@ -3588,7 +3590,6 @@ static void
 build_vex_prefix (const insn_template *t)
 {
   unsigned int register_specifier;
-  unsigned int implied_prefix;
   unsigned int vector_length;
   unsigned int w;
 
@@ -3696,24 +3697,6 @@ build_vex_prefix (const insn_template *t
 	  }
     }
 
-  switch ((i.tm.base_opcode >> (i.tm.opcode_length << 3)) & 0xff)
-    {
-    case 0:
-      implied_prefix = 0;
-      break;
-    case DATA_PREFIX_OPCODE:
-      implied_prefix = 1;
-      break;
-    case REPE_PREFIX_OPCODE:
-      implied_prefix = 2;
-      break;
-    case REPNE_PREFIX_OPCODE:
-      implied_prefix = 3;
-      break;
-    default:
-      abort ();
-    }
-
   /* Check the REX.W bit and VEXW.  */
   if (i.tm.opcode_modifier.vexw == VEXWIG)
     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
@@ -3739,7 +3722,7 @@ build_vex_prefix (const insn_template *t
       i.vex.bytes[1] = (r << 7
 			| register_specifier << 3
 			| vector_length << 2
-			| implied_prefix);
+			| i.tm.opcode_modifier.opcodeprefix);
     }
   else
     {
@@ -3769,7 +3752,7 @@ build_vex_prefix (const insn_template *t
       i.vex.bytes[2] = (w << 7
 			| register_specifier << 3
 			| vector_length << 2
-			| implied_prefix);
+			| i.tm.opcode_modifier.opcodeprefix);
     }
 }
 
@@ -3792,8 +3775,7 @@ is_any_vex_encoding (const insn_template
 static void
 build_evex_prefix (void)
 {
-  unsigned int register_specifier;
-  unsigned int implied_prefix, w;
+  unsigned int register_specifier, w;
   rex_byte vrex_used = 0;
 
   /* Check register specifier.  */
@@ -3822,24 +3804,6 @@ build_evex_prefix (void)
 	vrex_used |= REX_X;
     }
 
-  switch ((i.tm.base_opcode >> 8) & 0xff)
-    {
-    case 0:
-      implied_prefix = 0;
-      break;
-    case DATA_PREFIX_OPCODE:
-      implied_prefix = 1;
-      break;
-    case REPE_PREFIX_OPCODE:
-      implied_prefix = 2;
-      break;
-    case REPNE_PREFIX_OPCODE:
-      implied_prefix = 3;
-      break;
-    default:
-      abort ();
-    }
-
   /* 4 byte EVEX prefix.  */
   i.vex.length = 4;
   i.vex.bytes[0] = 0x62;
@@ -3882,11 +3846,11 @@ build_evex_prefix (void)
   else
     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
 
-  /* Encode the U bit.  */
-  implied_prefix |= 0x4;
-
   /* The third byte of the EVEX prefix.  */
-  i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);
+  i.vex.bytes[2] = ((w << 7)
+		    | (register_specifier << 3)
+		    | 4 /* Encode the U bit.  */
+		    | i.tm.opcode_modifier.opcodeprefix);
 
   /* The fourth byte of the EVEX prefix.  */
   /* The zeroing-masking bit.  */
@@ -4192,19 +4156,15 @@ optimize_encoding (void)
 		       || (i.tm.operand_types[2].bitfield.zmmword
 			   && i.types[2].bitfield.ymmword))))
 	   && ((i.tm.base_opcode == 0x55
-		|| i.tm.base_opcode == 0x6655
-		|| i.tm.base_opcode == 0x66df
 		|| i.tm.base_opcode == 0x57
-		|| i.tm.base_opcode == 0x6657
-		|| i.tm.base_opcode == 0x66ef
-		|| i.tm.base_opcode == 0x66f8
-		|| i.tm.base_opcode == 0x66f9
-		|| i.tm.base_opcode == 0x66fa
-		|| i.tm.base_opcode == 0x66fb
+		|| i.tm.base_opcode == 0xdf
+		|| i.tm.base_opcode == 0xef
+		|| i.tm.base_opcode == 0xf8
+		|| i.tm.base_opcode == 0xf9
+		|| i.tm.base_opcode == 0xfa
+		|| i.tm.base_opcode == 0xfb
 		|| i.tm.base_opcode == 0x42
-		|| i.tm.base_opcode == 0x6642
-		|| i.tm.base_opcode == 0x47
-		|| i.tm.base_opcode == 0x6647)
+		|| i.tm.base_opcode == 0x47)
 	       && i.tm.extension_opcode == None))
     {
       /* Optimize: -O1:
@@ -4257,7 +4217,7 @@ optimize_encoding (void)
 	}
       else if (i.tm.operand_types[0].bitfield.class == RegMask)
 	{
-	  i.tm.base_opcode &= 0xff;
+	  i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
 	  i.tm.opcode_modifier.vexw = VEXW0;
 	}
       else
@@ -4276,11 +4236,9 @@ optimize_encoding (void)
 	   && !i.mask
 	   && !i.broadcast
 	   && is_evex_encoding (&i.tm)
-	   && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
-	       || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
-	       || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
-	       || (i.tm.base_opcode & ~4) == 0x66db
-	       || (i.tm.base_opcode & ~4) == 0x66eb)
+	   && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
+	       || (i.tm.base_opcode & ~4) == 0xdb
+	       || (i.tm.base_opcode & ~4) == 0xeb)
 	   && i.tm.extension_opcode == None)
     {
       /* Optimize: -O1:
@@ -4331,13 +4289,14 @@ optimize_encoding (void)
 	    i.types[j].bitfield.disp8 = vex_disp8;
 	    break;
 	  }
-      if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
-	i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
+      if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
+	  && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
+	i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
       i.tm.opcode_modifier.vex
 	= i.types[0].bitfield.ymmword ? VEX256 : VEX128;
       i.tm.opcode_modifier.vexw = VEXW0;
       /* VPAND, VPOR, and VPXOR are commutative.  */
-      if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
+      if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
 	i.tm.opcode_modifier.commutative = 1;
       i.tm.opcode_modifier.evex = 0;
       i.tm.opcode_modifier.masking = 0;
@@ -4395,6 +4354,7 @@ load_insn_p (void)
       if (i.tm.base_opcode == 0xae
 	  && i.tm.opcode_modifier.vex
 	  && i.tm.opcode_modifier.opcodespace == SPACE_0F
+	  && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
 	  && i.tm.extension_opcode == 2)
 	return 1;
     }
@@ -6388,7 +6348,9 @@ match_template (char mnem_suffix)
 	}
 
       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
-      if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
+      if (i.reloc[0] == BFD_RELOC_386_GOT32
+	  && t->base_opcode == 0xa0
+	  && t->opcode_modifier.opcodespace == SPACE_BASE)
 	continue;
 
       /* We check register size if needed.  */
@@ -6415,6 +6377,7 @@ match_template (char mnem_suffix)
 	     zero-extend %eax to %rax.  */
 	  if (flag_code == CODE_64BIT
 	      && t->base_opcode == 0x90
+	      && t->opcode_modifier.opcodespace == SPACE_BASE
 	      && i.types[0].bitfield.instance == Accum
 	      && i.types[0].bitfield.dword
 	      && i.types[1].bitfield.instance == Accum
@@ -6425,6 +6388,7 @@ match_template (char mnem_suffix)
 	  if (flag_code != CODE_64BIT
 	      && i.hle_prefix
 	      && t->base_opcode == 0xa0
+	      && t->opcode_modifier.opcodespace == SPACE_BASE
 	      && i.types[0].bitfield.instance == Accum
 	      && (i.flags[1] & Operand_Mem))
 	    continue;
@@ -6780,7 +6744,9 @@ process_suffix (void)
 	 ambiguity checking below.  The suffix will be replaced afterwards
 	 to represent the destination (register).  */
       if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
-	  || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
+	  || (i.tm.base_opcode == 0x63
+	      && i.tm.opcode_modifier.opcodespace == SPACE_BASE
+	      && i.tm.cpu_flags.bitfield.cpu64))
 	--i.operands;
 
       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
@@ -7028,6 +6994,7 @@ process_suffix (void)
 	    i.suffix = SHORT_MNEM_SUFFIX;
 	  else if ((i.tm.base_opcode | 8) == 0xfbe
 		   || (i.tm.base_opcode == 0x63
+		       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
 		       && i.tm.cpu_flags.bitfield.cpu64))
 	    /* handled below */;
 	  else if (evex)
@@ -7042,7 +7009,9 @@ process_suffix (void)
     }
 
   if ((i.tm.base_opcode | 8) == 0xfbe
-      || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
+      || (i.tm.base_opcode == 0x63
+	  && i.tm.opcode_modifier.opcodespace == SPACE_BASE
+	  && i.tm.cpu_flags.bitfield.cpu64))
     {
       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
 	 In AT&T syntax, if there is no suffix (warned about above), the default
@@ -7741,6 +7710,7 @@ process_operands (void)
 
   if ((i.seg[0] || i.prefix[SEG_PREFIX])
       && i.tm.base_opcode == 0x8d /* lea */
+      && i.tm.opcode_modifier.opcodespace == SPACE_BASE
       && !is_any_vex_encoding(&i.tm))
     {
       if (!quiet_warnings)
@@ -9244,7 +9214,8 @@ output_insn (void)
 	  || i.tm.cpu_flags.bitfield.cpupopcnt
 	  /* LAHF-SAHF insns in 64-bit mode.  */
 	  || (flag_code == CODE_64BIT
-	      && (i.tm.base_opcode | 1) == 0x9f))
+	      && (i.tm.base_opcode | 1) == 0x9f
+	      && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
       if (i.tm.cpu_flags.bitfield.cpuavx
 	  || i.tm.cpu_flags.bitfield.cpuavx2
@@ -9354,7 +9325,8 @@ output_insn (void)
 	 assembler ignore LOCK prefix and serves as a workaround.  */
       if (omit_lock_prefix)
 	{
-	  if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
+	  if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
+	      && i.tm.opcode_modifier.isprefix)
 	    return;
 	  i.prefix[LOCK_PREFIX] = 0;
 	}
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1175,12 +1175,12 @@ adjust_broadcast_modifier (char **opnd)
   return bcst_type;
 }
 
-static int
-process_i386_opcode_modifier (FILE *table, char *mod, char **opnd, int lineno)
+static void
+process_i386_opcode_modifier (FILE *table, char *mod, unsigned int prefix,
+			      char **opnd, int lineno)
 {
   char *str, *next, *last;
   bitfield modifiers [ARRAY_SIZE (opcode_modifiers)];
-  unsigned int regular_encoding = 1;
 
   active_isstring = 0;
 
@@ -1199,18 +1199,7 @@ process_i386_opcode_modifier (FILE *tabl
 	    {
 	      int val = 1;
 	      if (strcasecmp(str, "Broadcast") == 0)
-		{
-		  val = adjust_broadcast_modifier (opnd);
-		  regular_encoding = 0;
-		}
-	      else if (strcasecmp(str, "Vex") == 0
-		       || strncasecmp(str, "Vex=", 4) == 0
-		       || strcasecmp(str, "EVex") == 0
-		       || strncasecmp(str, "EVex=", 5) == 0
-		       || strncasecmp(str, "Disp8MemShift=", 14) == 0
-		       || strncasecmp(str, "Masking=", 8) == 0
-		       || strcasecmp(str, "SAE") == 0)
-		regular_encoding = 0;
+		val = adjust_broadcast_modifier (opnd);
 
 	      set_bitfield (str, modifiers, val, ARRAY_SIZE (modifiers),
 			    lineno);
@@ -1231,6 +1220,19 @@ process_i386_opcode_modifier (FILE *tabl
 	    }
 	}
 
+      if (prefix)
+	{
+	  if (!modifiers[OpcodePrefix].value)
+	    modifiers[OpcodePrefix].value = prefix;
+	  else if (modifiers[OpcodePrefix].value != prefix)
+	    fail (_("%s:%d: Conflicting prefix specifications\n"),
+		  filename, lineno);
+	  else
+	    fprintf (stderr,
+		     _("%s:%d: Warning: redundant prefix specification\n"),
+		     filename, lineno);
+	}
+
       if (have_w && !bwlq_suf)
 	fail ("%s: %d: stray W modifier\n", filename, lineno);
       if (have_w && !(bwlq_suf & 1))
@@ -1242,8 +1244,6 @@ process_i386_opcode_modifier (FILE *tabl
 		 filename, lineno);
     }
   output_opcode_modifier (table, modifiers, ARRAY_SIZE (modifiers));
-
-  return regular_encoding;
 }
 
 enum stage {
@@ -1355,9 +1355,10 @@ static void
 output_i386_opcode (FILE *table, const char *name, char *str,
 		    char *last, int lineno)
 {
-  unsigned int i;
-  char *base_opcode, *extension_opcode, *opcode_length;
+  unsigned int i, prefix = 0;
+  char *base_opcode, *extension_opcode, *opcode_length, *end;
   char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
+  unsigned long int opcode, length;
 
   /* Find base_opcode.  */
   base_opcode = next_field (str, ',', &str, last);
@@ -1407,46 +1408,33 @@ output_i386_opcode (FILE *table, const c
 	}
     }
 
-  fprintf (table, "  { \"%s\", %s, %s, %s, %u,\n",
-	   name, base_opcode, extension_opcode, opcode_length, i);
+  length = strtoul (opcode_length, &end, 0);
+  opcode = strtoul (base_opcode, &end, 0);
 
-  if (process_i386_opcode_modifier (table, opcode_modifier,
-				    operand_types, lineno))
+  /* Transform prefixes encoded in the opcode into opcode modifier
+     representation.  */
+  if (length < 4)
     {
-      char *end;
-      unsigned long int length = strtoul (opcode_length, &end, 0);
-      unsigned long int opcode = strtoul (base_opcode, &end, 0);
-      switch (length)
+      switch (opcode >> (8 * length))
 	{
-	case 4:
-	  break;
-	case 3:
-	  if ((opcode >> 24) != 0)
-	    fail (_("%s: %s: (base_opcode >> 24) != 0: %s\n"),
-		  filename, name, base_opcode);
-	  break;
-	case 2:
-	  if ((opcode >> 16) != 0)
-	    fail (_("%s: %s: (base_opcode >> 16) != 0: %s\n"),
-		  filename, name, base_opcode);
-	  break;
-	case 1:
-	  if ((opcode >> 8) != 0)
-	    fail (_("%s: %s: (base_opcode >> 8) != 0: %s\n"),
-		  filename, name, base_opcode);
-	  break;
-	case 0:
-	  if (opcode != 0)
-	    fail (_("%s: %s: base_opcode != 0: %s\n"),
-		  filename, name, base_opcode);
-	  break;
+	case 0: break;
+	case 0x66: prefix = PREFIX_0X66; break;
+	case 0xF3: prefix = PREFIX_0XF3; break;
+	case 0xF2: prefix = PREFIX_0XF2; break;
 	default:
-	  fail (_("%s: %s: invalid opcode length: %s\n"),
-		filename, name, opcode_length);
-	  break;
+	  fail (_("%s:%d: %s: Unexpected opcode prefix %02lx\n"),
+		filename, lineno, name, opcode >> (8 * length));
 	}
+
+      opcode &= (1UL << (8 * length)) - 1;
     }
 
+  fprintf (table, "  { \"%s\", 0x%0*lx%s, %s, %lu, %u,\n",
+	   name, 2 * (int)length, opcode, end, extension_opcode, length, i);
+
+  process_i386_opcode_modifier (table, opcode_modifier, prefix,
+				operand_types, lineno);
+
   process_i386_cpu_flag (table, cpu_flags, 0, ",", "    ", lineno);
 
   fprintf (table, "    { ");
@@ -1836,7 +1824,7 @@ process_i386_opcodes (FILE *table)
 
   fprintf (table, "  { NULL, 0, 0, 0, 0,\n");
 
-  process_i386_opcode_modifier (table, "0", NULL, -1);
+  process_i386_opcode_modifier (table, "0", 0, NULL, -1);
 
   process_i386_cpu_flag (table, "0", 0, ",", "    ", -1);
 



More information about the Binutils mailing list