[PATCH] x86: improve/fix string instruction operand handling

Jan Beulich jbeulich@novell.com
Thu Aug 28 10:56:00 GMT 2008


All addressing forms were silently accepted for string instructions. While
most other assemblers appear to also do so, warn about them now. Also, fix
silent generation of bad code when a displacement is present in one of the
operands.

Built and tested on i686-pc-linux-gnu.

gas/
2008-08-27  Jan Beulich  <jbeulich@novell.com>

	* config/tc-i386.c (md_assemble): Force number of displacement
	operands to zero when processing string instruction.
	(i386_index_check): Special-case string instruction operands. Don't
	fudge address prefix if there already was a memory operand. Fix
	error message to correctly reflect the addressing mode used.
	(i386_att_operand): Fix comment.
	(i386_intel_operand): Snapshot, clear, and restore base and index
	reg for each operand processed. Increment count of memory operands
	later.

gas/testsuite/
2008-08-27  Jan Beulich  <jbeulich@novell.com>

	* gas/i386/string-bad.{l,s}, gas/i386/string-ok.{d,e,s}: New.
	* gas/i386/i386.exp: Run new tests.

--- 2008-08-27/gas/config/tc-i386.c	2008-08-27 15:33:14.000000000 +0200
+++ 2008-08-27/gas/config/tc-i386.c	2008-08-27 18:08:01.000000000 +0200
@@ -2762,6 +2762,7 @@ md_assemble (char *line)
     {
       if (!check_string ())
 	return;
+      i.disp_operands = 0;
     }
 
   if (!process_suffix ())
@@ -6888,13 +6889,77 @@ static int
 i386_index_check (const char *operand_string)
 {
   int ok;
+  const char *kind = "base/index";
 #if INFER_ADDR_PREFIX
   int fudged = 0;
 
  tryprefix:
 #endif
   ok = 1;
-  if (flag_code == CODE_64BIT)
+  if (current_templates->start->opcode_modifier.isstring
+      && !current_templates->start->opcode_modifier.immext
+      && (current_templates->end[-1].opcode_modifier.isstring
+	  || i.mem_operands))
+    {
+      /* Memory operands of string insns are special in that they only allow
+	 a single register (rDI, rSI, or rBX) as their memory address.  */
+      unsigned int expected;
+
+      kind = "string address";
+
+      if (current_templates->start->opcode_modifier.w)
+	{
+	  i386_operand_type type = current_templates->end[-1].operand_types[0];
+
+	  if (!type.bitfield.baseindex
+	      || ((!i.mem_operands != !intel_syntax)
+		  && current_templates->end[-1].operand_types[1]
+		     .bitfield.baseindex))
+	    type = current_templates->end[-1].operand_types[1];
+	  expected = type.bitfield.esseg ? 7 /* rDI */ : 6 /* rSI */;
+	}
+      else
+	expected = 3 /* rBX */;
+
+      if (!i.base_reg || i.index_reg
+	  || operand_type_check (i.types[this_operand], disp))
+	ok = -1;
+      else if (!(flag_code == CODE_64BIT
+		 ? i.prefix[ADDR_PREFIX]
+		   ? i.base_reg->reg_type.bitfield.reg32
+		   : i.base_reg->reg_type.bitfield.reg64
+		 : (flag_code == CODE_16BIT) ^ !i.prefix[ADDR_PREFIX]
+		   ? i.base_reg->reg_type.bitfield.reg32
+		   : i.base_reg->reg_type.bitfield.reg16))
+	ok = 0;
+      else if (i.base_reg->reg_num != expected)
+	ok = -1;
+
+      if (ok < 0)
+	{
+	  unsigned int j;
+
+	  for (j = 0; j < i386_regtab_size; ++j)
+	    if ((flag_code == CODE_64BIT
+		 ? i.prefix[ADDR_PREFIX]
+		   ? i386_regtab[j].reg_type.bitfield.reg32
+		   : i386_regtab[j].reg_type.bitfield.reg64
+		 : (flag_code == CODE_16BIT) ^ !i.prefix[ADDR_PREFIX]
+		   ? i386_regtab[j].reg_type.bitfield.reg32
+		   : i386_regtab[j].reg_type.bitfield.reg16)
+		&& i386_regtab[j].reg_num == expected)
+	      break;
+	  assert (j < i386_regtab_size);
+	  as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
+		   operand_string,
+		   intel_syntax ? '[' : '(',
+		   register_prefix,
+		   i386_regtab[j].reg_name,
+		   intel_syntax ? ']' : ')');
+	  ok = 1;
+	}
+    }
+  else if (flag_code == CODE_64BIT)
     {
       if ((i.base_reg
 	   && ((i.prefix[ADDR_PREFIX] == 0
@@ -6947,7 +7012,7 @@ i386_index_check (const char *operand_st
   if (!ok)
     {
 #if INFER_ADDR_PREFIX
-      if (i.prefix[ADDR_PREFIX] == 0)
+      if (!i.mem_operands && !i.prefix[ADDR_PREFIX])
 	{
 	  i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
 	  i.prefixes += 1;
@@ -6965,18 +7030,24 @@ i386_index_check (const char *operand_st
 	  goto tryprefix;
 	}
       if (fudged)
-	as_bad (_("`%s' is not a valid base/index expression"),
-		operand_string);
+	as_bad (_("`%s' is not a valid %s expression"),
+		operand_string,
+		kind);
       else
 #endif
-	as_bad (_("`%s' is not a valid %s bit base/index expression"),
+	as_bad (_("`%s' is not a valid %s-bit %s expression"),
 		operand_string,
-		flag_code_names[flag_code]);
+		flag_code_names[i.prefix[ADDR_PREFIX]
+					 ? flag_code == CODE_32BIT
+					   ? CODE_16BIT
+					   : CODE_32BIT
+					 : flag_code],
+		kind);
     }
   return ok;
 }
 
-/* Parse OPERAND_STRING into the i386_insn structure I.  Returns non-zero
+/* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
    on error.  */
 
 static int
@@ -8793,6 +8864,8 @@ i386_intel_operand (char *operand_string
 {
   int ret;
   char *p;
+  const reg_entry *final_base = i.base_reg;
+  const reg_entry *final_index = i.index_reg;
 
   p = intel_parser.op_string = xstrdup (operand_string);
   intel_parser.disp = (char *) xmalloc (strlen (operand_string) + 1);
@@ -8814,6 +8887,9 @@ i386_intel_operand (char *operand_string
       intel_parser.disp[0] = '\0';
       intel_parser.next_operand = NULL;
 
+      i.base_reg = NULL;
+      i.index_reg = NULL;
+
       /* Read the first token and start the parser.  */
       intel_get_token ();
       ret = intel_expr ();
@@ -8842,8 +8918,6 @@ i386_intel_operand (char *operand_string
 	  else
 	    {
 	      char *s = intel_parser.disp;
-	      i.types[this_operand].bitfield.mem = 1;
-	      i.mem_operands++;
 
 	      if (!quiet_warnings && intel_parser.is_mem < 0)
 		/* See the comments in intel_bracket_expr.  */
@@ -8871,6 +8945,11 @@ i386_intel_operand (char *operand_string
 		    }
 		  ret = i386_index_check (operand_string);
 		}
+	      if (ret)
+		{
+		  i.types[this_operand].bitfield.mem = 1;
+		  i.mem_operands++;
+		}
 	    }
 	}
 
@@ -8887,6 +8966,12 @@ i386_intel_operand (char *operand_string
 	  ret = i386_immediate (intel_parser.disp);
 	}
 
+      if (!final_base && !final_index)
+  	{
+	  final_base = i.base_reg;
+	  final_index = i.index_reg;
+  	}
+
       if (intel_parser.next_operand && this_operand >= MAX_OPERANDS - 1)
 	ret = 0;
       if (!ret || !intel_parser.next_operand)
@@ -8899,6 +8984,12 @@ i386_intel_operand (char *operand_string
   free (p);
   free (intel_parser.disp);
 
+  if (final_base || final_index)
+    {
+      i.base_reg = final_base;
+      i.index_reg = final_index;
+    }
+
   return ret;
 }
 
--- 2008-08-27/gas/testsuite/gas/i386/i386.exp	2008-08-27 15:33:18.000000000 +0200
+++ 2008-08-27/gas/testsuite/gas/i386/i386.exp	2008-08-27 17:03:16.000000000 +0200
@@ -184,6 +184,11 @@ if [expr ([istarget "i*86-*-*"] ||  [ist
     set ASFLAGS "$old_ASFLAGS"
 }
 
+if [expr [istarget "i*86-*-*"] || [istarget "x86_64-*-*"]] then {
+    run_dump_test "string-ok"
+    run_list_test "string-bad" ""
+}
+
 if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] then {
 
     global ASFLAGS
--- 2008-08-27/gas/testsuite/gas/i386/string-bad.l	1970-01-01 01:00:00.000000000 +0100
+++ 2008-08-27/gas/testsuite/gas/i386/string-bad.l	2008-08-27 17:03:16.000000000 +0200
@@ -0,0 +1,17 @@
+.*: Assembler messages:
+.*:4: Error: .*
+.*:5: Error: .*
+.*:6: Error: .*
+.*:7: Error: .*
+.*:8: Error: .*
+.*:9: Error: .*
+.*:10: Error: .*
+.*:14: Error: .*
+.*:15: Error: .*
+.*:16: Error: .*
+.*:17: Error: .*
+.*:18: Error: .*
+.*:19: Error: .*
+.*:20: Error: .*
+.*:21: Error: .*
+.*:22: Error: .*
--- 2008-08-27/gas/testsuite/gas/i386/string-bad.s	1970-01-01 01:00:00.000000000 +0100
+++ 2008-08-27/gas/testsuite/gas/i386/string-bad.s	2008-08-27 17:03:16.000000000 +0200
@@ -0,0 +1,22 @@
+	.text
+	.code32
+start:
+	movsb	(%esi), (%di)
+	movsb	(%si), (%edi)
+	movsb	(%esi), %ds:(%edi)
+	stosb	%ds:(%edi)
+	cmpsb	%ds:(%edi), (%esi)
+	scasb	%ds:(%edi)
+	insb	(%dx), %ds:(%edi)
+
+	.intel_syntax noprefix
+
+	movs	byte ptr [edi], [si]
+	movs	byte ptr [di], [esi]
+	movs	byte ptr ds:[edi], [esi]
+	movs	byte ptr [edi], word ptr [esi]
+	stos	byte ptr ds:[edi]
+	cmps	byte ptr [esi], ds:[edi]
+	cmps	byte ptr [esi], dword ptr [edi]
+	scas	byte ptr ds:[edi]
+	ins	byte ptr ds:[edi], dx
--- 2008-08-27/gas/testsuite/gas/i386/string-ok.d	1970-01-01 01:00:00.000000000 +0100
+++ 2008-08-27/gas/testsuite/gas/i386/string-ok.d	2008-08-27 17:03:16.000000000 +0200
@@ -0,0 +1,80 @@
+#as: -J
+#objdump: -dw -mi386
+#name: string insn operands
+#stderr: string-ok.e
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <.*start32>:
+[ 	]+[0-9a-f]+:	2e a6[ 	]+cmpsb  (%es:)?\(%edi\),%cs:\(%esi\)
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	67 a6[ 	]+(addr16 )?cmpsb (%es:)?\(%di\),(%ds:)?\(%si\)
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	6c[ 	]+insb   \(%dx\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	6c[ 	]+insb   \(%dx\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	2e ac[ 	]+lods   %cs:\(%esi\),%al
+[ 	]+[0-9a-f]+:	ac[ 	]+lods   (%ds:)?\(%esi\),%al
+[ 	]+[0-9a-f]+:	2e a4[ 	]+movsb  %cs:\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	67 a4[ 	]+(addr16 )?movsb (%ds:)?\(%si\),(%es:)?\(%di\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	2e 6e[ 	]+outsb  %cs:\(%esi\),\(%dx\)
+[ 	]+[0-9a-f]+:	6e[ 	]+outsb  (%ds:)?\(%esi\),\(%dx\)
+[ 	]+[0-9a-f]+:	ae[ 	]+scas   (%es:)?\(%edi\),%al
+[ 	]+[0-9a-f]+:	ae[ 	]+scas   (%es:)?\(%edi\),%al
+[ 	]+[0-9a-f]+:	aa[ 	]+stos   %al,(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	aa[ 	]+stos   %al,(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	2e d7[ 	]+xlat   %cs:\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+
+[0-9a-f]+ <.*start16>:
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	67 a4[ 	]+(addr16 )?movsb (%ds:)?\(%si\),(%es:)?\(%di\)
+
+[0-9a-f]+ <.*start64>:
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	67 a4[ 	]+(addr16 )?movsb (%ds:)?\(%si\),(%es:)?\(%di\)
+
+[0-9a-f]+ <.*intel32>:
+[ 	]+[0-9a-f]+:	2e a6[ 	]+cmpsb  (%es:)?\(%edi\),%cs:\(%esi\)
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	67 a6[ 	]+(addr16 )?cmpsb (%es:)?\(%di\),(%ds:)?\(%si\)
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	6c[ 	]+insb   \(%dx\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	6c[ 	]+insb   \(%dx\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	2e ac[ 	]+lods   %cs:\(%esi\),%al
+[ 	]+[0-9a-f]+:	ac[ 	]+lods   (%ds:)?\(%esi\),%al
+[ 	]+[0-9a-f]+:	2e a4[ 	]+movsb  %cs:\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	67 a4[ 	]+(addr16 )?movsb (%ds:)?\(%si\),(%es:)?\(%di\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	a4[ 	]+movsb  (%ds:)?\(%esi\),(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	2e 6e[ 	]+outsb  %cs:\(%esi\),\(%dx\)
+[ 	]+[0-9a-f]+:	6e[ 	]+outsb  (%ds:)?\(%esi\),\(%dx\)
+[ 	]+[0-9a-f]+:	ae[ 	]+scas   (%es:)?\(%edi\),%al
+[ 	]+[0-9a-f]+:	ae[ 	]+scas   (%es:)?\(%edi\),%al
+[ 	]+[0-9a-f]+:	aa[ 	]+stos   %al,(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	aa[ 	]+stos   %al,(%es:)?\(%edi\)
+[ 	]+[0-9a-f]+:	2e d7[ 	]+xlat   %cs:\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+[ 	]+[0-9a-f]+:	d7[ 	]+xlat   (%ds:)?\(%ebx\)
+
+[0-9a-f]+ <.*intel16>:
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	67 a4[ 	]+(addr16 )?movsb (%ds:)?\(%si\),(%es:)?\(%di\)
+
+[0-9a-f]+ <.*intel64>:
+[ 	]+[0-9a-f]+:	a6[ 	]+cmpsb  (%es:)?\(%edi\),(%ds:)?\(%esi\)
+[ 	]+[0-9a-f]+:	67 a4[ 	]+(addr16 )?movsb (%ds:)?\(%si\),(%es:)?\(%di\)
+#pass
--- 2008-08-27/gas/testsuite/gas/i386/string-ok.e	1970-01-01 01:00:00.000000000 +0100
+++ 2008-08-27/gas/testsuite/gas/i386/string-ok.e	2008-08-27 17:03:16.000000000 +0200
@@ -0,0 +1,30 @@
+.*: Assembler messages:
+.*:7: Warning: .*
+.*:7: Warning: .*
+.*:10: Warning: .*
+.*:13: Warning: .*
+.*:18: Warning: .*
+.*:19: Warning: .*
+.*:22: Warning: .*
+.*:25: Warning: .*
+.*:28: Warning: .*
+.*:31: Warning: .*
+.*:32: Warning: .*
+.*:33: Warning: .*
+.*:34: Warning: .*
+.*:35: Warning: .*
+
+.*:54: Warning: .*
+.*:54: Warning: .*
+.*:57: Warning: .*
+.*:60: Warning: .*
+.*:66: Warning: .*
+.*:67: Warning: .*
+.*:70: Warning: .*
+.*:73: Warning: .*
+.*:76: Warning: .*
+.*:79: Warning: .*
+.*:80: Warning: .*
+.*:81: Warning: .*
+.*:82: Warning: .*
+.*:83: Warning: .*
--- 2008-08-27/gas/testsuite/gas/i386/string-ok.s	1970-01-01 01:00:00.000000000 +0100
+++ 2008-08-27/gas/testsuite/gas/i386/string-ok.s	2008-08-27 17:03:16.000000000 +0200
@@ -0,0 +1,93 @@
+	.text
+	.code32
+start32:
+	cmpsb	(%edi), %cs:(%esi)
+	cmpsb	%es:(%edi), (%esi)
+	cmpsb	(%di), (%si)
+	cmpsb	(%esi), (%edi)
+
+	insb	(%dx), %es:(%edi)
+	insb	(%dx), (%esi)
+
+	lodsb	%cs:(%esi)
+	lodsb	(%edi)
+
+	movsb	%cs:(%esi), (%edi)
+	movsb	(%esi), %es:(%edi)
+	movsb	(%si), (%di)
+	movsb	(%ebx), (%edi)
+	movsb	(%esi), (%ebx)
+
+	outsb	%cs:(%esi), (%dx)
+	outsb	(%edi), (%dx)
+
+	scasb	%es:(%edi)
+	scasb	(%esi)
+
+	stosb	%es:(%edi)
+	stosb	(%esi)
+
+	xlatb	%cs:(%ebx)
+	xlatb	(%esi)
+	xlatb	(,%ebx)
+	xlatb	1(%ebx)
+	xlatb	x(%ebx)
+	xlatb	0
+
+	.code16
+start16:
+	cmpsb	(%di), (%si)
+	movsb	(%esi), (%edi)
+
+	.code64
+start64:
+	cmpsb	(%rdi), (%rsi)
+	movsb	(%esi), (%edi)
+
+	.intel_syntax noprefix
+	.code32
+intel32:
+	cmps	byte ptr cs:[esi], [edi]
+	cmps	byte ptr [esi], es:[edi]
+	cmps	byte ptr [esi], byte ptr [edi]
+	cmps	byte ptr [si], [di]
+	cmps	byte ptr [edi], [esi]
+
+	ins	byte ptr es:[edi], dx
+	ins	byte ptr [esi], dx
+
+	lods	byte ptr cs:[esi]
+	lods	byte ptr [edi]
+
+	movs	byte ptr [edi], cs:[esi]
+	movs	byte ptr es:[edi], [esi]
+	movs	byte ptr [edi], byte ptr [esi]
+	movs	byte ptr [di], [si]
+	movs	byte ptr [edi], [ebx]
+	movs	byte ptr [ebx], [esi]
+
+	outs	dx, byte ptr cs:[esi]
+	outs	dx, byte ptr [edi]
+
+	scas	byte ptr es:[edi]
+	scas	byte ptr [esi]
+
+	stos	byte ptr es:[edi]
+	stos	byte ptr [esi]
+
+	xlat	byte ptr cs:[ebx]
+	xlat	byte ptr [esi]
+	xlat	byte ptr [%ebx*1]
+	xlat	byte ptr [ebx+1]
+	xlat	byte ptr x[ebx]
+	xlat	byte ptr FLAT:0
+
+	.code16
+intel16:
+	cmps	byte ptr [si], [di]
+	movs	byte ptr [edi], [esi]
+
+	.code64
+intel64:
+	cmps	byte ptr [rsi], [rdi]
+	movs	byte ptr [edi], [esi]




More information about the Binutils mailing list