[PATCH] don't generate long nops on i686 (only when -march=[intel])

Quentin Neill quentin.neill.gnu@gmail.com
Mon Aug 2 23:19:00 GMT 2010


On Tue, Jul 13, 2010 at 8:33 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Tue, Jul 13, 2010 at 5:59 AM, Kyle McMartin <kyle@mcmartin.ca> wrote:
>> From: Kyle McMartin <kyle@redhat.com>
>>
>> Hi,
>>
>> In Fedora, glibc started building itself with -march=i686 flags to
>> binutils as an "optimization." This exposes issues on the Geode, which
>> is i686, but for long nops, since they are apparently not an architected
>> part of the i686 ISA.
>>
>> Clarify this by only enabling long nops if we're generating 64-bit code
>> (as far as I can tell, all x86_64 cpus support this) or explicitly
>> generating code for the pentiumpro and above.
>>
>> Using this patch prevents people from overambitiously optimizing and
>> breaking the Geode.
>>
>> Signed-off-by: Kyle McMartin <kyle@redhat.com>
>
> There is no ChangeLog. Please add CpuNop to i386-opc.h and use it
> to determine when to generate nops.
>
> Thanks.

Kyle, I was working on a similar patch.  Here it is for previewing.

It has a ChangeLog entry but without test cases.  I will post with the
test cases as soon as I get a clean "make check" .

-- 
Quentin Neill
-------------- next part --------------
gas/
	* config/tc-i386.c (cpu_arch): Make pentiumpro use
	  CPU_PENTIUMPRO, not PROCESSOR_I686_FLAGS.  Add
	  Nop and noNop isa extension flags.
	  (i386_align_code): Use cpunop isa flags to decide
	  when to generate NOPs for alignment, not cpui686.

opcodes/
	* i386-gen.c (cpu_flag_init): New CPU_PENTIUMPRO_FLAGS
	  that uses new CpuNop.  Add CpuNop to CPU_P2_FLAGS
	  and later processors.  Add CPU_NOP_FLAGS.
	  (cpu_flags): Add CpuNop bitfield.
	* i386-opc.h (enum): Add CpuNop bitfield definition.
	  Fix typos in comments.
	  (i386_cpu_flags): Add cpunop bit.
	* i386-opc.tbl:  Use CpuNop in cpu_flags for nop instruction.


Index: gas/config/tc-i386.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-i386.c,v
retrieving revision 1.442
diff -u -d -u -p -r1.442 tc-i386.c
--- gas/config/tc-i386.c	3 Jul 2010 22:15:58 -0000	1.442
+++ gas/config/tc-i386.c	2 Aug 2010 22:46:28 -0000
@@ -586,7 +586,7 @@ static const arch_entry cpu_arch[] =
   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
     CPU_I586_FLAGS, 0 },
   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
-    CPU_I686_FLAGS, 0 },
+    CPU_PENTIUMPRO_FLAGS, 0 },
   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
     CPU_P2_FLAGS, 0 },
   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
@@ -691,6 +691,10 @@ static const arch_entry cpu_arch[] =
     CPU_EPT_FLAGS, 0 },
   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
     CPU_CLFLUSH_FLAGS, 0 },
+  { STRING_COMMA_LEN (".Nop"), PROCESSOR_UNKNOWN,
+    CPU_NOP_FLAGS, 0 },
+  { STRING_COMMA_LEN (".noNop"), PROCESSOR_UNKNOWN,
+    CPU_NOP_FLAGS, 0 },
   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
     CPU_SYSCALL_FLAGS, 0 },
   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
@@ -998,7 +1002,7 @@ i386_align_code (fragS *fragP, int count
      will be used.
 
      When -mtune= isn't used, alt_long_patt will be used if
-     cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will
+     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
      be used.
 
      When -march= or .arch is used, we can't use anything beyond
@@ -1028,8 +1032,8 @@ i386_align_code (fragS *fragP, int count
 	    {
 	    case PROCESSOR_UNKNOWN:
 	      /* We use cpu_arch_isa_flags to check if we SHOULD
-		 optimize for Cpu686.  */
-	      if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+		 optimize with nops.  */
+	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
 		patt = alt_long_patt;
 	      else
 		patt = f32_patt;
@@ -1079,8 +1083,8 @@ i386_align_code (fragS *fragP, int count
 	    case PROCESSOR_BDVER1:
 	    case PROCESSOR_GENERIC32:
 	      /* We use cpu_arch_isa_flags to check if we CAN optimize
-		 for Cpu686.  */
-	      if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+		 with nops.  */
+	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
 		patt = alt_short_patt;
 	      else
 		patt = f32_patt;
@@ -1092,7 +1096,7 @@ i386_align_code (fragS *fragP, int count
 	    case PROCESSOR_CORE2:
 	    case PROCESSOR_COREI7:
 	    case PROCESSOR_L1OM:
-	      if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
 		patt = alt_long_patt;
 	      else
 		patt = f32_patt;
 
Index: opcodes/i386-gen.c
===================================================================
RCS file: /cvs/src/src/opcodes/i386-gen.c,v
retrieving revision 1.68
diff -u -d -u -p -r1.68 i386-gen.c
--- opcodes/i386-gen.c	1 Jul 2010 21:55:01 -0000	1.68
+++ opcodes/i386-gen.c	2 Aug 2010 22:46:28 -0000
@@ -62,32 +62,34 @@ static initializer cpu_flag_init[] =
     "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu387" },
   { "CPU_I686_FLAGS",
     "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687" },
+  { "CPU_PENTIUMPRO_FLAGS",
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuNop" },
   { "CPU_P2_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuMMX" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuNop|CpuMMX" },
   { "CPU_P3_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuMMX|CpuSSE" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuNop|CpuMMX|CpuSSE" },
   { "CPU_P4_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuMMX|CpuSSE|CpuSSE2" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuNop|CpuMMX|CpuSSE|CpuSSE2" },
   { "CPU_NOCONA_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuLM" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuLM" },
   { "CPU_CORE_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3" },
   { "CPU_CORE2_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuLM" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuLM" },
   { "CPU_COREI7_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1|CpuSSE4_2|CpuRdtscp|CpuLM" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1|CpuSSE4_2|CpuRdtscp|CpuLM" },
   { "CPU_K6_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuMMX" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuNop|CpuMMX" },
   { "CPU_K6_2_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuMMX|Cpu3dnow" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuNop|CpuMMX|Cpu3dnow" },
   { "CPU_ATHLON_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|Cpu387|Cpu687|CpuMMX|Cpu3dnow|Cpu3dnowA" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|Cpu387|Cpu687|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA" },
   { "CPU_K8_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuLM" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuLM" },
   { "CPU_AMDFAM10_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM" },
   { "CPU_BDVER1_FLAGS",
-    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM|CpuFMA4|CpuXOP|CpuLWP" },
+    "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM|CpuFMA4|CpuXOP|CpuLWP" },
   { "CPU_8087_FLAGS",
     "Cpu8087" },
   { "CPU_287_FLAGS",
@@ -98,6 +100,8 @@ static initializer cpu_flag_init[] =
     "Cpu8087|Cpu287|Cpu387|Cpu687|CpuFISTTP" },
   { "CPU_CLFLUSH_FLAGS",
     "CpuClflush" },
+  { "CPU_NOP_FLAGS",
+    "CpuNop" },
   { "CPU_SYSCALL_FLAGS",
     "CpuSYSCALL" },
   { "CPU_MMX_FLAGS",
@@ -284,6 +288,7 @@ static bitfield cpu_flags[] =
   BITFIELD (Cpu586),
   BITFIELD (Cpu686),
   BITFIELD (CpuClflush),
+  BITFIELD (CpuNop),
   BITFIELD (CpuSYSCALL),
   BITFIELD (Cpu8087),
   BITFIELD (Cpu287),
Index: opcodes/i386-opc.h
===================================================================
RCS file: /cvs/src/src/opcodes/i386-opc.h,v
retrieving revision 1.73
diff -u -d -u -p -r1.73 i386-opc.h
--- opcodes/i386-opc.h	5 Jul 2010 16:40:32 -0000	1.73
+++ opcodes/i386-opc.h	2 Aug 2010 22:46:28 -0000
@@ -44,9 +44,11 @@ enum
   Cpu586,
   /* i686 or better required */
   Cpu686,
-  /* CLFLUSH Instuction support required */
+  /* CLFLUSH Instruction support required */
   CpuClflush,
-  /* SYSCALL Instuctions support required */
+  /* nop Instruction support required */
+  CpuNop,
+  /* SYSCALL Instructions support required */
   CpuSYSCALL,
   /* Floating point support required */
   Cpu8087,
@@ -92,9 +94,9 @@ enum
   CpuAVX,
   /* Intel L1OM support required */
   CpuL1OM,
-  /* Xsave/xrstor New Instuctions support required */
+  /* Xsave/xrstor New Instructions support required */
   CpuXsave,
-  /* Xsaveopt New Instuctions support required */
+  /* Xsaveopt New Instructions support required */
   CpuXsaveopt,
   /* AES support required */
   CpuAES,
@@ -108,11 +110,11 @@ enum
   CpuXOP,
   /* LWP support required */
   CpuLWP,
-  /* MOVBE Instuction support required */
+  /* MOVBE Instruction support required */
   CpuMovbe,
   /* EPT Instructions required */
   CpuEPT,
-  /* RDTSCP Instuction support required */
+  /* RDTSCP Instruction support required */
   CpuRdtscp,
   /* FSGSBASE Instructions required */
   CpuFSGSBase,
@@ -152,6 +154,7 @@ typedef union i386_cpu_flags
       unsigned int cpui586:1;
       unsigned int cpui686:1;
       unsigned int cpuclflush:1;
+      unsigned int cpunop:1;
       unsigned int cpusyscall:1;
       unsigned int cpu8087:1;
       unsigned int cpu287:1;
Index: opcodes/i386-opc.tbl
===================================================================
RCS file: /cvs/src/src/opcodes/i386-opc.tbl,v
retrieving revision 1.87
diff -u -d -u -p -r1.87 i386-opc.tbl
--- opcodes/i386-opc.tbl	5 Jul 2010 17:14:21 -0000	1.87
+++ opcodes/i386-opc.tbl	2 Aug 2010 22:46:28 -0000
@@ -500,7 +500,7 @@ bound, 2, 0x62, None, 1, Cpu186|CpuNo64,
 
 hlt, 0, 0xf4, None, 1, 0, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 
-nop, 1, 0xf1f, 0x0, 2, Cpu686, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+nop, 1, 0xf1f, 0x0, 2, CpuNop, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 
 // nop is actually "xchg %ax,%ax" in 16bit mode, "xchg %eax,%eax" in
 // 32bit mode and "xchg %rax,%rax" in 64bit mode.


More information about the Binutils mailing list