[PATCH] don't generate long nops on i686 (only when -march=[intel])
Quentin Neill
quentin.neill.gnu@gmail.com
Mon Aug 2 23:19:00 GMT 2010
On Tue, Jul 13, 2010 at 8:33 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Tue, Jul 13, 2010 at 5:59 AM, Kyle McMartin <kyle@mcmartin.ca> wrote:
>> From: Kyle McMartin <kyle@redhat.com>
>>
>> Hi,
>>
>> In Fedora, glibc started building itself with -march=i686 flags to
>> binutils as an "optimization." This exposes issues on the Geode, which
>> is i686, but for long nops, since they are apparently not an architected
>> part of the i686 ISA.
>>
>> Clarify this by only enabling long nops if we're generating 64-bit code
>> (as far as I can tell, all x86_64 cpus support this) or explicitly
>> generating code for the pentiumpro and above.
>>
>> Using this patch prevents people from overambitiously optimizing and
>> breaking the Geode.
>>
>> Signed-off-by: Kyle McMartin <kyle@redhat.com>
>
> There is no ChangeLog. Please add CpuNop to i386-opc.h and use it
> to determine when to generate nops.
>
> Thanks.
Kyle, I was working on a similar patch. Here it is for previewing.
It has a ChangeLog entry but without test cases. I will post with the
test cases as soon as I get a clean "make check" .
--
Quentin Neill
-------------- next part --------------
gas/
* config/tc-i386.c (cpu_arch): Make pentiumpro use
CPU_PENTIUMPRO, not PROCESSOR_I686_FLAGS. Add
Nop and noNop isa extension flags.
(i386_align_code): Use cpunop isa flags to decide
when to generate NOPs for alignment, not cpui686.
opcodes/
* i386-gen.c (cpu_flag_init): New CPU_PENTIUMPRO_FLAGS
that uses new CpuNop. Add CpuNop to CPU_P2_FLAGS
and later processors. Add CPU_NOP_FLAGS.
(cpu_flags): Add CpuNop bitfield.
* i386-opc.h (enum): Add CpuNop bitfield definition.
Fix typos in comments.
(i386_cpu_flags): Add cpunop bit.
* i386-opc.tbl: Use CpuNop in cpu_flags for nop instruction.
Index: gas/config/tc-i386.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-i386.c,v
retrieving revision 1.442
diff -u -d -u -p -r1.442 tc-i386.c
--- gas/config/tc-i386.c 3 Jul 2010 22:15:58 -0000 1.442
+++ gas/config/tc-i386.c 2 Aug 2010 22:46:28 -0000
@@ -586,7 +586,7 @@ static const arch_entry cpu_arch[] =
{ STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
CPU_I586_FLAGS, 0 },
{ STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
- CPU_I686_FLAGS, 0 },
+ CPU_PENTIUMPRO_FLAGS, 0 },
{ STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
CPU_P2_FLAGS, 0 },
{ STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
@@ -691,6 +691,10 @@ static const arch_entry cpu_arch[] =
CPU_EPT_FLAGS, 0 },
{ STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
CPU_CLFLUSH_FLAGS, 0 },
+ { STRING_COMMA_LEN (".Nop"), PROCESSOR_UNKNOWN,
+ CPU_NOP_FLAGS, 0 },
+ { STRING_COMMA_LEN (".noNop"), PROCESSOR_UNKNOWN,
+ CPU_NOP_FLAGS, 0 },
{ STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
CPU_SYSCALL_FLAGS, 0 },
{ STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
@@ -998,7 +1002,7 @@ i386_align_code (fragS *fragP, int count
will be used.
When -mtune= isn't used, alt_long_patt will be used if
- cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will
+ cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
be used.
When -march= or .arch is used, we can't use anything beyond
@@ -1028,8 +1032,8 @@ i386_align_code (fragS *fragP, int count
{
case PROCESSOR_UNKNOWN:
/* We use cpu_arch_isa_flags to check if we SHOULD
- optimize for Cpu686. */
- if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+ optimize with nops. */
+ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
patt = alt_long_patt;
else
patt = f32_patt;
@@ -1079,8 +1083,8 @@ i386_align_code (fragS *fragP, int count
case PROCESSOR_BDVER1:
case PROCESSOR_GENERIC32:
/* We use cpu_arch_isa_flags to check if we CAN optimize
- for Cpu686. */
- if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+ with nops. */
+ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
patt = alt_short_patt;
else
patt = f32_patt;
@@ -1092,7 +1096,7 @@ i386_align_code (fragS *fragP, int count
case PROCESSOR_CORE2:
case PROCESSOR_COREI7:
case PROCESSOR_L1OM:
- if (fragP->tc_frag_data.isa_flags.bitfield.cpui686)
+ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
patt = alt_long_patt;
else
patt = f32_patt;
Index: opcodes/i386-gen.c
===================================================================
RCS file: /cvs/src/src/opcodes/i386-gen.c,v
retrieving revision 1.68
diff -u -d -u -p -r1.68 i386-gen.c
--- opcodes/i386-gen.c 1 Jul 2010 21:55:01 -0000 1.68
+++ opcodes/i386-gen.c 2 Aug 2010 22:46:28 -0000
@@ -62,32 +62,34 @@ static initializer cpu_flag_init[] =
"Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu387" },
{ "CPU_I686_FLAGS",
"Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687" },
+ { "CPU_PENTIUMPRO_FLAGS",
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuNop" },
{ "CPU_P2_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuMMX" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuNop|CpuMMX" },
{ "CPU_P3_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuMMX|CpuSSE" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|Cpu387|Cpu687|CpuNop|CpuMMX|CpuSSE" },
{ "CPU_P4_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuMMX|CpuSSE|CpuSSE2" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuNop|CpuMMX|CpuSSE|CpuSSE2" },
{ "CPU_NOCONA_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuLM" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuLM" },
{ "CPU_CORE_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3" },
{ "CPU_CORE2_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuLM" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuLM" },
{ "CPU_COREI7_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1|CpuSSE4_2|CpuRdtscp|CpuLM" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuClflush|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|CpuSSE|CpuSSE2|CpuSSE3|CpuSSSE3|CpuSSE4_1|CpuSSE4_2|CpuRdtscp|CpuLM" },
{ "CPU_K6_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuMMX" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuNop|CpuMMX" },
{ "CPU_K6_2_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuMMX|Cpu3dnow" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuSYSCALL|Cpu387|CpuNop|CpuMMX|Cpu3dnow" },
{ "CPU_ATHLON_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|Cpu387|Cpu687|CpuMMX|Cpu3dnow|Cpu3dnowA" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|Cpu387|Cpu687|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA" },
{ "CPU_K8_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuLM" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuLM" },
{ "CPU_AMDFAM10_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM" },
{ "CPU_BDVER1_FLAGS",
- "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM|CpuFMA4|CpuXOP|CpuLWP" },
+ "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuSYSCALL|CpuRdtscp|Cpu387|Cpu687|CpuFISTTP|CpuNop|CpuMMX|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM|CpuLM|CpuFMA4|CpuXOP|CpuLWP" },
{ "CPU_8087_FLAGS",
"Cpu8087" },
{ "CPU_287_FLAGS",
@@ -98,6 +100,8 @@ static initializer cpu_flag_init[] =
"Cpu8087|Cpu287|Cpu387|Cpu687|CpuFISTTP" },
{ "CPU_CLFLUSH_FLAGS",
"CpuClflush" },
+ { "CPU_NOP_FLAGS",
+ "CpuNop" },
{ "CPU_SYSCALL_FLAGS",
"CpuSYSCALL" },
{ "CPU_MMX_FLAGS",
@@ -284,6 +288,7 @@ static bitfield cpu_flags[] =
BITFIELD (Cpu586),
BITFIELD (Cpu686),
BITFIELD (CpuClflush),
+ BITFIELD (CpuNop),
BITFIELD (CpuSYSCALL),
BITFIELD (Cpu8087),
BITFIELD (Cpu287),
Index: opcodes/i386-opc.h
===================================================================
RCS file: /cvs/src/src/opcodes/i386-opc.h,v
retrieving revision 1.73
diff -u -d -u -p -r1.73 i386-opc.h
--- opcodes/i386-opc.h 5 Jul 2010 16:40:32 -0000 1.73
+++ opcodes/i386-opc.h 2 Aug 2010 22:46:28 -0000
@@ -44,9 +44,11 @@ enum
Cpu586,
/* i686 or better required */
Cpu686,
- /* CLFLUSH Instuction support required */
+ /* CLFLUSH Instruction support required */
CpuClflush,
- /* SYSCALL Instuctions support required */
+ /* nop Instruction support required */
+ CpuNop,
+ /* SYSCALL Instructions support required */
CpuSYSCALL,
/* Floating point support required */
Cpu8087,
@@ -92,9 +94,9 @@ enum
CpuAVX,
/* Intel L1OM support required */
CpuL1OM,
- /* Xsave/xrstor New Instuctions support required */
+ /* Xsave/xrstor New Instructions support required */
CpuXsave,
- /* Xsaveopt New Instuctions support required */
+ /* Xsaveopt New Instructions support required */
CpuXsaveopt,
/* AES support required */
CpuAES,
@@ -108,11 +110,11 @@ enum
CpuXOP,
/* LWP support required */
CpuLWP,
- /* MOVBE Instuction support required */
+ /* MOVBE Instruction support required */
CpuMovbe,
/* EPT Instructions required */
CpuEPT,
- /* RDTSCP Instuction support required */
+ /* RDTSCP Instruction support required */
CpuRdtscp,
/* FSGSBASE Instructions required */
CpuFSGSBase,
@@ -152,6 +154,7 @@ typedef union i386_cpu_flags
unsigned int cpui586:1;
unsigned int cpui686:1;
unsigned int cpuclflush:1;
+ unsigned int cpunop:1;
unsigned int cpusyscall:1;
unsigned int cpu8087:1;
unsigned int cpu287:1;
Index: opcodes/i386-opc.tbl
===================================================================
RCS file: /cvs/src/src/opcodes/i386-opc.tbl,v
retrieving revision 1.87
diff -u -d -u -p -r1.87 i386-opc.tbl
--- opcodes/i386-opc.tbl 5 Jul 2010 17:14:21 -0000 1.87
+++ opcodes/i386-opc.tbl 2 Aug 2010 22:46:28 -0000
@@ -500,7 +500,7 @@ bound, 2, 0x62, None, 1, Cpu186|CpuNo64,
hlt, 0, 0xf4, None, 1, 0, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-nop, 1, 0xf1f, 0x0, 2, Cpu686, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+nop, 1, 0xf1f, 0x0, 2, CpuNop, Modrm|No_bSuf|No_sSuf|No_ldSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
// nop is actually "xchg %ax,%ax" in 16bit mode, "xchg %eax,%eax" in
// 32bit mode and "xchg %rax,%rax" in 64bit mode.
More information about the Binutils
mailing list