This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: Add -march=/-mtune= to x86 assembler


On Thu, Jun 15, 2006 at 07:58:26PM +0100, Paul Brook wrote:
> 
> > in assembler. The reasons we haven't run into any serious problems are
> >
> > 1. By default, cpu_arch_flags is set to accept everything.
> > 2. .arch directive isn't used much.
> >
> > So my -mtune=CPU switch will optimize for CPU by generating instructions
> > for CPU if the instruction set of CPU is available.
> 
> But you said these instructions only existed on ppro or later. You can't have 
> mtune= affect the choice of instruction unless you also implement -march=. 
> 
> Consider gcc -march=386 -mtune=686. IIUC you're proposing that gcc doesn't 
> emit .arch, and doesn't pass through -march=, so gas defaults to allowing all 
> instructions. gas then sees -mtune= and generates instructions that don't 
> work on 386.
> 

This is a patch to add -march=/-mtune= to the x86 assembler. Currently,
it doesn't generate different code. I will add processor-specific
optimization later.


H.J.
----
2006-06-14  H.J. Lu  <hongjiu.lu@intel.com>

	* config/tc-i386.h (processor_type): New.
	(arch_entry): Add type.

	* config/tc-i386.c (cpu_arch_tune): New.
	(cpu_arch_tune_flags): Likewise.
	(cpu_arch_isa): Likewise.
	(cpu_arch_isa_flags): Likewise.
	(cpu_arch): Updated.
	(set_cpu_arch): Also update cpu_arch_isa/cpu_arch_isa_flags.
	(i386_target_format): Likewise.
	(OPTION_MARCH): New.
	(OPTION_MTUNE): Likewise.
	(md_longopts): Add -march= and -mtune=.
	(md_parse_option): Support -march= and -mtune=.
	(md_show_usage): Add -march=CPU/-mtune=CPU.

	* doc/as.texinfo: Add -march=CPU/-mtune=CPU.

	* doc/c-i386.texi: Document -march=CPU/-mtune=CPU.

--- gas/config/tc-i386.c.tune	2006-06-12 12:50:18.000000000 -0700
+++ gas/config/tc-i386.c	2006-06-15 15:40:18.000000000 -0700
@@ -323,6 +323,18 @@ static const char *cpu_sub_arch_name = N
 /* CPU feature flags.  */
 static unsigned int cpu_arch_flags = CpuUnknownFlags | CpuNo64;
 
+/* Cpu we are generating instructions for.  */
+enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of cpu we are generating instructions for.  */
+static unsigned int cpu_arch_tune_flags = CpuUnknownFlags;
+
+/* CPU instruction set architecture to use.  */
+enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of instruction set architecture used.  */
+static unsigned int cpu_arch_isa_flags = CpuUnknownFlags;
+
 /* If set, conditional jumps are not automatically promoted to handle
    larger than a byte offset.  */
 static unsigned int no_cond_jump_promotion = 0;
@@ -415,35 +427,85 @@ const relax_typeS md_relax_table[] =
   {0, 0, 4, 0}
 };
 
-static const arch_entry cpu_arch[] = {
-  {"i8086",	Cpu086 },
-  {"i186",	Cpu086|Cpu186 },
-  {"i286",	Cpu086|Cpu186|Cpu286 },
-  {"i386",	Cpu086|Cpu186|Cpu286|Cpu386 },
-  {"i486",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486 },
-  {"i586",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
-  {"i686",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
-  {"pentium",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
-  {"pentiumpro",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
-  {"pentiumii",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX },
-  {"pentiumiii",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2|CpuSSE },
-  {"pentium4",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
-  {"prescott",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI },
-  {"k6",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX },
-  {"k6_2",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow },
-  {"athlon",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
-  {"sledgehammer",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
-  {"opteron",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
-  {".mmx",	CpuMMX },
-  {".sse",	CpuMMX|CpuMMX2|CpuSSE },
-  {".sse2",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
-  {".sse3",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3 },
-  {".3dnow",	CpuMMX|Cpu3dnow },
-  {".3dnowa",	CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
-  {".padlock",	CpuPadLock },
-  {".pacifica",	CpuSVME },
-  {".svme",	CpuSVME },
-  {NULL, 0 }
+static const arch_entry cpu_arch[] =
+{
+  {"generic32", PROCESSOR_GENERIC32,
+   Cpu086|Cpu186|Cpu286|Cpu386},
+  {"generic64", PROCESSOR_GENERIC64,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2},
+  {"i8086", PROCESSOR_UNKNOWN,
+   Cpu086},
+  {"i186", PROCESSOR_UNKNOWN,
+   Cpu086|Cpu186},
+  {"i286", PROCESSOR_UNKNOWN,
+   Cpu086|Cpu186|Cpu286},
+  {"i386", PROCESSOR_GENERIC32,
+   Cpu086|Cpu186|Cpu286|Cpu386},
+  {"i486", PROCESSOR_I486,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486},
+  {"i586", PROCESSOR_PENTIUM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+  {"i686", PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+  {"pentium", PROCESSOR_PENTIUM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+  {"pentiumpro",PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+  {"pentiumii",	PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX},
+  {"pentiumiii",PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2
+   |CpuSSE},
+  {"pentium4", PROCESSOR_PENTIUM4,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2},
+  {"prescott", PROCESSOR_NOCONA,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"nocona", PROCESSOR_NOCONA,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"yonah", PROCESSOR_YONAH,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"merom", PROCESSOR_MEROM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuMNI},
+  {"k6", PROCESSOR_K6,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX},
+  {"k6_2", PROCESSOR_K6,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow},
+  {"athlon", PROCESSOR_ATHLON,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+  {"sledgehammer", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {"opteron", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {"k8", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {".mmx", PROCESSOR_UNKNOWN,
+   CpuMMX},
+  {".sse", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE},
+  {".sse2", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE|CpuSSE2},
+  {".sse3", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {".3dnow", PROCESSOR_UNKNOWN,
+   CpuMMX|Cpu3dnow},
+  {".3dnowa", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+  {".padlock", PROCESSOR_UNKNOWN,
+   CpuPadLock},
+  {".pacifica", PROCESSOR_UNKNOWN,
+   CpuSVME},
+  {".svme", PROCESSOR_UNKNOWN,
+   CpuSVME}
 };
 
 const pseudo_typeS md_pseudo_table[] =
@@ -866,9 +928,9 @@ set_cpu_arch (dummy)
     {
       char *string = input_line_pointer;
       int e = get_symbol_end ();
-      int i;
+      unsigned int i;
 
-      for (i = 0; cpu_arch[i].name; i++)
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
 	{
 	  if (strcmp (string, cpu_arch[i].name) == 0)
 	    {
@@ -878,6 +940,8 @@ set_cpu_arch (dummy)
 		  cpu_sub_arch_name = NULL;
 		  cpu_arch_flags = (cpu_arch[i].flags
 				    | (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
+		  cpu_arch_isa = cpu_arch[i].type;
+		  cpu_arch_isa_flags = cpu_arch[i].flags;
 		  break;
 		}
 	      if ((cpu_arch_flags | cpu_arch[i].flags) != cpu_arch_flags)
@@ -890,7 +954,7 @@ set_cpu_arch (dummy)
 	      return;
 	    }
 	}
-      if (!cpu_arch[i].name)
+      if (i >= ARRAY_SIZE (cpu_arch))
 	as_bad (_("no such architecture: `%s'"), string);
 
       *input_line_pointer = e;
@@ -5428,6 +5492,8 @@ const char *md_shortopts = "qn";
 #define OPTION_32 (OPTION_MD_BASE + 0)
 #define OPTION_64 (OPTION_MD_BASE + 1)
 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
+#define OPTION_MARCH (OPTION_MD_BASE + 3)
+#define OPTION_MTUNE (OPTION_MD_BASE + 4)
 
 struct option md_longopts[] = {
   {"32", no_argument, NULL, OPTION_32},
@@ -5435,15 +5501,17 @@ struct option md_longopts[] = {
   {"64", no_argument, NULL, OPTION_64},
 #endif
   {"divide", no_argument, NULL, OPTION_DIVIDE},
+  {"march", required_argument, NULL, OPTION_MARCH},
+  {"mtune", required_argument, NULL, OPTION_MTUNE},
   {NULL, no_argument, NULL, 0}
 };
 size_t md_longopts_size = sizeof (md_longopts);
 
 int
-md_parse_option (c, arg)
-     int c;
-     char *arg ATTRIBUTE_UNUSED;
+md_parse_option (int c, char *arg)
 {
+  unsigned int i;
+
   switch (c)
     {
     case 'n':
@@ -5513,6 +5581,38 @@ md_parse_option (c, arg)
 #endif
       break;
 
+    case OPTION_MARCH:
+      if (*arg == '.')
+	as_fatal (_("Invalid -march= option: `%s'"), arg);
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	{
+	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	    {
+	      cpu_arch_isa = cpu_arch [i].type;
+	      cpu_arch_isa_flags = cpu_arch[i].flags;
+	      break;
+	    }
+	}
+      if (i >= ARRAY_SIZE (cpu_arch))
+	as_fatal (_("Invalid -march= option: `%s'"), arg);
+      break;
+
+    case OPTION_MTUNE:
+      if (*arg == '.')
+	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	{
+	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	    {
+	      cpu_arch_tune = cpu_arch [i].type;
+	      cpu_arch_tune_flags = cpu_arch[i].flags;
+	      break;
+	    }
+	}
+      if (i >= ARRAY_SIZE (cpu_arch))
+	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+      break;
+
     default:
       return 0;
     }
@@ -5543,6 +5643,11 @@ md_show_usage (stream)
   fprintf (stream, _("\
   --divide                ignored\n"));
 #endif
+  fprintf (stream, _("\
+  -march=CPU/-mtune=CPU   generate code/optimize for CPU, where CPU is one of:\n\
+                           i386, i486, pentium, pentiumpro, pentium4, nocona,\n\
+			   yonah, merom, k6, athlon, k8, generic32, generic64\n"));
+
 }
 
 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
@@ -5554,9 +5659,37 @@ const char *
 i386_target_format ()
 {
   if (!strcmp (default_arch, "x86_64"))
-    set_code_flag (CODE_64BIT);
+    {
+      set_code_flag (CODE_64BIT);
+      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_isa = PROCESSOR_GENERIC64;
+	  cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+			       |Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+			       |CpuSSE|CpuSSE2;
+	}
+      if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_tune = PROCESSOR_GENERIC64;
+	  cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+				|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+				|CpuSSE|CpuSSE2;
+	}
+    }
   else if (!strcmp (default_arch, "i386"))
-    set_code_flag (CODE_32BIT);
+    {
+      set_code_flag (CODE_32BIT);
+      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_isa = PROCESSOR_GENERIC32;
+	  cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+	}
+      if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_tune = PROCESSOR_GENERIC32;
+	  cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+	}
+    }
   else
     as_fatal (_("Unknown architecture"));
   switch (OUTPUT_FLAVOR)
--- gas/config/tc-i386.h.tune	2006-05-19 06:31:21.000000000 -0700
+++ gas/config/tc-i386.h	2006-06-15 14:14:01.000000000 -0700
@@ -377,11 +377,29 @@ typedef struct
 }
 sib_byte;
 
-/* x86 arch names and features */
+enum processor_type
+{
+  PROCESSOR_UNKNOWN,
+  PROCESSOR_I486,
+  PROCESSOR_PENTIUM,
+  PROCESSOR_PENTIUMPRO,
+  PROCESSOR_PENTIUM4,
+  PROCESSOR_NOCONA,
+  PROCESSOR_YONAH,
+  PROCESSOR_MEROM,
+  PROCESSOR_K6,
+  PROCESSOR_ATHLON,
+  PROCESSOR_K8,
+  PROCESSOR_GENERIC32,
+  PROCESSOR_GENERIC64
+};
+
+/* x86 arch names, types and features */
 typedef struct
 {
-  const char *name;	/* arch name */
-  unsigned int flags;	/* cpu feature flags */
+  const char *name;		/* arch name */
+  enum processor_type type;	/* arch type */
+  unsigned int flags;		/* cpu feature flags */
 }
 arch_entry;
 
--- gas/doc/as.texinfo.tune	2006-06-01 09:15:58.000000000 -0700
+++ gas/doc/as.texinfo	2006-06-15 15:26:26.000000000 -0700
@@ -296,6 +296,7 @@ gcc(1), ld(1), and the Info entries for 
 
 @emph{Target i386 options:}
    [@b{--32}|@b{--64}] [@b{-n}]
+   [@b{-march}=@var{CPU}] [@b{-mtune}=@var{CPU}] 
 @end ifset
 @ifset I960
 
--- gas/doc/c-i386.texi.tune	2005-11-11 10:48:53.000000000 -0800
+++ gas/doc/c-i386.texi	2006-06-15 15:28:43.000000000 -0700
@@ -76,6 +76,49 @@ character, which means that it cannot be
 not disable @samp{/} at the beginning of a line starting a comment, or
 affect using @samp{#} for starting a comment.
 
+@cindex @samp{-march=} option, i386
+@cindex @samp{-march=} option, x86-64
+@item -march=@var{CPU}
+This option specifies an instruction set architecture for generating
+instructions.  The following architectures are recognized:
+@code{i8086},
+@code{i186},
+@code{i286},
+@code{i386},
+@code{i486},
+@code{i586},
+@code{i686},
+@code{pentium},
+@code{pentiumpro},
+@code{pentiumii},
+@code{pentiumiii},
+@code{pentium4},
+@code{prescott},
+@code{nocona},
+@code{yonah},
+@code{merom},
+@code{k6},
+@code{k6_2},
+@code{athlon},
+@code{sledgehammer},
+@code{opteron},
+@code{k8},
+@code{generic32} and
+@code{generic64}.
+
+This option only affects instructions generated by the assembler. The
+@code{.arch} directive will take precedence.
+
+@cindex @samp{-mtune=} option, i386
+@cindex @samp{-mtune=} option, x86-64
+@item -mtune=@var{CPU}
+This option specifies a processor to optimize for. When used in
+conjunction with the @option{-march} option, only instructions
+of the processor specified by the @option{-march} option will be
+generated.
+
+Valid @var{CPU} values are identical to @option{-march=@var{CPU}}.
+
 @end table
 
 @node i386-Syntax
@@ -709,8 +752,11 @@ supported on the CPU specified.  The cho
 @item @samp{i8086} @tab @samp{i186} @tab @samp{i286} @tab @samp{i386}
 @item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
 @item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
-@item @samp{k6} @tab @samp{athlon} @samp{sledgehammer}
-@item @samp{.mmx} @samp{.sse} @samp{.sse2} @samp{.sse3} @samp{.3dnow}
+@item @samp{prescott} @tab @samp{nocona} @tab @samp{yonah} @tab @samp{merom}
+@item @samp{k6} @tab @samp{athlon} @tab @samp{sledgehammer} @tab @samp{k8} 
+@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
+@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.padlock} @tab @samp{.pacifica}
+@item @samp{.svme}
 @end multitable
 
 Apart from the warning, there are only two other effects on


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]