PATCH: Use a hash table for 26X linker speedup

H. J. Lu hjl@lucon.org
Fri May 27 16:42:00 GMT 2005


On Sat, May 21, 2005 at 08:40:55AM +0200, Christian Joensson wrote:
> Whatever happened with this?
> 
> /ChJ
> 
> On 5/1/05, H. J. Lu <hjl@lucon.org> wrote:
> > On Sun, May 01, 2005 at 09:57:07AM -0700, H. J. Lu wrote:
> > > Linker is very slow on input files with many sections. The 64k section
> > > test is only enabled for CRIS. I did a profile. 70% of linker time is
> > > spent in lang_check_section_addresses:
> > >
> > > Flat profile:
> > >
> > > Each sample counts as 0.01 seconds.
> > >   %   cumulative   self              self     total
> > >  time   seconds   seconds    calls  Ks/call  Ks/call  name
> > >  72.92    948.31   948.31        1     0.95     0.95 lang_check_section_addresses
> > >  22.37   1239.21   290.90   132089     0.00     0.00 lang_output_section_find_1
> > >
> > > The main problem is we use a single section list. We have to scan the
> > > whole list for anything. In case of address overlap check, we only
> > > need to check the previous section. There are many other places in
> > > bfd, assembler and linker where a double section list will help. With
> > > this patch, I got 30% linker speed up in 64k section test:
> > >
> > > The old linker:
> > >
> > > sh 1  502.74s user 0.90s system 99% cpu 8:23.73 total
> > >
> > > The new linker:
> > >
> > > sh 1  340.58s user 0.90s system 99% cpu 5:41.55 total
> > >
> > > The profile data shows:
> > >
> > > Flat profile:
> > >
> > > Each sample counts as 0.01 seconds.
> > >   %   cumulative   self              self     total
> > >  time   seconds   seconds    calls   s/call   s/call  name
> > >  81.20    256.42   256.42   132089     0.00     0.00 lang_output_section_find_1
> > >  13.27    298.33    41.91   673985     0.00     0.00  bfd_hash_lookup
> > >
> > > I will work on lang_output_section_find_1 next. I am planning to use
> > > a hash table. I hope to enable 64k section on all ELF targets.
> > >
> > 
> > This is the patch. I got
> > 
> > sh 1 13.39s user 1.28s system 95% cpu 15.431 total
> > 
> > vs.
> > 
> > sh 1  340.58s user 0.90s system 99% cpu 5:41.55 total
> > 
> > That is a 26X speed up. I enabled 64k section test for all ELF targets.
> > 


Here is the updated patch. I have been using this for a while. Is there
any objection?


H.J.
-----
ld/

2005-05-01  H.J. Lu  <hongjiu.lu@intel.com>

	* ldlang.c (output_statement_hash_entry): New type.
	(output_statement_table): New variable for hash table.
	(output_statement_newfunc): New function.
	(output_statement_table_init): Likewise.
	(output_statement_table_free): Likewise.
	(lang_init): Call output_statement_table_init.
	(lang_finish): Renamed to ...
	(lang_end): This.
	(lang_process): Updated.
	(lang_finish): New function.
	(lang_output_section_find_1): Use hash table.
	(lang_output_section_statement_lookup_1): Likewise.

	* ldlang.h (lang_finish): New.

	* ldmain.c (main): Call lang_finish.

ld/testsuite/

2005-05-01  H.J. Lu  <hongjiu.lu@intel.com>

	* ld-elf/sec64k.exp: Enabled for all ELF targets.

--- ld/ldlang.c.hash	2005-05-11 08:12:08.000000000 -0700
+++ ld/ldlang.c	2005-05-11 09:14:33.000000000 -0700
@@ -864,6 +864,45 @@ lang_add_input_file (const char *name,
   return new_afile (name, file_type, target, TRUE);
 }
 
+struct output_statement_hash_entry
+{
+  struct bfd_hash_entry root;
+  lang_output_section_statement_type *entry;
+};
+
+/* The hash table.  */
+
+static struct bfd_hash_table output_statement_table;
+
+/* Support routines for the hash table used by lang_output_section_find_1,
+   initialize the table, fill in an entry and remove the table.  */
+
+static struct bfd_hash_entry *
+output_statement_newfunc (struct bfd_hash_entry *entry ATTRIBUTE_UNUSED, 
+			  struct bfd_hash_table *table,
+			  const char *string ATTRIBUTE_UNUSED)
+{
+  struct output_statement_hash_entry *ret
+    = bfd_hash_allocate (table,
+			 sizeof (struct output_statement_hash_entry));
+  ret->entry = NULL;
+  return (struct bfd_hash_entry *) ret;
+}
+
+static void
+output_statement_table_init (void)
+{
+  if (! bfd_hash_table_init_n (&output_statement_table,
+			       output_statement_newfunc, 61))
+    einfo (_("%P%F: Failed to create hash table\n"));
+}
+
+static void
+output_statement_table_free (void)
+{
+  bfd_hash_table_free (&output_statement_table);
+}
+
 /* Build enough state so that the parser can build its tree.  */
 
 void
@@ -873,6 +912,8 @@ lang_init (void)
 
   stat_ptr = &statement_list;
 
+  output_statement_table_init ();
+
   lang_list_init (stat_ptr);
 
   lang_list_init (&input_file_chain);
@@ -899,6 +940,12 @@ lang_init (void)
   lang_statement_iteration = 0;
 }
 
+void
+lang_finish (void)
+{
+  output_statement_table_free ();
+}
+
 /*----------------------------------------------------------------------
   A region is an area of memory declared with the
   MEMORY {  name:org=exp, len=exp ... }
@@ -984,18 +1031,30 @@ static lang_output_section_statement_typ
 lang_output_section_find_1 (const char *const name, int constraint)
 {
   lang_output_section_statement_type *lookup;
+  struct output_statement_hash_entry *entry;
+  unsigned long hash;
 
-  for (lookup = &lang_output_section_statement.head->output_section_statement;
-       lookup != NULL;
-       lookup = lookup->next)
+  entry = ((struct output_statement_hash_entry *)
+	   bfd_hash_lookup (&output_statement_table, name, FALSE,
+			    FALSE));
+  if (entry == NULL || (lookup = entry->entry) == NULL)
+    return NULL;
+
+  hash = entry->root.hash;
+  do
     {
-      if (strcmp (name, lookup->name) == 0
-	  && lookup->constraint != -1
+      if (lookup->constraint != -1
 	  && (constraint == 0
 	      || (constraint == lookup->constraint
 		  && constraint != SPECIAL)))
 	return lookup;
+      entry = (struct output_statement_hash_entry *) entry->root.next;
+      lookup = entry ? entry->entry : NULL;
     }
+  while (entry != NULL
+	 && entry->root.hash == hash
+	 && strcmp (name, lookup->name) == 0);
+
   return NULL;
 }
 
@@ -1013,6 +1072,8 @@ lang_output_section_statement_lookup_1 (
   lookup = lang_output_section_find_1 (name, constraint);
   if (lookup == NULL)
     {
+      struct output_statement_hash_entry *entry;
+
       lookup = new_stat (lang_output_section_statement, stat_ptr);
       lookup->region = NULL;
       lookup->lma_region = NULL;
@@ -1036,6 +1097,15 @@ lang_output_section_statement_lookup_1 (
       lookup->update_dot_tree = NULL;
       lookup->phdrs = NULL;
 
+      entry = ((struct output_statement_hash_entry *)
+	       bfd_hash_lookup (&output_statement_table, name, TRUE,
+				FALSE));
+      if (entry == NULL)
+	einfo (_("%P%F: bfd_hash_lookup failed creating section `%s'\n"),
+	       name);
+
+      entry->entry = lookup;
+
       lang_statement_append (&lang_output_section_statement,
 			     (lang_statement_union_type *) lookup,
 			     (lang_statement_union_type **) &lookup->next);
@@ -4546,7 +4616,7 @@ lang_set_startof (void)
 }
 
 static void
-lang_finish (void)
+lang_end (void)
 {
   struct bfd_link_hash_entry *h;
   bfd_boolean warn;
@@ -5332,7 +5402,7 @@ lang_process (void)
   /* Final stuffs.  */
   lang_mark_used_section ();
   ldemul_finish ();
-  lang_finish ();
+  lang_end ();
 }
 
 /* EXPORTED TO YACC */
--- ld/ldlang.h.hash	2005-05-04 08:52:34.000000000 -0700
+++ ld/ldlang.h	2005-05-11 08:13:47.000000000 -0700
@@ -448,6 +448,8 @@ extern int lang_statement_iteration;
 
 extern void lang_init
   (void);
+extern void lang_finish
+  (void);
 extern lang_memory_region_type *lang_memory_region_lookup
   (const char *const, bfd_boolean);
 extern lang_memory_region_type *lang_memory_region_default
--- ld/ldmain.c.hash	2005-05-11 08:12:08.000000000 -0700
+++ ld/ldmain.c	2005-05-11 08:13:47.000000000 -0700
@@ -474,6 +474,8 @@ main (int argc, char **argv)
   if (nocrossref_list != NULL)
     check_nocrossrefs ();
 
+  lang_finish ();
+
   /* Even if we're producing relocatable output, some non-fatal errors should
      be reported in the exit status.  (What non-fatal errors, if any, do we
      want to ignore for relocatable output?)  */
--- ld/testsuite/ld-elf/sec64k.exp.hash	2005-03-03 08:56:35.000000000 -0800
+++ ld/testsuite/ld-elf/sec64k.exp	2005-05-11 08:13:47.000000000 -0700
@@ -24,12 +24,6 @@ if ![is_elf_format] {
     return
 }
 
-# Per-port excludes, since this test takes an overwhelmingly long time
-# currently.
-if { ![istarget cris-*-*] } {
-    return
-}
-
 # Test >64k sections, with and without -r.  First, create the assembly
 # files.  Have a relocation to another section and one within the local
 # section.



More information about the Binutils mailing list