PATCH: Use a hash table for 26X linker speedup
H. J. Lu
hjl@lucon.org
Sun May 1 19:09:00 GMT 2005
On Sun, May 01, 2005 at 09:57:07AM -0700, H. J. Lu wrote:
> Linker is very slow on input files with many sections. The 64k section
> test is only enabled for CRIS. I did a profile. 70% of linker time is
> spent in lang_check_section_addresses:
>
> Flat profile:
>
> Each sample counts as 0.01 seconds.
> % cumulative self self total
> time seconds seconds calls Ks/call Ks/call name
> 72.92 948.31 948.31 1 0.95 0.95 lang_check_section_addresses
> 22.37 1239.21 290.90 132089 0.00 0.00 lang_output_section_find_1
>
> The main problem is we use a single section list. We have to scan the
> whole list for anything. In case of address overlap check, we only
> need to check the previous section. There are many other places in
> bfd, assembler and linker where a double section list will help. With
> this patch, I got 30% linker speed up in 64k section test:
>
> The old linker:
>
> sh 1 502.74s user 0.90s system 99% cpu 8:23.73 total
>
> The new linker:
>
> sh 1 340.58s user 0.90s system 99% cpu 5:41.55 total
>
> The profile data shows:
>
> Flat profile:
>
> Each sample counts as 0.01 seconds.
> % cumulative self self total
> time seconds seconds calls s/call s/call name
> 81.20 256.42 256.42 132089 0.00 0.00 lang_output_section_find_1
> 13.27 298.33 41.91 673985 0.00 0.00 bfd_hash_lookup
>
> I will work on lang_output_section_find_1 next. I am planning to use
> a hash table. I hope to enable 64k section on all ELF targets.
>
This is the patch. I got
sh 1 13.39s user 1.28s system 95% cpu 15.431 total
vs.
sh 1 340.58s user 0.90s system 99% cpu 5:41.55 total
That is a 26X speed up. I enabled 64k section test for all ELF targets.
H.J.
----
ld/
2005-05-01 H.J. Lu <hongjiu.lu@intel.com>
* ldlang.c (output_statement_hash_entry): New type.
(output_statement_table): New variable for hash table.
(output_statement_newfunc): New function.
(output_statement_table_init): Likewise.
(output_statement_table_free): Likewise.
(lang_init): Call output_statement_table_init.
(lang_finish): Renamed to ...
(lang_end): This.
(lang_process): Updated.
(lang_finish): New function.
(lang_output_section_find_1): Use hash table.
(lang_output_section_statement_lookup_1): Likewise.
* ldlang.h (lang_finish): New.
* ldmain.c (main): Call lang_finish.
ld/testsuite/
2005-05-01 H.J. Lu <hongjiu.lu@intel.com>
* ld-elf/sec64k.exp: Enabled for all ELF targets.
--- ld/ldlang.c.hash 2005-05-01 09:00:10.000000000 -0700
+++ ld/ldlang.c 2005-05-01 12:05:32.000000000 -0700
@@ -863,6 +863,45 @@ lang_add_input_file (const char *name,
return new_afile (name, file_type, target, TRUE);
}
+struct output_statement_hash_entry
+{
+ struct bfd_hash_entry root;
+ lang_output_section_statement_type *entry;
+};
+
+/* The hash table. */
+
+static struct bfd_hash_table output_statement_table;
+
+/* Support routines for the hash table used by lang_output_section_find_1,
+ initialize the table, fill in an entry and remove the table. */
+
+static struct bfd_hash_entry *
+output_statement_newfunc (struct bfd_hash_entry *entry ATTRIBUTE_UNUSED,
+ struct bfd_hash_table *table,
+ const char *string ATTRIBUTE_UNUSED)
+{
+ struct output_statement_hash_entry *ret
+ = bfd_hash_allocate (table,
+ sizeof (struct output_statement_hash_entry));
+ ret->entry = NULL;
+ return (struct bfd_hash_entry *) ret;
+}
+
+static void
+output_statement_table_init (void)
+{
+ if (! bfd_hash_table_init_n (&output_statement_table,
+ output_statement_newfunc, 61))
+ einfo (_("%P%F: Failed to create hash table\n"));
+}
+
+static void
+output_statement_table_free (void)
+{
+ bfd_hash_table_free (&output_statement_table);
+}
+
/* Build enough state so that the parser can build its tree. */
void
@@ -872,6 +911,8 @@ lang_init (void)
stat_ptr = &statement_list;
+ output_statement_table_init ();
+
lang_list_init (stat_ptr);
lang_list_init (&input_file_chain);
@@ -898,6 +939,12 @@ lang_init (void)
lang_statement_iteration = 0;
}
+void
+lang_finish (void)
+{
+ output_statement_table_free ();
+}
+
/*----------------------------------------------------------------------
A region is an area of memory declared with the
MEMORY { name:org=exp, len=exp ... }
@@ -983,16 +1030,28 @@ static lang_output_section_statement_typ
lang_output_section_find_1 (const char *const name, int constraint)
{
lang_output_section_statement_type *lookup;
+ struct output_statement_hash_entry *entry;
+ unsigned long hash;
- for (lookup = &lang_output_section_statement.head->output_section_statement;
- lookup != NULL;
- lookup = lookup->next)
+ entry = ((struct output_statement_hash_entry *)
+ bfd_hash_lookup (&output_statement_table, name, FALSE,
+ FALSE));
+ if (entry == NULL || (lookup = entry->entry) == NULL)
+ return NULL;
+
+ hash = entry->root.hash;
+ do
{
- if (strcmp (name, lookup->name) == 0
- && lookup->constraint != -1
+ if (lookup->constraint != -1
&& (constraint == 0 || constraint == lookup->constraint))
return lookup;
+ entry = (struct output_statement_hash_entry *) entry->root.next;
+ lookup = entry ? entry->entry : NULL;
}
+ while (entry != NULL
+ && entry->root.hash == hash
+ && strcmp (name, lookup->name) == 0);
+
return NULL;
}
@@ -1010,6 +1069,8 @@ lang_output_section_statement_lookup_1 (
lookup = lang_output_section_find_1 (name, constraint);
if (lookup == NULL)
{
+ struct output_statement_hash_entry *entry;
+
lookup = new_stat (lang_output_section_statement, stat_ptr);
lookup->region = NULL;
lookup->lma_region = NULL;
@@ -1034,6 +1095,15 @@ lang_output_section_statement_lookup_1 (
lookup->update_dot_tree = NULL;
lookup->phdrs = NULL;
+ entry = ((struct output_statement_hash_entry *)
+ bfd_hash_lookup (&output_statement_table, name, TRUE,
+ FALSE));
+ if (entry == NULL)
+ einfo (_("%P%F: bfd_hash_lookup failed creating section `%s'\n"),
+ name);
+
+ entry->entry = lookup;
+
lang_statement_append (&lang_output_section_statement,
(lang_statement_union_type *) lookup,
(lang_statement_union_type **) &lookup->next);
@@ -4495,7 +4565,7 @@ lang_set_startof (void)
}
static void
-lang_finish (void)
+lang_end (void)
{
struct bfd_link_hash_entry *h;
bfd_boolean warn;
@@ -5294,7 +5364,7 @@ lang_process (void)
/* Final stuffs. */
ldemul_finish ();
- lang_finish ();
+ lang_end ();
}
/* EXPORTED TO YACC */
--- ld/ldlang.h.hash 2005-05-01 09:00:10.000000000 -0700
+++ ld/ldlang.h 2005-05-01 11:39:36.000000000 -0700
@@ -449,6 +449,8 @@ extern int lang_statement_iteration;
extern void lang_init
(void);
+extern void lang_finish
+ (void);
extern lang_memory_region_type *lang_memory_region_lookup
(const char *const, bfd_boolean);
extern lang_memory_region_type *lang_memory_region_default
--- ld/ldmain.c.hash 2005-03-16 17:37:00.000000000 -0800
+++ ld/ldmain.c 2005-05-01 11:40:39.000000000 -0700
@@ -474,6 +474,8 @@ main (int argc, char **argv)
if (nocrossref_list != NULL)
check_nocrossrefs ();
+ lang_finish ();
+
/* Even if we're producing relocatable output, some non-fatal errors should
be reported in the exit status. (What non-fatal errors, if any, do we
want to ignore for relocatable output?) */
--- ld/testsuite/ld-elf/sec64k.exp.hash 2005-03-03 08:56:35.000000000 -0800
+++ ld/testsuite/ld-elf/sec64k.exp 2005-05-01 11:50:34.000000000 -0700
@@ -24,12 +24,6 @@ if ![is_elf_format] {
return
}
-# Per-port excludes, since this test takes an overwhelmingly long time
-# currently.
-if { ![istarget cris-*-*] } {
- return
-}
-
# Test >64k sections, with and without -r. First, create the assembly
# files. Have a relocation to another section and one within the local
# section.
More information about the Binutils
mailing list