This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Objcopy: use a hash table for symbol renaming


Using objcopy from binutils 2.20:

Symbol renaming in objcopy (from binutils 2.20) uses a linked list and a
linear search through that list.  This is a tad slow when the symbol
list is long (in my case with ~45000 symbols it takes around 20
minutes).

The attached patch replaces the linked list with a hash table
implementation.  I have taken care to retain the behaviour of the old
code (in particular, failing with an error message when the same symbol
appears twice, either as source or target symbol.)  This makes the case
above complete in 5 seconds.  And it even seems to do the right thing :)


Obvious points of possible improvements:

- A better hash function (sym_hash()).
- A better choice of hash table sizes.
- A better algorithm for when to resize the hash table.


Note: I'm off for christmas vacation now, so I may not be responding to
replies to this mail until next year.

eirik

>From f5fbb2bc0d49c36e6b7c694e7112e621dcaca314 Mon Sep 17 00:00:00 2001
From: Eirik Byrkjeflot Anonsen <eirik@opera.com>
Date: Fri, 18 Dec 2009 14:33:38 +0100
Subject: [PATCH 1/1] Replace symbol redefinition list with a hash table.

---
 binutils/objcopy.c |  150 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 134 insertions(+), 16 deletions(-)

diff --git a/binutils/objcopy.c b/binutils/objcopy.c
index f92bdca..0718948 100644
--- a/binutils/objcopy.c
+++ b/binutils/objcopy.c
@@ -57,12 +57,19 @@ struct is_specified_symbol_predicate_data
   bfd_boolean	found;
 };
 
-/* A list to support redefine_sym.  */
+/* An entry in the collection of symbols to rename.
+ *
+ * These entries are stored in two hash tables using open hashing.
+ * One hash table is hashed on 'source', and the other on 'target'.
+ * 'next_source' and 'next_target' are the linked lists in the two
+ * hash tables.
+ */
 struct redefine_node
 {
   char *source;
   char *target;
-  struct redefine_node *next;
+  struct redefine_node *next_source;
+  struct redefine_node *next_target;
 };
 
 typedef struct section_rename
@@ -214,7 +221,10 @@ static htab_t localize_specific_htab = NULL;
 static htab_t globalize_specific_htab = NULL;
 static htab_t keepglobal_specific_htab = NULL;
 static htab_t weaken_specific_htab = NULL;
-static struct redefine_node *redefine_sym_list = NULL;
+static struct redefine_node **redefine_sym_hash = NULL;
+static struct redefine_node **redefine_sym_hash_target = NULL;
+static size_t redefine_sym_hash_size = 0;
+static size_t redefine_sym_entry_count = 0;
 
 /* If this is TRUE, we weaken global symbols (set BSF_WEAK).  */
 static bfd_boolean weaken = FALSE;
@@ -1013,7 +1023,7 @@ filter_symbols (bfd *abfd, bfd *obfd, asymbol **osyms,
 
       undefined = bfd_is_und_section (bfd_get_section (sym));
 
-      if (redefine_sym_list)
+      if (redefine_sym_hash)
 	{
 	  char *old_name, *new_name;
 
@@ -1175,14 +1185,91 @@ filter_symbols (bfd *abfd, bfd *obfd, asymbol **osyms,
   return dst_count;
 }
 
+/* redefine_sym_hash helper functions. */
+
+static size_t
+sym_hash(const char * sym)
+{
+  /* Use sdbm hashing, for no particular reason. */
+  size_t hash;
+  hash = 0;
+  while (*sym != 0)
+    {
+      hash = *sym + (hash << 6) + (hash << 16) - hash;
+      sym ++;
+    };
+  return hash;
+}
+
+/* Grow the hash tables. */
+static void
+reallocate_redefine_sym_hash(void)
+{
+  size_t i;
+  size_t new_size;
+  struct redefine_node ** new_hash;
+  struct redefine_node ** new_hash_target;
+  /* Simple choice of new size.  Can probably be improved. */
+  new_size = redefine_sym_hash_size * 3 + 1;
+  if (new_size < 241)
+    new_size = 241;
+  else if (new_size > redefine_sym_entry_count)
+    /* Avoid making too big hash tables.  It's better to have longer lists. */
+    return;
+
+  new_hash = (struct redefine_node **) xmalloc(sizeof(struct redefine_node) * new_size);
+  new_hash_target = (struct redefine_node **) xmalloc(sizeof(struct redefine_node) * new_size);
+  for (i = 0; i < new_size; i++)
+    {
+      new_hash[i] = 0;
+      new_hash_target[i] = 0;
+    }
+
+  for (i = 0; i < redefine_sym_hash_size; i++)
+    {
+      while (redefine_sym_hash[i] != NULL)
+	{
+	  struct redefine_node * sym;
+	  size_t source_hash;
+	  sym = redefine_sym_hash[i];
+	  redefine_sym_hash[i] = sym->next_source;
+	  source_hash = sym_hash(sym->source);
+	  sym->next_source = new_hash[source_hash % new_size];
+	  new_hash[source_hash % new_size] = sym;
+	};
+    }
+
+  for (i = 0; i < redefine_sym_hash_size; i++)
+    {
+      while (redefine_sym_hash_target[i] != NULL)
+	{
+	  struct redefine_node * sym;
+	  size_t target_hash;
+	  sym = redefine_sym_hash_target[i];
+	  redefine_sym_hash_target[i] = sym->next_target;
+	  target_hash = sym_hash(sym->target);
+	  sym->next_target = new_hash_target[target_hash % new_size];
+	  new_hash_target[target_hash % new_size] = sym;
+	};
+    }
+
+  free(redefine_sym_hash);
+  free(redefine_sym_hash_target);
+  redefine_sym_hash = new_hash;
+  redefine_sym_hash_target = new_hash_target;
+  redefine_sym_hash_size = new_size;
+};
+
 /* Find the redefined name of symbol SOURCE.  */
 
 static const char *
 lookup_sym_redefinition (const char *source)
 {
   struct redefine_node *list;
+  size_t hash_entry;
+  hash_entry = sym_hash(source) % redefine_sym_hash_size;
 
-  for (list = redefine_sym_list; list != NULL; list = list->next)
+  for (list = redefine_sym_hash[hash_entry]; list != NULL; list = list->next_source)
     if (strcmp (source, list->source) == 0)
       return list->target;
 
@@ -1194,28 +1281,59 @@ lookup_sym_redefinition (const char *source)
 static void
 redefine_list_append (const char *cause, const char *source, const char *target)
 {
-  struct redefine_node **p;
-  struct redefine_node *list;
-  struct redefine_node *new_node;
+  size_t source_hash;
+  size_t target_hash;
+  size_t chain_depth;
+  size_t max_chain_depth;
+  struct redefine_node ** list;
+  struct redefine_node * new_node;
 
-  for (p = &redefine_sym_list; (list = *p) != NULL; p = &list->next)
+  /* If any list is longer than this, try to resize the hash table. */
+  max_chain_depth = 10;
+
+  if (redefine_sym_hash_size == 0)
+    {
+      reallocate_redefine_sym_hash();
+      if (redefine_sym_hash_size == 0)
+	fatal(_("%s: Failed to allocate memory for list of redefined symbols"),
+	      cause);
+    }
+
+  source_hash = sym_hash(source);
+  target_hash = sym_hash(target);
+  chain_depth = 0;
+  for  (list = &redefine_sym_hash[source_hash % redefine_sym_hash_size];
+	*list != NULL;
+	list = &((*list)->next_source))
     {
-      if (strcmp (source, list->source) == 0)
+      chain_depth ++;
+      if (strcmp (source, (*list)->source) == 0)
 	fatal (_("%s: Multiple redefinition of symbol \"%s\""),
 	       cause, source);
-
-      if (strcmp (target, list->target) == 0)
+    }
+  if (chain_depth <= max_chain_depth)
+    chain_depth = 0;
+  for  (list = &redefine_sym_hash_target[target_hash % redefine_sym_hash_size];
+	*list != NULL;
+	list = &((*list)->next_target))
+    {
+      chain_depth ++;
+      if (strcmp (target, (*list)->target) == 0)
 	fatal (_("%s: Symbol \"%s\" is target of more than one redefinition"),
 	       cause, target);
     }
+  if (chain_depth > max_chain_depth)
+    reallocate_redefine_sym_hash();
 
+  redefine_sym_entry_count ++;
   new_node = (struct redefine_node *) xmalloc (sizeof (struct redefine_node));
 
   new_node->source = strdup (source);
   new_node->target = strdup (target);
-  new_node->next = NULL;
-
-  *p = new_node;
+  new_node->next_source = redefine_sym_hash[source_hash % redefine_sym_hash_size];
+  new_node->next_target = redefine_sym_hash_target[target_hash % redefine_sym_hash_size];
+  redefine_sym_hash[source_hash % redefine_sym_hash_size] = new_node;
+  redefine_sym_hash_target[target_hash % redefine_sym_hash_size] = new_node;
 }
 
 /* Handle the --redefine-syms option.  Read lines containing "old new"
@@ -1815,7 +1933,7 @@ copy_object (bfd *ibfd, bfd *obfd)
       || convert_debugging
       || change_leading_char
       || remove_leading_char
-      || redefine_sym_list
+      || redefine_sym_hash
       || weaken)
     {
       /* Mark symbols used in output relocations so that they
-- 
1.6.3.3


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]