This is the mail archive of the binutils-cvs@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[binutils-gdb] Greatly improve the speed if looking up DWARF line number information.


https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=089e3718bd8de11fc4d6bbc8d32701033d467960

commit 089e3718bd8de11fc4d6bbc8d32701033d467960
Author: Igor Tsimbalist <tigor.tools@gmail.com>
Date:   Tue Nov 8 12:01:58 2016 +0000

    Greatly improve the speed if looking up DWARF line number information.
    
    	* dwarf2.c (comp_unit): Add new fields 'lookup_funcinfo_table' and
    	'number_of_functions' to keep lookup table and number of entries in
    	the table.
    	(line_sequence): Add new fields 'line_info_lookup' and 'num_lines'
    	to keep lookup table and number of entries in the table.
    	(lookup_funcinfo): New structure for lookup table for function
    	references.
    	(build_line_info_table): New function to create and build the lookup
    	table for line information.
    	(lookup_address_in_line_info_table): Use the lookup table instead of
    	traverse a linked list.
    	(compare_lookup_funcinfos): New compare fuction used in sorting of
    	lookup table for function references.
    	(build_lookup_funcinfo_table): New function to create, build and
    	sort the lookup table for functions references.
    	(lookup_address_in_function_table): Use the table instead of
    	traverse a linked list.
    	(_bfd_dwarf2_cleanup_debug_info): Free memory from function references
    	lookup table.

Diff:
---
 bfd/ChangeLog |  22 ++++
 bfd/dwarf2.c  | 377 +++++++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 317 insertions(+), 82 deletions(-)

diff --git a/bfd/ChangeLog b/bfd/ChangeLog
index 810dd05..d7f097f 100644
--- a/bfd/ChangeLog
+++ b/bfd/ChangeLog
@@ -1,3 +1,25 @@
+2016-11-08  Igor Tsimbalist  <tigor.tools@gmail.com>
+
+	* dwarf2.c (comp_unit): Add new fields 'lookup_funcinfo_table' and
+	'number_of_functions' to keep lookup table and number of entries in
+	the table.
+	(line_sequence): Add new fields 'line_info_lookup' and 'num_lines'
+	to keep lookup table and number of entries in the table.
+	(lookup_funcinfo): New structure for lookup table for function
+	references.
+	(build_line_info_table): New function to create and build the lookup
+	table for line information.
+	(lookup_address_in_line_info_table): Use the lookup table instead of
+	traverse a linked list.
+	(compare_lookup_funcinfos): New compare fuction used in sorting of
+	lookup table for function references.
+	(build_lookup_funcinfo_table): New function to create, build and
+	sort the lookup table for functions references.
+	(lookup_address_in_function_table): Use the table instead of
+	traverse a linked list.
+	(_bfd_dwarf2_cleanup_debug_info): Free memory from function references
+	lookup table.
+
 2016-11-04  Nick Clifton  <nickc@redhat.com>
 
 	* targets.c (bfd_target_vector): Only add riscv_elf32_vec target
diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c
index 81001c4..7215e7a 100644
--- a/bfd/dwarf2.c
+++ b/bfd/dwarf2.c
@@ -144,16 +144,16 @@ struct dwarf2_debug
   /* Length of the loaded .debug_str section.  */
   bfd_size_type dwarf_str_size;
 
-  /* Pointer to the .debug_ranges section loaded into memory. */
+  /* Pointer to the .debug_ranges section loaded into memory.  */
   bfd_byte *dwarf_ranges_buffer;
 
-  /* Length of the loaded .debug_ranges section. */
+  /* Length of the loaded .debug_ranges section.  */
   bfd_size_type dwarf_ranges_size;
 
   /* If the most recent call to bfd_find_nearest_line was given an
      address in an inlined function, preserve a pointer into the
      calling chain for subsequent calls to bfd_find_inliner_info to
-     use. */
+     use.  */
   struct funcinfo *inliner_chain;
 
   /* Section VMAs at the time the stash was built.  */
@@ -256,6 +256,12 @@ struct comp_unit
   /* A list of the functions found in this comp. unit.  */
   struct funcinfo *function_table;
 
+  /* A table of function information references searchable by address.  */
+  struct lookup_funcinfo *lookup_funcinfo_table;
+
+  /* Number of functions in the function_table and sorted_function_table.  */
+  bfd_size_type number_of_functions;
+
   /* A list of the variables found in this comp. unit.  */
   struct varinfo *variable_table;
 
@@ -390,7 +396,7 @@ struct info_hash_table
   struct bfd_hash_table base;
 };
 
-/* Function to create a new entry in info hash table. */
+/* Function to create a new entry in info hash table.  */
 
 static struct bfd_hash_entry *
 info_hash_table_newfunc (struct bfd_hash_entry *entry,
@@ -476,7 +482,7 @@ insert_info_hash_table (struct info_hash_table *hash_table,
 }
 
 /* Look up an info entry list from an info hash table.  Return NULL
-   if there is none. */
+   if there is none.  */
 
 static struct info_list_node *
 lookup_info_hash_table (struct info_hash_table *hash_table, const char *key)
@@ -1214,22 +1220,22 @@ non_mangled (int lang)
 
 struct line_info
 {
-  struct line_info* prev_line;
-  bfd_vma address;
-  char *filename;
-  unsigned int line;
-  unsigned int column;
-  unsigned int discriminator;
-  unsigned char op_index;
-  unsigned char end_sequence;		/* End of (sequential) code sequence.  */
+  struct line_info *	prev_line;
+  bfd_vma		address;
+  char *		filename;
+  unsigned int		line;
+  unsigned int		column;
+  unsigned int		discriminator;
+  unsigned char		op_index;
+  unsigned char		end_sequence;		/* End of (sequential) code sequence.  */
 };
 
 struct fileinfo
 {
-  char *name;
-  unsigned int dir;
-  unsigned int time;
-  unsigned int size;
+  char *		name;
+  unsigned int		dir;
+  unsigned int		time;
+  unsigned int		size;
 };
 
 struct line_sequence
@@ -1237,11 +1243,13 @@ struct line_sequence
   bfd_vma               low_pc;
   struct line_sequence* prev_sequence;
   struct line_info*     last_line;  /* Largest VMA.  */
+  struct line_info**    line_info_lookup;
+  bfd_size_type		num_lines;
 };
 
 struct line_info_table
 {
-  bfd*                  abfd;
+  bfd *                 abfd;
   unsigned int          num_files;
   unsigned int          num_dirs;
   unsigned int          num_sequences;
@@ -1260,23 +1268,37 @@ struct line_info_table
 struct funcinfo
 {
   /* Pointer to previous function in list of all functions.  */
-  struct funcinfo *prev_func;
+  struct funcinfo *	prev_func;
   /* Pointer to function one scope higher.  */
-  struct funcinfo *caller_func;
+  struct funcinfo *	caller_func;
   /* Source location file name where caller_func inlines this func.  */
-  char *caller_file;
+  char *		caller_file;
   /* Source location file name.  */
-  char *file;
+  char *		file;
   /* Source location line number where caller_func inlines this func.  */
-  int caller_line;
+  int			caller_line;
   /* Source location line number.  */
-  int line;
-  int tag;
-  bfd_boolean is_linkage;
-  const char *name;
-  struct arange arange;
+  int			line;
+  int			tag;
+  bfd			boolean is_linkage;
+  const char *		name;
+  struct arange		arange;
   /* Where the symbol is defined.  */
-  asection *sec;
+  asection *		sec;
+};
+
+struct lookup_funcinfo
+{
+  /* Function information corresponding to this lookup table entry.  */
+  struct funcinfo *	funcinfo;
+
+  /* The lowest address for this specific function.  */
+  bfd_vma 		low_addr;
+
+  /* The highest address of this function before the lookup table is sorted.
+     The highest address of all prior functions after the lookup table is
+     sorted, which is used for binary search.  */
+  bfd_vma 		high_addr;
 };
 
 struct varinfo
@@ -1539,7 +1561,7 @@ arange_add (const struct comp_unit *unit, struct arange *first_arange,
   while (arange);
 
   /* Need to allocate a new arange and insert it into the arange list.
-     Order isn't significant, so just insert after the first arange. */
+     Order isn't significant, so just insert after the first arange.  */
   arange = (struct arange *) bfd_alloc (unit->abfd, sizeof (*arange));
   if (arange == NULL)
     return FALSE;
@@ -1579,17 +1601,62 @@ compare_sequences (const void* a, const void* b)
   return 0;
 }
 
+/* Construct the line information table for quick lookup.  */
+
+static bfd_boolean
+build_line_info_table (struct line_info_table *  table,
+		       struct line_sequence *    seq)
+{
+  bfd_size_type      amt;
+  struct line_info** line_info_lookup;
+  struct line_info*  each_line;
+  unsigned int       num_lines;
+  unsigned int       index;
+
+  if (seq->line_info_lookup != NULL)
+    return TRUE;
+
+  /* Count the number of line information entries.  We could do this while
+     scanning the debug information, but some entries may be added via
+     lcl_head without having a sequence handy to increment the number of
+     lines.  */
+  num_lines = 0;
+  for (each_line = seq->last_line; each_line; each_line = each_line->prev_line)
+    num_lines++;
+
+  if (num_lines == 0)
+    return TRUE;
+
+  /* Allocate space for the line information lookup table.  */
+  amt = sizeof (struct line_info*) * num_lines;
+  line_info_lookup = (struct line_info**) bfd_alloc (table->abfd, amt);
+  if (line_info_lookup == NULL)
+    return FALSE;
+
+  /* Create the line information lookup table.  */
+  index = num_lines;
+  for (each_line = seq->last_line; each_line; each_line = each_line->prev_line)
+    line_info_lookup[--index] = each_line;
+
+  BFD_ASSERT (index == 0);
+
+  seq->num_lines = num_lines;
+  seq->line_info_lookup = line_info_lookup;
+
+  return TRUE;
+}
+
 /* Sort the line sequences for quick lookup.  */
 
 static bfd_boolean
 sort_line_sequences (struct line_info_table* table)
 {
-  bfd_size_type amt;
-  struct line_sequence* sequences;
-  struct line_sequence* seq;
-  unsigned int n = 0;
-  unsigned int num_sequences = table->num_sequences;
-  bfd_vma last_high_pc;
+  bfd_size_type          amt;
+  struct line_sequence*  sequences;
+  struct line_sequence*  seq;
+  unsigned int           n = 0;
+  unsigned int           num_sequences = table->num_sequences;
+  bfd_vma                last_high_pc;
 
   if (num_sequences == 0)
     return TRUE;
@@ -1610,6 +1677,8 @@ sort_line_sequences (struct line_info_table* table)
       sequences[n].low_pc = seq->low_pc;
       sequences[n].prev_sequence = NULL;
       sequences[n].last_line = seq->last_line;
+      sequences[n].line_info_lookup = NULL;
+      sequences[n].num_lines = 0;
       seq = seq->prev_sequence;
       free (last_seq);
     }
@@ -2091,7 +2160,7 @@ lookup_address_in_line_info_table (struct line_info_table *table,
 				   unsigned int *discriminator_ptr)
 {
   struct line_sequence *seq = NULL;
-  struct line_info *each_line;
+  struct line_info *info;
   int low, high, mid;
 
   /* Binary search the array of sequences.  */
@@ -2109,26 +2178,43 @@ lookup_address_in_line_info_table (struct line_info_table *table,
 	break;
     }
 
-  if (seq && addr >= seq->low_pc && addr < seq->last_line->address)
+  /* Check for a valid sequence.  */
+  if (!seq || addr < seq->low_pc || addr >= seq->last_line->address)
+    goto fail;
+
+  if (!build_line_info_table (table, seq))
+    goto fail;
+
+  /* Binary search the array of line information.  */
+  low = 0;
+  high = seq->num_lines;
+  info = NULL;
+  while (low < high)
     {
-      /* Note: seq->last_line should be a descendingly sorted list.  */
-      for (each_line = seq->last_line;
-	   each_line;
-	   each_line = each_line->prev_line)
-	if (addr >= each_line->address)
-	  break;
+      mid = (low + high) / 2;
+      info = seq->line_info_lookup[mid];
+      if (addr < info->address)
+	high = mid;
+      else if (addr >= seq->line_info_lookup[mid + 1]->address)
+	low = mid + 1;
+      else
+	break;
+    }
 
-      if (each_line
-	  && !(each_line->end_sequence || each_line == seq->last_line))
-	{
-	  *filename_ptr = each_line->filename;
-	  *linenumber_ptr = each_line->line;
-	  if (discriminator_ptr)
-	    *discriminator_ptr = each_line->discriminator;
-	  return seq->last_line->address - seq->low_pc;
-	}
+  /* Check for a valid line information entry.  */
+  if (info
+      && addr >= info->address
+      && addr < seq->line_info_lookup[mid + 1]->address
+      && !(info->end_sequence || info == seq->last_line))
+    {
+      *filename_ptr = info->filename;
+      *linenumber_ptr = info->line;
+      if (discriminator_ptr)
+	*discriminator_ptr = info->discriminator;
+      return seq->last_line->address - seq->low_pc;
     }
 
+fail:
   *filename_ptr = NULL;
   return 0;
 }
@@ -2136,16 +2222,102 @@ lookup_address_in_line_info_table (struct line_info_table *table,
 /* Read in the .debug_ranges section for future reference.  */
 
 static bfd_boolean
-read_debug_ranges (struct comp_unit *unit)
+read_debug_ranges (struct comp_unit * unit)
 {
-  struct dwarf2_debug *stash = unit->stash;
+  struct dwarf2_debug * stash = unit->stash;
+
   return read_section (unit->abfd, &stash->debug_sections[debug_ranges],
 		       stash->syms, 0,
-		       &stash->dwarf_ranges_buffer, &stash->dwarf_ranges_size);
+		       &stash->dwarf_ranges_buffer,
+		       &stash->dwarf_ranges_size);
 }
 
 /* Function table functions.  */
 
+static int
+compare_lookup_funcinfos (const void * a, const void * b)
+{
+  const struct lookup_funcinfo * lookup1 = a;
+  const struct lookup_funcinfo * lookup2 = b;
+
+  if (lookup1->low_addr < lookup2->low_addr)
+    return -1;
+  if (lookup1->low_addr > lookup2->low_addr)
+    return 1;
+  if (lookup1->high_addr < lookup2->high_addr)
+    return -1;
+  if (lookup1->high_addr > lookup2->high_addr)
+    return 1;
+
+  return 0;
+}
+
+static bfd_boolean
+build_lookup_funcinfo_table (struct comp_unit * unit)
+{
+  struct lookup_funcinfo *lookup_funcinfo_table = unit->lookup_funcinfo_table;
+  unsigned int number_of_functions = unit->number_of_functions;
+  struct funcinfo *each;
+  struct lookup_funcinfo *entry;
+  size_t index;
+  struct arange *range;
+  bfd_vma low_addr, high_addr;
+
+  if (lookup_funcinfo_table || number_of_functions == 0)
+    return TRUE;
+
+  /* Create the function info lookup table.  */
+  lookup_funcinfo_table = (struct lookup_funcinfo *)
+    bfd_malloc (number_of_functions * sizeof (struct lookup_funcinfo));
+  if (lookup_funcinfo_table == NULL)
+    return FALSE;
+
+  /* Populate the function info lookup table.  */
+  index = number_of_functions;
+  for (each = unit->function_table; each; each = each->prev_func)
+    {
+      entry = &lookup_funcinfo_table[--index];
+      entry->funcinfo = each;
+
+      /* Calculate the lowest and highest address for this function entry.  */
+      low_addr  = entry->funcinfo->arange.low;
+      high_addr = entry->funcinfo->arange.high;
+
+      for (range = entry->funcinfo->arange.next; range; range = range->next)
+	{
+	  if (range->low < low_addr)
+	    low_addr = range->low;
+	  if (range->high > high_addr)
+	    high_addr = range->high;
+	}
+
+      entry->low_addr = low_addr;
+      entry->high_addr = high_addr;
+    }
+
+  BFD_ASSERT (index == 0);
+
+  /* Sort the function by address.  */
+  qsort (lookup_funcinfo_table,
+	 number_of_functions,
+	 sizeof (struct lookup_funcinfo),
+	 compare_lookup_funcinfos);
+
+  /* Calculate the high watermark for each function in the lookup table.  */
+  high_addr = lookup_funcinfo_table[0].high_addr;
+  for (index = 1; index < number_of_functions; index++)
+    {
+      entry = &lookup_funcinfo_table[index];
+      if (entry->high_addr > high_addr)
+	high_addr = entry->high_addr;
+      else
+	entry->high_addr = high_addr;
+    }
+
+  unit->lookup_funcinfo_table = lookup_funcinfo_table;
+  return TRUE;
+}
+
 /* If ADDR is within UNIT's function tables, set FUNCTION_PTR, and return
    TRUE.  Note that we need to find the function that has the smallest range
    that contains ADDR, to handle inlined functions without depending upon
@@ -2156,37 +2328,71 @@ lookup_address_in_function_table (struct comp_unit *unit,
 				  bfd_vma addr,
 				  struct funcinfo **function_ptr)
 {
-  struct funcinfo* each_func;
+  unsigned int number_of_functions = unit->number_of_functions;
+  struct lookup_funcinfo* lookup_funcinfo = NULL;
+  struct funcinfo* funcinfo = NULL;
   struct funcinfo* best_fit = NULL;
   bfd_vma best_fit_len = 0;
+  bfd_size_type low, high, mid, first;
   struct arange *arange;
 
-  for (each_func = unit->function_table;
-       each_func;
-       each_func = each_func->prev_func)
+  if (!build_lookup_funcinfo_table (unit))
+    return FALSE;
+
+  /* Find the first function in the lookup table which may contain the
+     specified address.  */
+  low = 0;
+  high = number_of_functions;
+  first = high;
+  while (low < high)
     {
-      for (arange = &each_func->arange;
-	   arange;
-	   arange = arange->next)
+      mid = (low + high) / 2;
+      lookup_funcinfo = &unit->lookup_funcinfo_table[mid];
+      if (addr < lookup_funcinfo->low_addr)
+	high = mid;
+      else if (addr >= lookup_funcinfo->high_addr)
+	low = mid + 1;
+      else
+	high = first = mid;
+    }
+
+  /* Find the 'best' match for the address.  The prior algorithm defined the
+     best match as the function with the smallest address range containing
+     the specified address.  This definition should probably be changed to the
+     innermost inline routine containing the address, but right now we want
+     to get the same results we did before.  */
+  while (first < number_of_functions)
+    {
+      if (addr < unit->lookup_funcinfo_table[first].low_addr)
+	break;
+      funcinfo = unit->lookup_funcinfo_table[first].funcinfo;
+
+      for (arange = &funcinfo->arange; arange; arange = arange->next)
 	{
-	  if (addr >= arange->low && addr < arange->high)
+	  if (addr < arange->low || addr >= arange->high)
+	    continue;
+
+	  if (!best_fit
+	      || arange->high - arange->low < best_fit_len
+	      /* The following comparison is designed to return the same
+		 match as the previous algorithm for routines which have the
+		 same best fit length.  */
+	      || (arange->high - arange->low == best_fit_len
+		  && funcinfo > best_fit))
 	    {
-	      if (!best_fit
-		  || arange->high - arange->low < best_fit_len)
-		{
-		  best_fit = each_func;
-		  best_fit_len = arange->high - arange->low;
-		}
+	      best_fit = funcinfo;
+	      best_fit_len = arange->high - arange->low;
 	    }
 	}
-    }
 
-  if (best_fit)
-    {
-      *function_ptr = best_fit;
-      return TRUE;
+      first++;
     }
-  return FALSE;
+
+  if (!best_fit)
+    return FALSE;
+
+  *function_ptr = best_fit;
+  return TRUE;
 }
 
 /* If SYM at ADDR is within function table of UNIT, set FILENAME_PTR
@@ -2271,8 +2477,8 @@ lookup_symbol_in_variable_table (struct comp_unit *unit,
       *linenumber_ptr = each->line;
       return TRUE;
     }
-  else
-    return FALSE;
+
+  return FALSE;
 }
 
 static char *
@@ -2515,6 +2721,7 @@ scan_unit_for_symbols (struct comp_unit *unit)
 	  func->tag = abbrev->tag;
 	  func->prev_func = unit->function_table;
 	  unit->function_table = func;
+      unit->number_of_functions++;
 	  BFD_ASSERT (!unit->cached);
 
 	  if (func->tag == DW_TAG_inlined_subroutine)
@@ -2856,7 +3063,7 @@ parse_comp_unit (struct dwarf2_debug *stash,
 	  low_pc = attr.u.val;
 	  /* If the compilation unit DIE has a DW_AT_low_pc attribute,
 	     this is the base address to use when reading location
-	     lists or range lists. */
+	     lists or range lists.  */
 	  if (abbrev->tag == DW_TAG_compile_unit)
 	    unit->base_address = low_pc;
 	  break;
@@ -3127,7 +3334,7 @@ comp_unit_hash_info (struct dwarf2_debug *stash,
        each_func && okay;
        each_func = each_func->prev_func)
     {
-      /* Skip nameless functions. */
+      /* Skip nameless functions.  */
       if (each_func->name)
 	/* There is no need to copy name string into hash table as
 	   name string is either in the dwarf string buffer or
@@ -3509,7 +3716,7 @@ stash_maybe_update_info_hash_tables (struct dwarf2_debug *stash)
   return TRUE;
 }
 
-/* Check consistency of info hash tables.  This is for debugging only. */
+/* Check consistency of info hash tables.  This is for debugging only.  */
 
 static void ATTRIBUTE_UNUSED
 stash_verify_info_hash_table (struct dwarf2_debug *stash)
@@ -3943,7 +4150,7 @@ _bfd_dwarf2_find_nearest_line (bfd *abfd,
 	stash_maybe_enable_info_hash_tables (abfd, stash);
 
       /* Keep info hash table up to date if they are available.  Note that we
-	 may disable the hash tables if there is any error duing update. */
+	 may disable the hash tables if there is any error duing update.  */
       if (stash->info_hash_status == STASH_INFO_HASH_ON)
 	stash_maybe_update_info_hash_tables (stash);
 
@@ -4244,6 +4451,12 @@ _bfd_dwarf2_cleanup_debug_info (bfd *abfd, void **pinfo)
 	  function_table = function_table->prev_func;
 	}
 
+      if (each->lookup_funcinfo_table)
+	{
+	  free (each->lookup_funcinfo_table);
+	  each->lookup_funcinfo_table = NULL;
+	}
+
       while (variable_table)
 	{
 	  if (variable_table->file)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]