[PATCH 3/4] libdwfl: store module lookup table separately from segments

Omar Sandoval osandov@osandov.com
Thu Dec 12 01:30:00 GMT 2019


From: Omar Sandoval <osandov@fb.com>

Currently, the address ranges for segments reported with
dwfl_report_segment and modules are stored in the same sorted array.
This requires complicated logic in reify_segments for splitting up
existing segments and inserting into the table, which can have quadratic
runtime in the worst case. Simplify it by adding a separate table for
the module list, which we can manage much more simply. This will be
especially important for the next change.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 libdwfl/ChangeLog         |  14 +++-
 libdwfl/dwfl_addrmodule.c |  89 +++++++++++++++++++++++-
 libdwfl/dwfl_getmodules.c |  14 ++--
 libdwfl/dwfl_module.c     |  11 +--
 libdwfl/libdwflP.h        |  17 ++++-
 libdwfl/link_map.c        |   7 +-
 libdwfl/segment.c         | 143 +-------------------------------------
 7 files changed, 128 insertions(+), 167 deletions(-)

diff --git a/libdwfl/ChangeLog b/libdwfl/ChangeLog
index 09a4a473..8dc20961 100644
--- a/libdwfl/ChangeLog
+++ b/libdwfl/ChangeLog
@@ -3,12 +3,24 @@
 	* libdwflP.h: Add new NOT_LOADED DWFL_ERROR.
 	(Dwfl_Module): Remove coalescing state.
 	Rename lookup_tail_ndx to next_segndx.
+	Rename segment member to lookup.
+	(Dwfl): Replace module lookup table with new definition.
 	* relocate.c (__libdwfl_relocate_value): Return
 	DWFL_E_NOT_LOADED if section is not loaded.
 	(relocate): Handle DWFL_E_NOT_LOADED.
 	* dwfl_module_getsym: Ignore DWFL_E_NOT_LOADED.
-	* segment.c (dwfl_report_segment): Remove coalescing logic.
+	* segment.c: Remove old module lookup table implementation.
+	(dwfl_report_segment): Remove coalescing logic.
+	(dwfl_addrsegment): Use dwfl_addrmodule to get module.
 	* libdwfl.h (dwfl_report_segment): Document that IDENT is now ignored.
+	* dwfl_addrmodule.c: Add new module lookup table implementation.
+	(dwfl_addrmodule): Use new module lookup table instead of
+	dwfl_addrsegment.
+	* dwfl_getmodules.c (dwfl_getmodules): Use new module lookup table.
+	* dwfl_module.c (dwfl_report_begin): Clear new module lookup table.
+	(use): Likewise.
+	* link_map.c: Use dwfl_addrmodule instead of dwfl_addrsegment when
+	segment index is not needed.
 
 2019-12-05  Mark Wielaard  <mark@klomp.org>
 
diff --git a/libdwfl/dwfl_addrmodule.c b/libdwfl/dwfl_addrmodule.c
index abf1ff48..21db4883 100644
--- a/libdwfl/dwfl_addrmodule.c
+++ b/libdwfl/dwfl_addrmodule.c
@@ -32,11 +32,94 @@
 
 #include "libdwflP.h"
 
+static bool append_lookup_module (Dwfl *dwfl, Dwfl_Module *mod, GElf_Addr start,
+				  GElf_Addr end)
+{
+  if (dwfl->lookup_module_elts >= dwfl->lookup_module_alloc)
+    {
+      size_t n = dwfl->lookup_module_alloc;
+      n = n == 0 ? 16 : n * 2;
+      struct dwfl_module_segment *tmp;
+      tmp = realloc (dwfl->lookup_module, n * sizeof (tmp[0]));
+      if (tmp == NULL)
+	{
+	  __libdwfl_seterrno (DWFL_E_NOMEM);
+	  return true;
+	}
+      dwfl->lookup_module = tmp;
+      dwfl->lookup_module_alloc = n;
+    }
+  size_t i = dwfl->lookup_module_elts++;
+  dwfl->lookup_module[i].start = start;
+  dwfl->lookup_module[i].end = end;
+  dwfl->lookup_module[i].mod = mod;
+  return false;
+}
+
+static int compare_dwfl_module_segment (const void *a, const void *b)
+{
+  const struct dwfl_module_segment *seg1 = a;
+  const struct dwfl_module_segment *seg2 = b;
+  if (seg1->start < seg2->start)
+    return -1;
+  else if (seg1->start > seg2->start)
+    return 1;
+  else
+    return 0;
+}
+
+static bool
+create_lookup_module (Dwfl *dwfl)
+{
+  for (Dwfl_Module *mod = dwfl->modulelist; mod != NULL; mod = mod->next)
+    if (! mod->gc)
+      {
+	GElf_Addr start = __libdwfl_segment_start(dwfl, mod->low_addr);
+	GElf_Addr end = __libdwfl_segment_end(dwfl, mod->high_addr);
+	if (append_lookup_module(dwfl, mod, start, end))
+	  return true;
+      }
+
+  qsort (dwfl->lookup_module, dwfl->lookup_module_elts,
+	 sizeof (dwfl->lookup_module[0]), compare_dwfl_module_segment);
+  for (size_t i = 0; i < dwfl->lookup_module_elts; i++)
+    {
+      dwfl->lookup_module[i].mod->lookup = i;
+      /* If the upper boundary of the segment isn't part of the next segment,
+	 treat it as part of the segment.  */
+      if (i == dwfl->lookup_module_elts - 1
+	  || dwfl->lookup_module[i].end < dwfl->lookup_module[i + 1].start)
+	dwfl->lookup_module[i].end++;
+    }
+  return false;
+}
+
+static int search_dwfl_module_segment (const void *key, const void *elt)
+{
+  Dwarf_Addr address = *(Dwarf_Addr *)key;
+  const struct dwfl_module_segment *seg = elt;
+  if (address < seg->start)
+    return -1;
+  else if (address >= seg->end)
+    return 1;
+  else
+    return 0;
+}
+
 Dwfl_Module *
 dwfl_addrmodule (Dwfl *dwfl, Dwarf_Addr address)
 {
-  Dwfl_Module *mod;
-  (void) INTUSE(dwfl_addrsegment) (dwfl, address, &mod);
-  return mod;
+  if (unlikely (dwfl == NULL)
+      || (unlikely (dwfl->lookup_module_elts == 0)
+	  && unlikely (create_lookup_module (dwfl))))
+    return NULL;
+
+  struct dwfl_module_segment *seg = bsearch (&address, dwfl->lookup_module,
+					     dwfl->lookup_module_elts,
+					     sizeof (dwfl->lookup_module[0]),
+					     search_dwfl_module_segment);
+  if (seg == NULL)
+    return NULL;
+  return seg->mod;
 }
 INTDEF (dwfl_addrmodule)
diff --git a/libdwfl/dwfl_getmodules.c b/libdwfl/dwfl_getmodules.c
index 243cb04d..807fe855 100644
--- a/libdwfl/dwfl_getmodules.c
+++ b/libdwfl/dwfl_getmodules.c
@@ -61,17 +61,17 @@ dwfl_getmodules (Dwfl *dwfl,
 	else
 	  m = m->next;
     }
-  else if (((offset & 3) == 2) && likely (dwfl->lookup_module != NULL))
+  else if (((offset & 3) == 2) && likely (dwfl->lookup_module_elts > 0))
     {
       offset >>= 2;
 
-      if ((size_t) offset - 1 == dwfl->lookup_elts)
+      if ((size_t) offset - 1 == dwfl->lookup_module_elts)
 	return 0;
 
-      if (unlikely ((size_t) offset - 1 > dwfl->lookup_elts))
+      if (unlikely ((size_t) offset - 1 > dwfl->lookup_module_elts))
 	return -1;
 
-      m = dwfl->lookup_module[offset - 1];
+      m = dwfl->lookup_module[offset - 1].mod;
       if (unlikely (m == NULL))
 	return -1;
     }
@@ -87,9 +87,9 @@ dwfl_getmodules (Dwfl *dwfl,
       ++offset;
       m = m->next;
       if (ok != DWARF_CB_OK)
-	return ((dwfl->lookup_module == NULL) ? ((offset << 2) | 1)
-		: (((m == NULL ? (ptrdiff_t) dwfl->lookup_elts + 1
-		     : m->segment + 1) << 2) | 2));
+	return ((dwfl->lookup_module_elts == 0) ? ((offset << 2) | 1)
+		: (((m == NULL ? (ptrdiff_t) dwfl->lookup_module_elts + 1
+		     : m->lookup + 1) << 2) | 2));
     }
   return 0;
 }
diff --git a/libdwfl/dwfl_module.c b/libdwfl/dwfl_module.c
index e7dfdace..4aa9aa2c 100644
--- a/libdwfl/dwfl_module.c
+++ b/libdwfl/dwfl_module.c
@@ -135,8 +135,9 @@ INTDEF (dwfl_report_begin_add)
 void
 dwfl_report_begin (Dwfl *dwfl)
 {
-  /* Clear the segment lookup table.  */
+  /* Clear the segment and module lookup tables.  */
   dwfl->lookup_elts = 0;
+  dwfl->lookup_module_elts = 0;
 
   for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
     m->gc = true;
@@ -150,13 +151,7 @@ use (Dwfl_Module *mod, Dwfl_Module **tailp, Dwfl *dwfl)
 {
   mod->next = *tailp;
   *tailp = mod;
-
-  if (unlikely (dwfl->lookup_module != NULL))
-    {
-      free (dwfl->lookup_module);
-      dwfl->lookup_module = NULL;
-    }
-
+  dwfl->lookup_module_elts = 0;
   return mod;
 }
 
diff --git a/libdwfl/libdwflP.h b/libdwfl/libdwflP.h
index d21471b1..6661cc4c 100644
--- a/libdwfl/libdwflP.h
+++ b/libdwfl/libdwflP.h
@@ -113,6 +113,13 @@ struct Dwfl_User_Core
   int fd;                       /* close if >= 0.  */
 };
 
+struct dwfl_module_segment
+{
+  GElf_Addr start;
+  GElf_Addr end;
+  Dwfl_Module *mod;
+};
+
 struct Dwfl
 {
   const Dwfl_Callbacks *callbacks;
@@ -127,14 +134,18 @@ struct Dwfl
 
   GElf_Addr segment_align;	/* Smallest granularity of segments.  */
 
-  /* Binary search table in three parallel malloc'd arrays.  */
+  /* Binary search table of segments in two parallel malloc'd arrays.  */
   size_t lookup_elts;		/* Elements in use.  */
   size_t lookup_alloc;		/* Elements allococated.  */
   GElf_Addr *lookup_addr;	/* Start address of segment.  */
-  Dwfl_Module **lookup_module;	/* Module associated with segment, or null.  */
   int *lookup_segndx;		/* User segment index, or -1.  */
   int next_segndx;
 
+  /* Binary search table of module address ranges.  */
+  struct dwfl_module_segment *lookup_module;
+  size_t lookup_module_elts;
+  size_t lookup_module_alloc;
+
   struct Dwfl_User_Core *user_core;
 };
 
@@ -216,7 +227,7 @@ struct Dwfl_Module
   Dwarf_CFI *dwarf_cfi;		/* Cached DWARF CFI for this module.  */
   Dwarf_CFI *eh_cfi;		/* Cached EH CFI for this module.  */
 
-  int segment;			/* Index of first segment table entry.  */
+  int lookup;			/* Index in module lookup table.  */
   bool gc;			/* Mark/sweep flag.  */
   bool is_executable;		/* Use Dwfl::executable_for_core?  */
 };
diff --git a/libdwfl/link_map.c b/libdwfl/link_map.c
index 29307c74..befb39e4 100644
--- a/libdwfl/link_map.c
+++ b/libdwfl/link_map.c
@@ -175,8 +175,7 @@ integrated_memory_callback (Dwfl *dwfl, int ndx,
 
   /* Now look for module text covering this address.  */
 
-  Dwfl_Module *mod;
-  (void) INTUSE(dwfl_addrsegment) (dwfl, vaddr, &mod);
+  Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (dwfl, vaddr);
   if (mod == NULL)
     return false;
 
@@ -588,9 +587,7 @@ consider_executable (Dwfl_Module *mod, GElf_Addr at_phdr, GElf_Addr at_entry,
 		  mod->high_addr -= mod_bias;
 		  mod->low_addr += bias;
 		  mod->high_addr += bias;
-
-		  free (mod->dwfl->lookup_module);
-		  mod->dwfl->lookup_module = NULL;
+		  mod->dwfl->lookup_module_elts = 0;
 		}
 	    }
 	}
diff --git a/libdwfl/segment.c b/libdwfl/segment.c
index f6a3e84e..9bf8b282 100644
--- a/libdwfl/segment.c
+++ b/libdwfl/segment.c
@@ -76,19 +76,6 @@ insert (Dwfl *dwfl, size_t i, GElf_Addr start, GElf_Addr end, int segndx)
       dwfl->lookup_alloc = n;
       dwfl->lookup_addr = naddr;
       dwfl->lookup_segndx = nsegndx;
-
-      if (dwfl->lookup_module != NULL)
-	{
-	  /* Make sure this array is big enough too.  */
-	  Dwfl_Module **old = dwfl->lookup_module;
-	  dwfl->lookup_module = realloc (dwfl->lookup_module,
-					 sizeof dwfl->lookup_module[0] * n);
-	  if (unlikely (dwfl->lookup_module == NULL))
-	    {
-	      free (old);
-	      return true;
-	    }
-	}
     }
 
   if (unlikely (i < dwfl->lookup_elts))
@@ -98,17 +85,12 @@ insert (Dwfl *dwfl, size_t i, GElf_Addr start, GElf_Addr end, int segndx)
 	       move * sizeof dwfl->lookup_addr[0]);
       memmove (&dwfl->lookup_segndx[i + need], &dwfl->lookup_segndx[i],
 	       move * sizeof dwfl->lookup_segndx[0]);
-      if (dwfl->lookup_module != NULL)
-	memmove (&dwfl->lookup_module[i + need], &dwfl->lookup_module[i],
-		 move * sizeof dwfl->lookup_module[0]);
     }
 
   if (need_start)
     {
       dwfl->lookup_addr[i] = start;
       dwfl->lookup_segndx[i] = segndx;
-      if (dwfl->lookup_module != NULL)
-	dwfl->lookup_module[i] = NULL;
       ++i;
     }
   else
@@ -118,8 +100,6 @@ insert (Dwfl *dwfl, size_t i, GElf_Addr start, GElf_Addr end, int segndx)
     {
       dwfl->lookup_addr[i] = end;
       dwfl->lookup_segndx[i] = -1;
-      if (dwfl->lookup_module != NULL)
-	dwfl->lookup_module[i] = NULL;
     }
 
   dwfl->lookup_elts += need;
@@ -154,131 +134,20 @@ lookup (Dwfl *dwfl, GElf_Addr address, int hint)
   return -1;
 }
 
-static bool
-reify_segments (Dwfl *dwfl)
-{
-  int hint = -1;
-  int highest = -1;
-  bool fixup = false;
-  for (Dwfl_Module *mod = dwfl->modulelist; mod != NULL; mod = mod->next)
-    if (! mod->gc)
-      {
-	const GElf_Addr start = __libdwfl_segment_start (dwfl, mod->low_addr);
-	const GElf_Addr end = __libdwfl_segment_end (dwfl, mod->high_addr);
-	bool resized = false;
-
-	int idx = lookup (dwfl, start, hint);
-	if (unlikely (idx < 0))
-	  {
-	    /* Module starts below any segment.  Insert a low one.  */
-	    if (unlikely (insert (dwfl, 0, start, end, -1)))
-	      return true;
-	    idx = 0;
-	    resized = true;
-	  }
-	else if (dwfl->lookup_addr[idx] > start)
-	  {
-	    /* The module starts in the middle of this segment.  Split it.  */
-	    if (unlikely (insert (dwfl, idx + 1, start, end,
-				  dwfl->lookup_segndx[idx])))
-	      return true;
-	    ++idx;
-	    resized = true;
-	  }
-	else if (dwfl->lookup_addr[idx] < start)
-	  {
-	    /* The module starts past the end of this segment.
-	       Add a new one.  */
-	    if (unlikely (insert (dwfl, idx + 1, start, end, -1)))
-	      return true;
-	    ++idx;
-	    resized = true;
-	  }
-
-	if ((size_t) idx + 1 < dwfl->lookup_elts
-	    && end < dwfl->lookup_addr[idx + 1])
-	  {
-	    /* The module ends in the middle of this segment.  Split it.  */
-	    if (unlikely (insert (dwfl, idx + 1,
-				  end, dwfl->lookup_addr[idx + 1], -1)))
-	      return true;
-	    resized = true;
-	  }
-
-	if (dwfl->lookup_module == NULL)
-	  {
-	    dwfl->lookup_module = calloc (dwfl->lookup_alloc,
-					  sizeof dwfl->lookup_module[0]);
-	    if (unlikely (dwfl->lookup_module == NULL))
-	      return true;
-	  }
-
-	/* Cache a backpointer in the module.  */
-	mod->segment = idx;
-
-	/* Put MOD in the table for each segment that's inside it.  */
-	do
-	  dwfl->lookup_module[idx++] = mod;
-	while ((size_t) idx < dwfl->lookup_elts
-	       && dwfl->lookup_addr[idx] < end);
-	assert (dwfl->lookup_module[mod->segment] == mod);
-
-	if (resized && idx - 1 >= highest)
-	  /* Expanding the lookup tables invalidated backpointers
-	     we've already stored.  Reset those ones.  */
-	  fixup = true;
-
-	highest = idx - 1;
-	hint = (size_t) idx < dwfl->lookup_elts ? idx : -1;
-      }
-
-  if (fixup)
-    /* Reset backpointer indices invalidated by table insertions.  */
-    for (size_t idx = 0; idx < dwfl->lookup_elts; ++idx)
-      if (dwfl->lookup_module[idx] != NULL)
-	dwfl->lookup_module[idx]->segment = idx;
-
-  return false;
-}
-
 int
 dwfl_addrsegment (Dwfl *dwfl, Dwarf_Addr address, Dwfl_Module **mod)
 {
   if (unlikely (dwfl == NULL))
     return -1;
 
-  if (unlikely (dwfl->lookup_module == NULL)
-      && mod != NULL
-      && unlikely (reify_segments (dwfl)))
-    {
-      __libdwfl_seterrno (DWFL_E_NOMEM);
-      return -1;
-    }
-
   int idx = lookup (dwfl, address, -1);
-  if (likely (mod != NULL))
-    {
-      if (unlikely (idx < 0) || unlikely (dwfl->lookup_module == NULL))
-	*mod = NULL;
-      else
-	{
-	  *mod = dwfl->lookup_module[idx];
-
-	  /* If this segment does not have a module, but the address is
-	     the upper boundary of the previous segment's module, use that.  */
-	  if (*mod == NULL && idx > 0 && dwfl->lookup_addr[idx] == address)
-	    {
-	      *mod = dwfl->lookup_module[idx - 1];
-	      if (*mod != NULL && (*mod)->high_addr != address)
-		*mod = NULL;
-	    }
-	}
-    }
-
   if (likely (idx >= 0))
     /* Translate internal segment table index to user segment index.  */
     idx = dwfl->lookup_segndx[idx];
 
+  if (mod != NULL)
+    *mod = INTUSE(dwfl_addrmodule) (dwfl, address);
+
   return idx;
 }
 INTDEF (dwfl_addrsegment)
@@ -301,12 +170,6 @@ dwfl_report_segment (Dwfl *dwfl, int ndx, const GElf_Phdr *phdr, GElf_Addr bias,
 			    phdr->p_align < dwfl->segment_align))
     dwfl->segment_align = phdr->p_align;
 
-  if (unlikely (dwfl->lookup_module != NULL))
-    {
-      free (dwfl->lookup_module);
-      dwfl->lookup_module = NULL;
-    }
-
   GElf_Addr start = __libdwfl_segment_start (dwfl, bias + phdr->p_vaddr);
   GElf_Addr end = __libdwfl_segment_end (dwfl,
 					 bias + phdr->p_vaddr + phdr->p_memsz);
-- 
2.24.0



More information about the Elfutils-devel mailing list