[PATCH v3 23/33] Parallelize DWARF indexing

Tom Tromey <tom@tromey.com>
Sat Dec 4 20:38:34 GMT 2021


This parallelizes the new DWARF indexer.  The indexer's storage was
designed so that each storage object and each indexer is fully
independent, which makes it straightforward to scan different CUs in
parallel.

This patch creates a new cooked index storage object per worker
thread and then scans a disjoint subset of the CUs in each such
thread, using gdb's existing thread pool.  (A rough sketch of this
shape follows below.)

In the ongoing "gdb gdb" example, this patch reduces the wall time
from 0.903534 seconds to 0.668923 seconds.  (Note that 0.903534 is
the time with the new index -- that is, with the "enable the new
index" patch rebased to before this one.  In the final series, that
patch appears toward the end.  Hopefully this isn't too confusing.)
---
 gdb/dwarf2/cooked-index.c | 142 ++++++++++++++++++++++++++++----------
 gdb/dwarf2/cooked-index.h | 116 ++++++++++++++++++++++---------
 gdb/dwarf2/read.c         | 113 +++++++++++++++++++++++++-----
 gdb/dwarf2/read.h         |   4 +-
 4 files changed, 287 insertions(+), 88 deletions(-)

diff --git a/gdb/dwarf2/cooked-index.c b/gdb/dwarf2/cooked-index.c
index 1b89e4c91f3..bb10b884a43 100644
--- a/gdb/dwarf2/cooked-index.c
+++ b/gdb/dwarf2/cooked-index.c
@@ -116,10 +116,41 @@ cooked_index::add (sect_offset die_offset, enum dwarf_tag tag,
   return result;
 }
 
+cooked_index_vector::cooked_index_vector (vec_type &&vec)
+  : m_vector (std::move (vec))
+{
+  finalize ();
+}
+
+/* See cooked-index.h.  */
+
+dwarf2_per_cu_data *
+cooked_index_vector::lookup (CORE_ADDR addr)
+{
+  for (const auto &index : m_vector)
+    {
+      dwarf2_per_cu_data *result = index->lookup (addr);
+      if (result != nullptr)
+	return result;
+    }
+  return nullptr;
+}
+
+/* See cooked-index.h.  */
+
+std::vector<addrmap *>
+cooked_index_vector::get_addrmaps ()
+{
+  std::vector<addrmap *> result;
+  for (const auto &index : m_vector)
+    result.push_back (index->m_addrmap);
+  return result;
+}
+
 /* See cooked-index.h.  */
 
-cooked_index::range
-cooked_index::find (gdb::string_view name, bool completing)
+cooked_index_vector::range
+cooked_index_vector::find (gdb::string_view name, bool completing)
 {
   auto lower = std::lower_bound (m_entries.begin (), m_entries.end (),
 				 name,
@@ -149,8 +180,8 @@ cooked_index::find (gdb::string_view name, bool completing)
 /* See cooked-index.h.  */
 
 gdb::unique_xmalloc_ptr<char>
-cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
-					 htab_t gnat_entries)
+cooked_index_vector::handle_gnat_encoded_entry (cooked_index_entry *entry,
+						htab_t gnat_entries)
 {
   std::string canonical = ada_decode (entry->name, false, false);
   if (canonical.empty ())
@@ -173,9 +204,11 @@ cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
 	{
 	  gdb::unique_xmalloc_ptr<char> new_name
 	    = make_unique_xstrndup (name.data (), name.length ());
-	  last = create (entry->die_offset, DW_TAG_namespace,
-			 0, new_name.get (), parent,
-			 entry->per_cu);
+	  /* It doesn't matter which obstack we allocate this on, so
+	     we pick the most convenient one.  */
+	  last = m_vector[0]->create (entry->die_offset, DW_TAG_namespace,
+				      0, new_name.get (), parent,
+				      entry->per_cu);
 	  last->canonical = last->name;
 	  m_names.push_back (std::move (new_name));
 	  *slot = last;
@@ -190,8 +223,28 @@ cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
 
 /* See cooked-index.h.  */
 
+const cooked_index_entry *
+cooked_index_vector::get_main () const
+{
+  const cooked_index_entry *result = nullptr;
+
+  for (const auto &index : m_vector)
+    {
+      const cooked_index_entry *entry = index->get_main ();
+      if (result == nullptr
+	  || ((result->flags & IS_MAIN) == 0
+	      && entry != nullptr
+	      && (entry->flags & IS_MAIN) != 0))
+	result = entry;
+    }
+
+  return result;
+}
+
+/* See cooked-index.h.  */
+
 void
-cooked_index::finalize ()
+cooked_index_vector::finalize ()
 {
   auto hash_name_ptr = [] (const void *p)
     {
@@ -213,38 +266,26 @@ cooked_index::finalize ()
   htab_up seen_names (htab_create_alloc (10, hash_name_ptr, eq_name_ptr,
 					 nullptr, xcalloc, xfree));
 
-  htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
-					   nullptr, xcalloc, xfree));
-
-  for (cooked_index_entry *entry : m_entries)
+  for (auto &index : m_vector)
     {
-      gdb_assert (entry->canonical == nullptr);
-      if ((entry->per_cu->lang != language_cplus
-	   && entry->per_cu->lang != language_ada)
-	  || (entry->flags & IS_LINKAGE) != 0)
-	entry->canonical = entry->name;
-      else
+      htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
+					       nullptr, xcalloc, xfree));
+
+      std::vector<cooked_index_entry *> entries
+	= std::move (index->m_entries);
+      for (cooked_index_entry *entry : entries)
 	{
-	  if (entry->per_cu->lang == language_ada)
-	    {
-	      gdb::unique_xmalloc_ptr<char> canon_name
-		= handle_gnat_encoded_entry (entry, gnat_entries.get ());
-	      if (canon_name == nullptr)
-		entry->canonical = entry->name;
-	      else
-		{
-		  entry->canonical = canon_name.get ();
-		  m_names.push_back (std::move (canon_name));
-		}
-	    }
+	  gdb_assert (entry->canonical == nullptr);
+	  if ((entry->per_cu->lang != language_cplus
+	       && entry->per_cu->lang != language_ada)
+	      || (entry->flags & IS_LINKAGE) != 0)
+	    entry->canonical = entry->name;
 	  else
 	    {
-	      void **slot = htab_find_slot (seen_names.get (), entry,
-					    INSERT);
-	      if (*slot == nullptr)
+	      if (entry->per_cu->lang == language_ada)
 		{
 		  gdb::unique_xmalloc_ptr<char> canon_name
-		    = cp_canonicalize_string (entry->name);
+		    = handle_gnat_encoded_entry (entry, gnat_entries.get ());
 		  if (canon_name == nullptr)
 		    entry->canonical = entry->name;
 		  else
@@ -255,12 +296,39 @@ cooked_index::finalize ()
 		}
 	      else
 		{
-		  const cooked_index_entry *other
-		    = (const cooked_index_entry *) *slot;
-		  entry->canonical = other->canonical;
+		  void **slot = htab_find_slot (seen_names.get (), entry,
+						INSERT);
+		  if (*slot == nullptr)
+		    {
+		      gdb::unique_xmalloc_ptr<char> canon_name
+			= cp_canonicalize_string (entry->name);
+		      if (canon_name == nullptr)
+			entry->canonical = entry->name;
+		      else
+			{
+			  entry->canonical = canon_name.get ();
+			  m_names.push_back (std::move (canon_name));
+			}
+		    }
+		  else
+		    {
+		      const cooked_index_entry *other
+			= (const cooked_index_entry *) *slot;
+		      entry->canonical = other->canonical;
+		    }
 		}
 	    }
 	}
+
+      if (m_entries.empty ())
+	m_entries = std::move (entries);
+      else
+	{
+	  size_t old_size = m_entries.size ();
+	  m_entries.resize (m_entries.size () + entries.size ());
+	  memcpy (m_entries.data () + old_size,
+		  entries.data (), entries.size () * sizeof (entries[0]));
+	}
     }
 
   m_names.shrink_to_fit ();
diff --git a/gdb/dwarf2/cooked-index.h b/gdb/dwarf2/cooked-index.h
index 746ccfce4e5..a4bfc3eb183 100644
--- a/gdb/dwarf2/cooked-index.h
+++ b/gdb/dwarf2/cooked-index.h
@@ -30,6 +30,7 @@
 #include "gdb_obstack.h"
 #include "addrmap.h"
 #include "gdbsupport/iterator-range.h"
+#include "gdbsupport/thread-pool.h"
 
 struct dwarf2_per_cu_data;
 
@@ -155,6 +156,8 @@ struct cooked_index_entry : public allocate_on_obstack
   void write_scope (struct obstack *storage, const char *sep) const;
 };
 
+class cooked_index_vector;
+
 /* An index of interesting DIEs.  This is "cooked", in contrast to a
    mapped .debug_names or .gdb_index, which are "raw".  An entry in
    the index is of type cooked_index_entry.
@@ -178,13 +181,6 @@ class cooked_index
 				 const cooked_index_entry *parent_entry,
 				 dwarf2_per_cu_data *per_cu);
 
-  /* Return the entry that is believed to represent the program's
-     "main".  This will return NULL if no such entry is available.  */
-  const cooked_index_entry *get_main () const
-  {
-    return m_main;
-  }
-
   /* Install a new fixed addrmap from the given mutable addrmap.  */
   void install_addrmap (addrmap *map)
   {
@@ -192,6 +188,17 @@ class cooked_index
     m_addrmap = addrmap_create_fixed (map, &m_storage);
   }
 
+  friend class cooked_index_vector;
+
+private:
+
+  /* Return the entry that is believed to represent the program's
+     "main".  This will return NULL if no such entry is available.  */
+  const cooked_index_entry *get_main () const
+  {
+    return m_main;
+  }
+
   /* Look up ADDR in the address map, and return either the
      corresponding CU, or nullptr if the address could not be
      found.  */
@@ -200,10 +207,52 @@ class cooked_index
     return (dwarf2_per_cu_data *) addrmap_find (m_addrmap, addr);
   }
 
-  /* Finalize the index.  This should be called a single time, when
-     the index has been fully populated.  It enters all the entries
-     into the internal hash table.  */
-  void finalize ();
+  /* Create a new cooked_index_entry and register it with this object.
+     Entries are owned by this object.  The new item is returned.  */
+  cooked_index_entry *create (sect_offset die_offset,
+			      enum dwarf_tag tag,
+			      cooked_index_flag flags,
+			      const char *name,
+			      const cooked_index_entry *parent_entry,
+			      dwarf2_per_cu_data *per_cu)
+  {
+    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
+						name, parent_entry,
+						per_cu);
+  }
+
+  /* Storage for the entries.  */
+  auto_obstack m_storage;
+  /* List of all entries.  */
+  std::vector<cooked_index_entry *> m_entries;
+  /* If we found "main" or an entry with 'is_main' set, store it
+     here.  */
+  cooked_index_entry *m_main = nullptr;
+  /* When constructing the index, entries are stored on a linked list.
+     This member points to the head of that list.  Later, they are
+     entered into the hash table, at which point this is no longer
+     used.  */
+  cooked_index_entry *m_start = nullptr;
+  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
+     objects.  */
+  addrmap *m_addrmap = nullptr;
+};
+
+/* The main index of DIEs.  The parallel DIE indexers create
+   cooked_index objects.  Then, these are all handed off to a
+   cooked_index_vector for storage and final indexing.  The index is
+   made by iterating over the entries previously created.  */
+
+class cooked_index_vector
+{
+public:
+
+  /* A convenience typedef for the vector that is contained in this
+     object.  */
+  typedef std::vector<std::unique_ptr<cooked_index>> vec_type;
+
+  explicit cooked_index_vector (vec_type &&vec);
+  DISABLE_COPY_AND_ASSIGN (cooked_index_vector);
 
   /* A simple range over part of m_entries.  */
   typedef iterator_range<std::vector<cooked_index_entry *>::iterator> range;
@@ -219,6 +268,19 @@ class cooked_index
     return { m_entries.begin (), m_entries.end () };
   }
 
+  /* Look up ADDR in the address map, and return either the
+     corresponding CU, or nullptr if the address could not be
+     found.  */
+  dwarf2_per_cu_data *lookup (CORE_ADDR addr);
+
+  /* Return a new vector of all the addrmaps used by all the indexes
+     held by this object.  */
+  std::vector<addrmap *> get_addrmaps ();
+
+  /* Return the entry that is believed to represent the program's
+     "main".  This will return NULL if no such entry is available.  */
+  const cooked_index_entry *get_main () const;
+
 private:
 
   /* GNAT only emits mangled ("encoded") names in the DWARF, and does
@@ -229,32 +291,20 @@ class cooked_index
   gdb::unique_xmalloc_ptr<char> handle_gnat_encoded_entry
        (cooked_index_entry *entry, htab_t gnat_entries);
 
-  /* Create a new cooked_index_entry and register it with this object.
-     Entries are owned by this object.  The new item is returned.  */
-  cooked_index_entry *create (sect_offset die_offset,
-			      enum dwarf_tag tag,
-			      cooked_index_flag flags,
-			      const char *name,
-			      const cooked_index_entry *parent_entry,
-			      dwarf2_per_cu_data *per_cu)
-  {
-    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
-						name, parent_entry,
-						per_cu);
-  }
+  /* Finalize the index.  This should be called a single time, when
+     the index has been fully populated.  It enters all the entries
+     into the internal hash table.  */
+  void finalize ();
 
-  /* Storage for the entries.  */
-  auto_obstack m_storage;
-  /* List of all entries.  */
+  /* The vector of cooked_index objects.  This is stored because the
+     entries are stored on the obstacks in those objects.  */
+  vec_type m_vector;
+
+  /* List of all entries.  This is sorted during finalization.  */
   std::vector<cooked_index_entry *> m_entries;
-  /* If we found "main" or an entry with 'is_main' set, store it
-     here.  */
-  cooked_index_entry *m_main = nullptr;
+
   /* Storage for canonical names.  */
   std::vector<gdb::unique_xmalloc_ptr<char>> m_names;
-  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
-     objects.  */
-  addrmap *m_addrmap = nullptr;
 };
 
 #endif /* GDB_DWARF2_COOKED_INDEX_H */
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 81b51f97baf..97b2ccdf216 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -92,6 +92,8 @@
 #include "dwarf2/abbrev-cache.h"
 #include "cooked-index.h"
 #include "split-name.h"
+#include "gdbsupport/parallel-for.h"
+#include "gdbsupport/thread-pool.h"
 
 /* When == 1, print basic high level tracing messages.
    When > 1, be more verbose.
@@ -6518,6 +6520,12 @@ lookup_dwo_id (struct dwarf2_cu *cu, struct die_info* comp_unit_die)
 static struct dwo_unit *
 lookup_dwo_unit (dwarf2_cu *cu, die_info *comp_unit_die, const char *dwo_name)
 {
+  /* We need a lock here both to handle the DWO hash table, and BFD,
+     which is not thread-safe.  */
+  static std::mutex dwo_lock;
+
+  std::lock_guard<std::mutex> guard (dwo_lock);
+
   dwarf2_per_cu_data *per_cu = cu->per_cu;
   struct dwo_unit *dwo_unit;
   const char *comp_dir;
@@ -6665,9 +6673,14 @@ cutu_reader::cutu_reader (dwarf2_per_cu_data *this_cu,
     }
   else
     {
-      /* If an existing_cu is provided, a dwarf2_cu must not exist for this_cu
-	 in per_objfile yet.  */
-      gdb_assert (per_objfile->get_cu (this_cu) == nullptr);
+      /* If an existing_cu is provided, a dwarf2_cu must not exist for
+	 this_cu in per_objfile yet.  Here, CACHE doubles as a flag to
+	 let us know that the CU is being scanned using the parallel
+	 indexer.  This assert is avoided in this case because (1) it
+	 is irrelevant, and (2) the get_cu method is not
+	 thread-safe.  */
+      gdb_assert (cache != nullptr
+		  || per_objfile->get_cu (this_cu) == nullptr);
       m_new_cu.reset (new dwarf2_cu (this_cu, per_objfile));
       cu = m_new_cu.get ();
     }
@@ -7481,9 +7494,9 @@ process_psymtab_comp_unit (dwarf2_per_cu_data *this_cu,
     {
       if (per_objfile->per_bfd->using_index)
 	{
-	  if (!this_cu->scanned)
+	  bool nope = false;
+	  if (this_cu->scanned.compare_exchange_strong (nope, true))
 	    {
-	      this_cu->scanned = true;
 	      prepare_one_comp_unit (reader.cu, reader.comp_unit_die,
 				     pretend_language);
 	      gdb_assert (storage != nullptr);
@@ -7851,6 +7864,7 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
     = per_bfd->using_index ? &index_storage : nullptr;
   create_all_comp_units (per_objfile);
   build_type_psymtabs (per_objfile, index_storage_ptr);
+  std::vector<std::unique_ptr<cooked_index>> indexes;
   if (per_bfd->using_index)
     {
       per_bfd->quick_file_names_table
@@ -7859,15 +7873,70 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
       if (!per_bfd->debug_aranges.empty ())
 	read_addrmap_from_aranges (per_objfile, &per_bfd->debug_aranges,
 				   index_storage.get_addrmap ());
-    }
 
-  for (const auto &per_cu : per_bfd->all_comp_units)
+      {
+	/* Ensure that complaints are handled correctly.  */
+	complaint_interceptor complaint_handler;
+
+	using iter_type = typeof (per_bfd->all_comp_units.begin ());
+
+	/* Each thread returns a pair holding a cooked index, and a vector
+	   of errors that should be printed.  The latter is done because
+	   GDB's I/O system is not thread-safe.  run_on_main_thread could be
+	   used, but that would mean the messages are printed after the
+	   prompt, which looks weird.  */
+	using result_type = std::pair<std::unique_ptr<cooked_index>,
+				      std::vector<gdb_exception>>;
+	std::vector<result_type> results
+	  = gdb::parallel_for_each (1, per_bfd->all_comp_units.begin (),
+				    per_bfd->all_comp_units.end (),
+				    [=] (iter_type iter, iter_type end)
+				    {
+				      std::vector<gdb_exception> errors;
+				      cooked_index_storage thread_storage;
+				      for (; iter != end; ++iter)
+					{
+					  dwarf2_per_cu_data *per_cu = iter->get ();
+					  try
+					    {
+					      process_psymtab_comp_unit (per_cu, per_objfile,
+									 false,
+									 language_minimal,
+									 &thread_storage);
+					    }
+					  catch (gdb_exception &except)
+					    {
+					      errors.push_back (std::move (except));
+					    }
+					}
+				      return result_type (thread_storage.release (),
+							  std::move (errors));
+				    });
+
+	/* Only show a given exception a single time.  */
+	std::unordered_set<gdb_exception> seen_exceptions;
+	for (auto &one_result : results)
+	  {
+	    indexes.push_back (std::move (one_result.first));
+	    for (auto &one_exc : one_result.second)
+	      {
+		if (seen_exceptions.insert (one_exc).second)
+		  exception_print (gdb_stderr, one_exc);
+	      }
+	  }
+      }
+    }
+  else
     {
-      if (!per_bfd->using_index && per_cu->v.psymtab != NULL)
-	/* In case a forward DW_TAG_imported_unit has read the CU already.  */
-	continue;
-      process_psymtab_comp_unit (per_cu.get (), per_objfile, false,
-				 language_minimal, index_storage_ptr);
+      for (const auto &per_cu : per_bfd->all_comp_units)
+	{
+	  if (!per_bfd->using_index && per_cu->v.psymtab != NULL)
+	    /* In case a forward DW_TAG_imported_unit has read the CU
+	       already.  */
+	    continue;
+	  process_psymtab_comp_unit (per_cu.get (), per_objfile, false,
+				     language_minimal, nullptr);
+	}
     }
 
   /* This has to wait until we read the CUs, we need the list of DWOs.  */
@@ -7885,8 +7954,20 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
 
   if (per_bfd->using_index)
     {
-      per_bfd->cooked_index_table = index_storage.release ();
-      per_bfd->cooked_index_table->finalize ();
+      indexes.push_back (index_storage.release ());
+      /* Remove any NULL entries.  This might happen if parallel-for
+	 decides to throttle the number of threads that were used.  */
+      indexes.erase
+	(std::remove_if (indexes.begin (),
+			 indexes.end (),
+			 [] (const std::unique_ptr<cooked_index> &entry)
+			 {
+			   return entry == nullptr;
+			 }),
+	 indexes.end ());
+      indexes.shrink_to_fit ();
+      per_bfd->cooked_index_table.reset
+	(new cooked_index_vector (std::move (indexes)));
 
       const cooked_index_entry *main_entry
 	= per_bfd->cooked_index_table->get_main ();
@@ -19365,9 +19446,9 @@ cooked_indexer::ensure_cu_exists (cutu_reader *reader,
      Doing this check here avoids self-imports as well.  */
   if (for_scanning)
     {
-      if (per_cu->scanned)
+      bool nope = false;
+      if (!per_cu->scanned.compare_exchange_strong (nope, true))
 	return nullptr;
-      per_cu->scanned = true;
     }
   if (per_cu == m_per_cu)
     return reader;
diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h
index a8a03c574e1..4e2776fe436 100644
--- a/gdb/dwarf2/read.h
+++ b/gdb/dwarf2/read.h
@@ -169,7 +169,7 @@ struct dwarf2_per_cu_data
 
   /* True if this CU has been scanned by the indexer; false if
      not.  */
-  bool scanned : 1;
+  std::atomic<bool> scanned;
 
   /* Our index in the unshared "symtabs" vector.  */
   unsigned index = 0;
@@ -457,7 +457,7 @@ struct dwarf2_per_bfd
   std::unique_ptr<mapped_debug_names> debug_names_table;
 
   /* The cooked index, or NULL if not using one.  */
-  std::unique_ptr<cooked_index> cooked_index_table;
+  std::unique_ptr<cooked_index_vector> cooked_index_table;
 
   /* When using index_table, this keeps track of all quick_file_names entries.
      TUs typically share line table entries with a CU, so we maintain a
-- 
2.31.1


