[RFC/dwarf-2] Add support for included files

Thu Apr 15 22:13:00 GMT 2004

> Could you try one more thing for me?

Sure!

> As you noticed, the only two functions that dwarf_decode_lines calls
> to actually record the information it gets are
> buildsym.c:start_subfile (via dwarf2_start_subfile) and
> buildsym.c:record_line.  Could you change dwarf_decode_lines to take
> two function pointers (with an accompanying closure pointer) for those
> two functions?  I guess this is an instance of the 'builder' pattern,
> so maybe name it something appropriately suggestive.  Let
> dwarf_decode_lines continue to call dwarf2_start_subfile directly,
> just passing the builder func and closure along with filename and
> dirname.
> 
> Then, instead of duplicating dwarf_decode_lines, have the existing
> call in read_file_scope and the new call you've added just pass
> different function/closures to it.

I was just wondering, before trying that, whether we could do it in a
slightly simpler, though non-generic, way.

Using the approach you suggest (which I like): We need to define a
couple of wrapper functions around start_subfile and record_line
because their profile will not match the profile of our callbacks.
We'll need 3 callbacks per case (3 for psymtabs, 3 for symtabs).
We'll also have to use void* pointers if we want to make the context
generic, which I would prefer to avoid. It's great for heavily reused
code, but is it worth it for this function?

What we could do instead is:
  . Add a new parameter: struct partial_symtab *pst
  . If this pst is NULL, then scan for symtabs as before
  . If pst is non-NULL, then scan for partial symtabs, and do not
    call the 2 buildsym.c functions.

Just for illustration purposes, I tweaked my previous patch. Attached
is a new patch that shows what it would become. I did this because
even if we end up not choosing this solution, it's still a good
intermediate version on which to implement your suggestion.

I verified that the performance impact on GDB was still pretty small.

If you like it, then I'll review, clean it up and properly submit it
for review. Otherwise, I'll send a new version including your
suggestion.

Thanks,
-- 
Joel
-------------- next part --------------
Index: dwarf2read.c
===================================================================
RCS file: /cvs/src/src/gdb/dwarf2read.c,v
retrieving revision 1.142
diff -u -p -r1.142 dwarf2read.c

--- dwarf2read.c	2 Apr 2004 04:35:46 -0000	1.142
+++ dwarf2read.c	15 Apr 2004 22:02:36 -0000
@@ -583,6 +583,13 @@ static void dwarf2_locate_sections (bfd 
 static void dwarf2_build_psymtabs_easy (struct objfile *, int);
 #endif
 
+static void dwarf2_create_include_psymtab (char *, struct partial_symtab *,
+                                           struct objfile *);
+
+static void dwarf2_build_include_psymtabs (struct dwarf2_cu *,
+                                           const unsigned int,
+                                           struct partial_symtab *);
+
 static void dwarf2_build_psymtabs_hard (struct objfile *, int);
 
 static char *scan_partial_symbols (char *, CORE_ADDR *, CORE_ADDR *,
@@ -629,7 +636,8 @@ static struct abbrev_info *dwarf2_lookup
 						 struct dwarf2_cu *);
 
 static char *read_partial_die (struct partial_die_info *,
-			       bfd *, char *, struct dwarf2_cu *);
+			       bfd *, char *, struct dwarf2_cu *,
+                               unsigned int *);
 
 static char *read_full_die (struct die_info **, bfd *, char *,
 			    struct dwarf2_cu *, int *);
@@ -684,12 +692,15 @@ static struct die_info *die_specificatio
 
 static void free_line_header (struct line_header *lh);
 
+static void add_file_name (struct line_header *, char *, unsigned int,
+                           unsigned int, unsigned int);
+
 static struct line_header *(dwarf_decode_line_header
                             (unsigned int offset,
                              bfd *abfd, struct dwarf2_cu *cu));
 
 static void dwarf_decode_lines (struct line_header *, char *, bfd *,
-				struct dwarf2_cu *);
+				struct dwarf2_cu *, struct partial_symtab *);
 
 static void dwarf2_start_subfile (char *, char *);
 
@@ -1100,6 +1111,65 @@ read_comp_unit_head (struct comp_unit_he
   return info_ptr;
 }
 
+/* Allocate a new, symbol-less partial symtab for the file named NAME, and
+   mark it as an include of PST by making PST its only dependency.  */
+
+static void
+dwarf2_create_include_psymtab (char *name, struct partial_symtab *pst,
+                               struct objfile *objfile)
+{
+  struct partial_symtab *subpst = allocate_psymtab (name, objfile);
+
+  subpst->section_offsets = pst->section_offsets;
+  subpst->textlow = 0;
+  subpst->texthigh = 0;
+
+  subpst->dependencies = (struct partial_symtab **)
+    obstack_alloc (&objfile->objfile_obstack,
+                   sizeof (struct partial_symtab *));
+  subpst->dependencies[0] = pst;
+  subpst->number_of_dependencies = 1;
+
+  subpst->globals_offset = 0;
+  subpst->n_global_syms = 0;
+  subpst->statics_offset = 0;
+  subpst->n_static_syms = 0;
+  subpst->symtab = NULL;
+  subpst->read_symtab = pst->read_symtab;
+  subpst->readin = 0;
+
+  /* No private part is necessary for include psymtabs.  This property
+     can be used to differentiate between such include psymtabs and
+     the regular ones.  If it ever happens that a regular psymtab can
+     legitimately have a NULL PST_PRIVATE part, then we'll have to add a
+     dedicated field for that in the dwarf2_pinfo structure.  */
+  PST_PRIVATE (subpst) = NULL;
+}
+
+/* Decode the line number header located at LINE_OFFSET for CU and
+   scan its line number program for the source files included by
+   the source file represented by PST.  Build an include partial
+   symtab for each of them; do nothing if there is no line header.  */
+
+static void
+dwarf2_build_include_psymtabs (struct dwarf2_cu *cu,
+                               const unsigned int line_offset,
+                               struct partial_symtab *pst)
+{
+  struct objfile *objfile = cu->objfile;
+  bfd *abfd = objfile->obfd;
+  struct line_header *lh;
+
+  lh = dwarf_decode_line_header (line_offset, abfd, cu);
+  if (lh == NULL)
+    return;  /* No linetable, so no includes.  */
+
+  dwarf_decode_lines (lh, NULL, abfd, cu, pst);
+
+  free_line_header (lh);
+}
+
+
 /* Build the partial symbol table by doing a quick pass through the
    .debug_info and .debug_abbrev sections.  */
 
@@ -1168,6 +1238,8 @@ dwarf2_build_psymtabs_hard (struct objfi
     {
       struct cleanup *back_to_inner;
       struct dwarf2_cu cu;
+      unsigned int line_offset = 0;
+
       beg_of_comp_unit = info_ptr;
 
       cu.objfile = objfile;
@@ -1208,7 +1280,7 @@ dwarf2_build_psymtabs_hard (struct objfi
 
       /* Read the compilation unit die */
       info_ptr = read_partial_die (&comp_unit_die, abfd, info_ptr,
-				   &cu);
+				   &cu, &line_offset);
 
       /* Set the language we're debugging */
       set_cu_language (comp_unit_die.language, &cu);
@@ -1269,6 +1341,10 @@ dwarf2_build_psymtabs_hard (struct objfi
       info_ptr = beg_of_comp_unit + cu.header.length 
                                   + cu.header.initial_length_size;
 
+      /* Get the list of files included in the current compilation unit,
+         and build a psymtab for each of them.  */
+      dwarf2_build_include_psymtabs (&cu, line_offset, pst);
+
       do_cleanups (back_to_inner);
     }
   do_cleanups (back_to);
@@ -1300,7 +1376,7 @@ scan_partial_symbols (char *info_ptr, CO
 	 inside the loop.  */
       int info_ptr_updated = 0;
 
-      info_ptr = read_partial_die (&pdi, abfd, info_ptr, cu);
+      info_ptr = read_partial_die (&pdi, abfd, info_ptr, cu, NULL);
 
       /* Anonymous namespaces have no name but have interesting
 	 children, so we need to look at them.  Ditto for anonymous
@@ -1652,7 +1728,7 @@ add_partial_structure (struct partial_di
 	  struct partial_die_info child_pdi;
 
 	  next_child = read_partial_die (&child_pdi, abfd, next_child,
-					 cu);
+					 cu, NULL);
 	  if (!child_pdi.tag)
 	    break;
 	  if (child_pdi.tag == DW_TAG_subprogram)
@@ -1691,7 +1767,7 @@ add_partial_enumeration (struct partial_
   
   while (1)
     {
-      info_ptr = read_partial_die (&pdi, abfd, info_ptr, cu);
+      info_ptr = read_partial_die (&pdi, abfd, info_ptr, cu, NULL);
       if (pdi.tag == 0)
 	break;
       if (pdi.tag != DW_TAG_enumerator || pdi.name == NULL)
@@ -1916,6 +1992,32 @@ psymtab_to_symtab_1 (struct partial_symt
   struct cleanup *back_to;
   struct attribute *attr;
   CORE_ADDR baseaddr;
+  int i;
+
+  for (i = 0; i < pst->number_of_dependencies; i++)
+    if (!pst->dependencies[i]->readin)
+      {
+        /* Inform about additional files that need to be read in.  */
+        if (info_verbose)
+          {
+            fputs_filtered (" ", gdb_stdout);
+            wrap_here ("");
+            fputs_filtered ("and ", gdb_stdout);
+            wrap_here ("");
+            printf_filtered ("%s...", pst->dependencies[i]->filename);
+            wrap_here ("");     /* Flush output */
+            gdb_flush (gdb_stdout);
+          }
+        psymtab_to_symtab_1 (pst->dependencies[i]);
+      }
+
+  if (PST_PRIVATE (pst) == NULL)
+    {
+      /* It's an include file, no symbols to read for it.
+         Everything is in the parent symtab.  */
+      pst->readin = 1;
+      return;
+    }
 
   dwarf2_per_objfile = objfile_data (pst->objfile, dwarf2_objfile_data_key);
 
@@ -2204,7 +2306,7 @@ read_file_scope (struct die_info *die, s
         {
           make_cleanup ((make_cleanup_ftype *) free_line_header,
                         (void *) line_header);
-          dwarf_decode_lines (line_header, comp_dir, abfd, cu);
+          dwarf_decode_lines (line_header, comp_dir, abfd, cu, NULL);
         }
     }
 
@@ -4414,11 +4516,14 @@ dwarf2_lookup_abbrev (unsigned int numbe
   return NULL;
 }
 
-/* Read a minimal amount of information into the minimal die structure.  */
+/* Read a minimal amount of information into the minimal die structure.
+   If not NULL, the offset where the Line Number Information data is
+   stored will be saved in LINE_OFFSET.  */
 
 static char *
 read_partial_die (struct partial_die_info *part_die, bfd *abfd,
-		  char *info_ptr, struct dwarf2_cu *cu)
+		  char *info_ptr, struct dwarf2_cu *cu,
+                  unsigned int *line_offset)
 {
   unsigned int abbrev_number, bytes_read, i;
   struct abbrev_info *abbrev;
@@ -4512,6 +4617,9 @@ read_partial_die (struct partial_die_inf
 	    part_die->sibling = dwarf2_per_objfile->info_buffer
 	      + dwarf2_get_ref_die_offset (&attr, cu);
 	  break;
+        case DW_AT_stmt_list:
+          if (line_offset != NULL)
+            *line_offset = DW_UNSND (&attr);
 	default:
 	  break;
 	}
@@ -4527,7 +4635,7 @@ read_partial_die (struct partial_die_inf
 
       spec_ptr = dwarf2_per_objfile->info_buffer
 	+ dwarf2_get_ref_die_offset (&spec_attr, cu);
-      read_partial_die (&spec_die, abfd, spec_ptr, cu);
+      read_partial_die (&spec_die, abfd, spec_ptr, cu, NULL);
       if (spec_die.name)
 	{
 	  part_die->name = spec_die.name;
@@ -5399,7 +5507,7 @@ check_cu_functions (CORE_ADDR address, s
 
 static void
 dwarf_decode_lines (struct line_header *lh, char *comp_dir, bfd *abfd,
-		    struct dwarf2_cu *cu)
+		    struct dwarf2_cu *cu, struct partial_symtab *pst)
 {
   char *line_ptr;
   char *line_end;
@@ -5408,6 +5516,38 @@ dwarf_decode_lines (struct line_header *
   CORE_ADDR baseaddr;
   struct objfile *objfile = cu->objfile;
 
+  /* When decoding the Line Number Program for the purpose of building
+     the partial symtabs included by the current CU, we need to do
+     the following:
+
+     We first scan the Line Header.  It contains a list of files referenced
+     by the Line Number Program.  We then scan the Line Number Program
+     for opcodes changing the source file.  For each file selected in
+     the program, we mark it as included using the FILE_IS_INCLUDED
+     array. Once we're finished scanning the Line Number Program, we can
+     then iterate over FILE_IS_INCLUDED and create a corresponding
+     include partial symtab for each file that was marked as included.
+     */
+
+  /* FILE_IS_INCLUDED is an array allocated on the heap, whose size
+     is stored in FILE_IS_INCLUDED_SIZE.  Each element of this array
+     corresponds to the file of the same index in the Line Header,
+     as stored in the line_header struct we built for the current unit.
+     Each element is initially set to zero, and then to nonzero if the
+     corresponding file is included.  The size of this array may be
+     larger than necessary, and the number of meaningful entries is
+     stored in lh->num_file_names.  */
+  char *file_is_included = NULL;
+  int file_is_included_size = 0;
+  const int decode_for_pst_p = (pst != NULL);
+
+  if (decode_for_pst_p)
+    {
+      file_is_included = xmalloc (lh->file_names_size * sizeof (char));
+      memset (file_is_included, 0, lh->file_names_size * sizeof (char));
+      file_is_included_size = lh->file_names_size;
+    }
+
   baseaddr = ANOFFSET (objfile->section_offsets, SECT_OFF_TEXT (objfile));
 
   line_ptr = lh->statement_program_start;
@@ -5425,9 +5565,9 @@ dwarf_decode_lines (struct line_header *
       int basic_block = 0;
       int end_sequence = 0;
 
-      /* Start a subfile for the current file of the state machine.  */
-      if (lh->num_file_names >= file)
+      if (!decode_for_pst_p && lh->num_file_names >= file)
 	{
+          /* Start a subfile for the current file of the state machine.  */
 	  /* lh->include_dirs and lh->file_names are 0-based, but the
 	     directory and file name numbers in the statement program
 	     are 1-based.  */
@@ -5452,9 +5592,12 @@ dwarf_decode_lines (struct line_header *
 	      address += (adj_opcode / lh->line_range)
 		* lh->minimum_instruction_length;
 	      line += lh->line_base + (adj_opcode % lh->line_range);
-	      /* append row to matrix using current values */
-	      record_line (current_subfile, line, 
-	                   check_cu_functions (address, cu));
+              if (!decode_for_pst_p)
+                {
+	          /* append row to matrix using current values */
+	          record_line (current_subfile, line, 
+	                       check_cu_functions (address, cu));
+                }
 	      basic_block = 1;
 	    }
 	  else switch (op_code)
@@ -5467,7 +5610,8 @@ dwarf_decode_lines (struct line_header *
 		{
 		case DW_LNE_end_sequence:
 		  end_sequence = 1;
-		  record_line (current_subfile, 0, address);
+                  if (!decode_for_pst_p)
+		    record_line (current_subfile, 0, address);
 		  break;
 		case DW_LNE_set_address:
 		  address = read_address (abfd, line_ptr, cu, &bytes_read);
@@ -5491,6 +5635,20 @@ dwarf_decode_lines (struct line_header *
                       read_unsigned_leb128 (abfd, line_ptr, &bytes_read);
                     line_ptr += bytes_read;
                     add_file_name (lh, cur_file, dir_index, mod_time, length);
+
+                    if (decode_for_pst_p)
+                      {
+                        if (file_is_included_size != lh->file_names_size)
+                          {
+                            /* The add_file_name() operation above caused
+                               the file_names array size in the line_header
+                               struct to be increased.  Increase our
+                               file_is_included array size accordingly.  */
+                            file_is_included = xrealloc (file_is_included,
+                                                         lh->file_names_size);
+                          }
+                        file_is_included [lh->num_file_names - 1] = 0;
+                      }
                   }
 		  break;
 		default:
@@ -5500,8 +5658,9 @@ dwarf_decode_lines (struct line_header *
 		}
 	      break;
 	    case DW_LNS_copy:
-	      record_line (current_subfile, line, 
-	                   check_cu_functions (address, cu));
+              if (!decode_for_pst_p)
+	        record_line (current_subfile, line, 
+	                     check_cu_functions (address, cu));
 	      basic_block = 0;
 	      break;
 	    case DW_LNS_advance_pc:
@@ -5527,7 +5686,10 @@ dwarf_decode_lines (struct line_header *
                   dir = lh->include_dirs[fe->dir_index - 1];
                 else
                   dir = comp_dir;
-                dwarf2_start_subfile (fe->name, dir);
+                if (decode_for_pst_p)
+                  file_is_included[file - 1] = 1;
+                else
+                  dwarf2_start_subfile (fe->name, dir);
               }
 	      break;
 	    case DW_LNS_set_column:
@@ -5564,6 +5726,22 @@ dwarf_decode_lines (struct line_header *
 	      }
 	    }
 	}
+    }
+
+  if (decode_for_pst_p)
+    {
+      int file_index;
+
+      /* Now that we're done scanning the Line Number Program, we can
+         create the psymtab of each included file.  */
+      for (file_index = 0; file_index < lh->num_file_names; file_index++)
+        if (file_is_included[file_index] == 1)
+          {
+            char *include_name = lh->file_names [file_index].name;
+    
+            if (strcmp (include_name, pst->filename) != 0)
+              dwarf2_create_include_psymtab (include_name, pst, objfile);
+          }
     }
 }