[PATCH V4] Fix issues with reading rnglists, especially from dwo files, for DWARF v5

Sat Jul 11 17:54:22 GMT 2020

> @@ -1378,12 +1387,13 @@ static void read_call_site_scope (struct die_info *die, struct dwarf2_cu *cu);
>
>  static void read_variable (struct die_info *die, struct dwarf2_cu *cu);
>
> -static int dwarf2_ranges_read (unsigned, CORE_ADDR *, CORE_ADDR *,
> -			       struct dwarf2_cu *, dwarf2_psymtab *);
> -
>  /* Return the .debug_loclists section to use for cu.  */
>  static struct dwarf2_section_info *cu_debug_loc_section (struct dwarf2_cu *cu);
>
> +/* Return the .debug_rnglists section to use for cu.  */
> +static struct dwarf2_section_info *cu_debug_rnglists_section
> +(struct dwarf2_cu *cu, dwarf_tag tag);

Indent this last line with two spaces.

> @@ -13802,17 +13817,33 @@ dwarf2_rnglists_process (unsigned offset, struct dwarf2_cu *cu,
>    const gdb_byte *buffer;
>    CORE_ADDR baseaddr;
>    bool overflow = false;
> +  ULONGEST addr_index;
> +  bool ignore_dwo_unit = false;
> +  struct dwarf2_section_info *rnglists_section;
>
>    base = cu->base_address;
>
> -  per_objfile->per_bfd->rnglists.read (objfile);
> -  if (offset >= per_objfile->per_bfd->rnglists.size)
> +  /* Make sure we read the .debug_rnglists section from the file that
> +     contains the DW_AT_ranges attribute we are reading.  Normally that
> +     would be the .dwo file, if there is one.  However for DW_TAG_compile_unit
> +     we always want to read from objfile/linked program (which would be the
> +     DW_TAG_skeleton_unit DIE if we're using split dwarf).  */
> +  if (tag == DW_TAG_compile_unit)
> +    ignore_dwo_unit = true;
> +
> +  if (cu->dwo_unit != nullptr && !ignore_dwo_unit)
> +      rnglists_section = &cu->dwo_unit->dwo_file->sections.rnglists;
> +  else
> +      rnglists_section = &per_objfile->per_bfd->rnglists;

If think you can omit the `ignore_dwo_unit` variable and write this directly,
without loss of readability:

  if (cu->dwo_unit != nullptr && tag != DW_TAG_compile_unit)

Also, should this use cu_debug_rnglists_section?

> @@ -19094,13 +19184,65 @@ read_loclist_index (struct dwarf2_cu *cu, ULONGEST loclist_index)
>      return bfd_get_64 (abfd, info_ptr) + loclist_base;
>  }
>
> +/* Given a DW_FORM_rnglistx value rnglist_index, fetch the offset from the

rnglist_index -> RNGLIST_INDEX

> +   array of offsets in the .debug_rnglists section.  */
> +static CORE_ADDR
> +read_rnglist_index (struct dwarf2_cu *cu, ULONGEST rnglist_index,
> +		    dwarf_tag tag)
> +{
> +  struct dwarf2_per_objfile *dwarf2_per_objfile = cu->per_objfile;
> +  struct objfile *objfile = dwarf2_per_objfile->objfile;
> +  bfd *abfd = objfile->obfd;
> +  ULONGEST rnglist_header_size =
> +    (cu->header.initial_length_size == 4 ? RNGLIST_HEADER_SIZE32
> +     : RNGLIST_HEADER_SIZE64);
> +  ULONGEST rnglist_base =
> +      (cu->dwo_unit != nullptr) ? rnglist_header_size : cu->ranges_base;
> +  ULONGEST start_offset =
> +    rnglist_base + rnglist_index * cu->header.offset_size;
> +
> +  /* Get rnglists section.  */
> +  struct dwarf2_section_info *section = cu_debug_rnglists_section (cu, tag);
> +  if (section == nullptr)
> +    error (_("Cannot find .debug_rnglists section [in module %s]"),
> +	   objfile_name (objfile));

As of now, cu_debug_rnglists_section can't return nullptr.  And I think it should
stay this way, it throws an error if it can't return a valid section.

> +
> +  /* Read the rnglists section content.  */
> +  section->read (objfile);
> +  if (section->buffer == nullptr)
> +    error (_("DW_FORM_rnglistx used without .debug_rnglists section "
> +	     "[in module %s]"),
> +       objfile_name (objfile));

Align `objfile_name` with the opening parenthesis.

> +
> +  /* Verify the rnglist header is valid (same as loclist header).  */

Is this comment accurate?  The code validates rnglist_index, not the header, right?

> +  struct loclist_header header;

Rename this type `loclists_rnglists_header`, to match the function.

> +  read_loclists_rnglists_header (&header, section);
> +  if (rnglist_index >= header.offset_entry_count)
> +    error (_("DW_FORM_rnglistx index pointing outside of "
> +	     ".debug_rnglists offset array [in module %s]"),
> +	    objfile_name (objfile));

Same here.

> +
> +  /* Validate that the offset is within the section's range.  */
> +  if (start_offset >= section->size)

Pendantically, I suppose we should verify not only that the start_offset
is within the section's range, but also that there are enough bytes to read?

For example, if we have a 100 bytes-long section, and start_offset is 99, we
won't be able to read 4 or 8 bytes.

> +    error (_("DW_FORM_rnglistx pointing outside of "
> +             ".debug_rnglists section [in module %s]"),
> +          objfile_name (objfile));

Same here.

> @@ -23383,6 +23530,30 @@ cu_debug_loc_section (struct dwarf2_cu *cu)
>  				  : &per_objfile->per_bfd->loc);
>  }
>
> +/* Return the .debug_rnglists section to use for CU.  */
> +static struct dwarf2_section_info *
> +cu_debug_rnglists_section (struct dwarf2_cu *cu, dwarf_tag tag)
> +{
> +  if (cu->header.version < 5)
> +    error (_(".debug_rnglists section cannot be used in DWARF %d"),
> +	   cu->header.version);
> +  struct dwarf2_per_objfile *dwarf2_per_objfile = cu->per_objfile;
> +
> +  /* Make sure we read the .debug_rnglists section from the file that
> +     contains the DW_AT_ranges attribute we are reading.  Normally that
> +     would be the .dwo file, if there is one.  However for DW_TAG_compile_unit
> +     we always want to read from objfile/linked program (which would be the
> +     DW_TAG_skeleton_unit DIE if we're using split dwarf).  */
> +  if (cu->dwo_unit != nullptr && tag != DW_TAG_compile_unit)
> +    {
> +      struct dwo_sections *sections = &cu->dwo_unit->dwo_file->sections;
> +
> +      if (sections->rnglists.size > 0)
> +	return &sections->rnglists;
> +    }
> +  return &dwarf2_per_objfile->per_bfd->rnglists;

I expressed a concern about this earlier: if an attribute in the .dwo is of form
DW_FORM_rnglistx, but for some reason the .dwo has no .debug_rnglist section, what
happens?  It seems to me like we're going to return the section from the executable,
which would be wrong.

Also, I just debugged this function, and saw that it is called in this context:

#0  cu_debug_rnglists_section (cu=0x615000026c80, tag=DW_TAG_skeleton_unit) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:23557
#1  0x0000557c4612cde5 in read_rnglist_index (cu=0x615000026c80, rnglist_index=0, tag=DW_TAG_skeleton_unit) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:19205
#2  0x0000557c4612d66e in read_attribute_reprocess (reader=0x7fffb47cf240, attr=0x621000186198, tag=DW_TAG_skeleton_unit) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:19258
#3  0x0000557c4612152d in read_full_die_1 (reader=0x7fffb47cf240, diep=0x7fffb47cf280, info_ptr=0x60400001e8c0 "", num_extra_attrs=0) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:18253
#4  0x0000557c461217d9 in read_full_die (reader=0x7fffb47cf240, diep=0x7fffb47cf280, info_ptr=0x60400001e8a4 "\001") at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:18268
#5  0x0000557c46094af2 in cutu_reader::cutu_reader (this=0x7fffb47cf240, this_cu=0x621000183910, per_objfile=0x61300000a840, abbrev_table=0x60b00006ff30, existing_cu=0x0, skip_partial=false) at /
home/simark/src/binutils-gdb/gdb/dwarf2/read.c:7202
#6  0x0000557c4609b0ca in process_psymtab_comp_unit (this_cu=0x621000183910, per_objfile=0x61300000a840, want_partial_unit=false, pretend_language=language_minimal) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:7667
#7  0x0000557c460a0fd9 in dwarf2_build_psymtabs_hard (per_objfile=0x61300000a840) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:8051
#8  0x0000557c46086ae3 in dwarf2_build_psymtabs (objfile=0x614000006440) at /home/simark/src/binutils-gdb/gdb/dwarf2/read.c:6106

As you can see, tag == DW_TAG_skeleton_unit.

I thought that somehow, when reading a CU that uses a DWO, we were creating
a "logical" DIE tree by combining the DW_TAG_skeleton_unit DIE and the
children of the DWO's DW_TAG_compile_unit DIE, and while doing this,
overwriting the DW_TAG_skeleton_unit's DIE to use the DW_TAG_compile_unit
tag instead.  Therefore making it appear to the rest of the DWARF reader
as if it was a "standard" DW_TAG_compile_unit DIE.  But no, maybe I just dreamed
all of this, or I can't find it anymore.

In fact, the reason the code was checking for DW_TAG_compile_unit must be that
in the GCC/pre-standard version, the skeleton DIE in the executable is a
DW_TAG_compile_unit.  With DWARF5, we'll see DW_TAG_skeleton_unit here.

So I believe we should use

  (tag != DW_TAG_compile_unit && tag != DW_TAG_skeleton_unit)

to cover both versions, GCC pre-standard and DWARF 5.  Does that make sense?

Wherever we use the logic:

	  int need_ranges_base = (die->tag != DW_TAG_compile_unit
				  && attr->form != DW_FORM_rnglistx);

we should maybe check for DW_TAG_skeleton_unit as well?

Simon