This is the mail archive of the elfutils-devel@sourceware.org mailing list for the elfutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] libdw: Parse new DWARF5 units and CU DIEs.


Parse DWARF5 units, add the unit_type to the Dwarf_CU and generalize some
code calculating the header length and getting at the first DIE of a unit.
Unit headers can have different sizes depending on the unit type.

Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 libdw/ChangeLog                            |  36 ++++++
 libdw/dwarf.h                              |  16 ++-
 libdw/dwarf_cu_die.c                       |  12 +-
 libdw/dwarf_formref_die.c                  |  45 ++++---
 libdw/dwarf_getaranges.c                   |  16 +--
 libdw/dwarf_getlocation_die.c              |   2 +-
 libdw/dwarf_getlocation_implicit_pointer.c |   2 +-
 libdw/dwarf_nextcu.c                       | 191 ++++++++++++++++++++++-------
 libdw/libdwP.h                             | 113 ++++++++++++++---
 libdw/libdw_findcu.c                       |  67 ++++++----
 libdwfl/ChangeLog                          |   7 ++
 libdwfl/cu.c                               |  14 +--
 12 files changed, 394 insertions(+), 127 deletions(-)

diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 37edcd7..58c882b 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,39 @@
+2018-01-29  Mark Wielaard  <mark@klomp.org>
+
+	* dwarf.h (DW_UT_*): Add DWARF Unit Header Types.
+	* dwarf_cu_die.c (dwarf_cu_die): Rename arguments. type_signaturep
+	is now called unit_idp. type_offsetp is now called subdie_offsetp.
+	* dwarf_formref_die.c (dwarf_formref_die): Scan both .debug_info
+	and .debug_types sections for type units when type signature ref
+	not found.
+	* dwarf_getaranges.c (dwarf_getaranges): Use __libdw_findcu and
+	__libdw_first_die_off_from_cu instead of trying by hand.
+	* dwarf_getlocation_die.c (dwarf_getlocation_die): Use ISV4TU
+	instead of checking type_offset by hand.
+	* dwarf_getlocation_implicit_pointer.c
+	(dwarf_getlocation_implicit_pointer): Likewise.
+	* dwarf_nextcu.c (dwarf_next_unit): Call __libdw_next_unit.
+	(__libdw_next_unit): New function based on dwarf_next_unit with
+	DWARF5 header support.
+	* libdwP.h (struct Dwarf_CU): Renamed type_offset to subdie_offset
+	and type_sig8 to unit_id8.
+	(ISV4TU): New macro to determine whether a CU is a version 4 type
+	unit (which comes from the .debug_types section).
+	(DIE_OFFSET_FROM_CU_OFFSET): Replaced macro by real function...
+	(__libdw_first_die_from_cu_start): ... that also handles DWARF5
+	unit headers.
+	(__libdw_first_die_off_from_cu): New function that calls the above
+	using the CU fields.
+	(CUDIE): Use __libdw_first_die_off_from_cu.
+	(SUBDIE): New macro that provides the DIE for a CU using the
+	subdie_offset.
+	(__libdw_next_unit): New internal function declaration.
+	* libdw_findcu.c (__libdw_intern_next_unit): Use __libdw_next_unit.
+	Accept DWARF version 5 headers. Setup unit_type.
+	(__libdw_findcu): Rename debug_types argument to v4_debug_types
+	argument (to indicate that we are looking in the .debug_types
+	section). Support finding the exact offset (unit header start).
+
 2018-01-25  Mark Wielaard  <mark@klomp.org>
 
 	* Makefile.am (libdw_a_SOURCES): Add dwarf_die_addr_die.c.
diff --git a/libdw/dwarf.h b/libdw/dwarf.h
index 8edf719..bf81694 100644
--- a/libdw/dwarf.h
+++ b/libdw/dwarf.h
@@ -1,5 +1,5 @@
 /* This file defines standard DWARF types, structures, and macros.
-   Copyright (C) 2000-2011, 2014, 2016, 2017 Red Hat, Inc.
+   Copyright (C) 2000-2011, 2014, 2016, 2017, 2018 Red Hat, Inc.
    This file is part of elfutils.
 
    This file is free software; you can redistribute it and/or modify
@@ -29,6 +29,20 @@
 #ifndef _DWARF_H
 #define	_DWARF_H 1
 
+/* DWARF Unit Header Types.  */
+enum
+  {
+    DW_UT_compile = 0x01,
+    DW_UT_type = 0x02,
+    DW_UT_partial = 0x03,
+    DW_UT_skeleton = 0x04,
+    DW_UT_split_compile = 0x05,
+    DW_UT_split_type = 0x06,
+
+    DW_UT_lo_user = 0x80,
+    DW_UT_hi_user = 0xff
+  };
+
 /* DWARF tags.  */
 enum
   {
diff --git a/libdw/dwarf_cu_die.c b/libdw/dwarf_cu_die.c
index 194da58..7594e7d 100644
--- a/libdw/dwarf_cu_die.c
+++ b/libdw/dwarf_cu_die.c
@@ -37,8 +37,8 @@
 Dwarf_Die *
 dwarf_cu_die (Dwarf_CU *cu, Dwarf_Die *result, Dwarf_Half *versionp,
 	      Dwarf_Off *abbrev_offsetp, uint8_t *address_sizep,
-	      uint8_t *offset_sizep, uint64_t *type_signaturep,
-	      Dwarf_Off *type_offsetp)
+	      uint8_t *offset_sizep, uint64_t *unit_idp,
+	      Dwarf_Off *subdie_offsetp)
 {
   if (cu == NULL)
     return NULL;
@@ -53,10 +53,10 @@ dwarf_cu_die (Dwarf_CU *cu, Dwarf_Die *result, Dwarf_Half *versionp,
     *address_sizep = cu->address_size;
   if (offset_sizep != NULL)
     *offset_sizep = cu->offset_size;
-  if (type_signaturep != NULL)
-    *type_signaturep = cu->type_sig8;
-  if (type_offsetp != NULL)
-    *type_offsetp = cu->type_offset;
+  if (unit_idp != NULL)
+    *unit_idp = cu->unit_id8;
+  if (subdie_offsetp != NULL)
+    *subdie_offsetp = cu->subdie_offset;
 
   return result;
 }
diff --git a/libdw/dwarf_formref_die.c b/libdw/dwarf_formref_die.c
index 704816f..d47fb2f 100644
--- a/libdw/dwarf_formref_die.c
+++ b/libdw/dwarf_formref_die.c
@@ -73,27 +73,38 @@ dwarf_formref_die (Dwarf_Attribute *attr, Dwarf_Die *result)
   if (attr->form == DW_FORM_ref_sig8)
     {
       /* This doesn't have an offset, but instead a value we
-	 have to match in the .debug_types type unit headers.  */
+	 have to match in the type unit headers.  */
 
       uint64_t sig = read_8ubyte_unaligned (cu->dbg, attr->valp);
       cu = Dwarf_Sig8_Hash_find (&cu->dbg->sig8_hash, sig, NULL);
       if (cu == NULL)
-	/* Not seen before.  We have to scan through the type units.  */
-	do
-	  {
-	    cu = __libdw_intern_next_unit (attr->cu->dbg, true);
-	    if (cu == NULL)
-	      {
-		__libdw_seterrno (INTUSE(dwarf_errno) ()
-				  ?: DWARF_E_INVALID_REFERENCE);
-		return NULL;
-	      }
-	  }
-	while (cu->type_sig8 != sig);
-
-      datap = cu->dbg->sectiondata[IDX_debug_types]->d_buf;
-      size = cu->dbg->sectiondata[IDX_debug_types]->d_size;
-      offset = cu->start + cu->type_offset;
+	{
+	  /* Not seen before.  We have to scan through the type units.
+	     Since DWARFv5 these can (also) be found in .debug_info,
+	     so scan that first.  */
+	  bool scan_debug_types = false;
+	  do
+	    {
+	      cu = __libdw_intern_next_unit (attr->cu->dbg, scan_debug_types);
+	      if (cu == NULL)
+		{
+		  if (scan_debug_types == false)
+		    scan_debug_types = true;
+		  else
+		    {
+		      __libdw_seterrno (INTUSE(dwarf_errno) ()
+					?: DWARF_E_INVALID_REFERENCE);
+		      return NULL;
+		    }
+		}
+	    }
+	  while (cu == NULL || cu->unit_id8 != sig);
+	}
+
+      int secid = cu_sec_idx (cu);
+      datap = cu->dbg->sectiondata[secid]->d_buf;
+      size = cu->dbg->sectiondata[secid]->d_size;
+      offset = cu->start + cu->subdie_offset;
     }
   else
     {
diff --git a/libdw/dwarf_getaranges.c b/libdw/dwarf_getaranges.c
index 4252746..50a98cf 100644
--- a/libdw/dwarf_getaranges.c
+++ b/libdw/dwarf_getaranges.c
@@ -1,5 +1,5 @@
 /* Return list address ranges.
-   Copyright (C) 2000-2010 Red Hat, Inc.
+   Copyright (C) 2000-2010, 2016, 2017 Red Hat, Inc.
    This file is part of elfutils.
    Written by Ulrich Drepper <drepper@redhat.com>, 2000.
 
@@ -195,16 +195,10 @@ dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
 	  new_arange->arange.length = range_length;
 
 	  /* We store the actual CU DIE offset, not the CU header offset.  */
-	  const char *cu_header = (dbg->sectiondata[IDX_debug_info]->d_buf
-				   + offset);
-	  unsigned int offset_size;
-	  if (read_4ubyte_unaligned_noncvt (cu_header) == DWARF3_LENGTH_64_BIT)
-	    offset_size = 8;
-	  else
-	    offset_size = 4;
-	  new_arange->arange.offset = DIE_OFFSET_FROM_CU_OFFSET (offset,
-								 offset_size,
-								 false);
+	  Dwarf_CU *cu = __libdw_findcu (dbg, offset, false);
+	  if (unlikely (cu == NULL))
+	    goto fail;
+	  new_arange->arange.offset = __libdw_first_die_off_from_cu (cu);
 
 	  new_arange->next = arangelist;
 	  arangelist = new_arange;
diff --git a/libdw/dwarf_getlocation_die.c b/libdw/dwarf_getlocation_die.c
index 21b4365..a07031e 100644
--- a/libdw/dwarf_getlocation_die.c
+++ b/libdw/dwarf_getlocation_die.c
@@ -69,7 +69,7 @@ dwarf_getlocation_die (Dwarf_Attribute *attr, const Dwarf_Op *op,
     }
 
   if (__libdw_offdie (attr->cu->dbg, dieoff, result,
-                     attr->cu->type_offset != 0) == NULL)
+		      ISV4TU(attr->cu)) == NULL)
     return -1;
 
   return 0;
diff --git a/libdw/dwarf_getlocation_implicit_pointer.c b/libdw/dwarf_getlocation_implicit_pointer.c
index 9505382..b704c70 100644
--- a/libdw/dwarf_getlocation_implicit_pointer.c
+++ b/libdw/dwarf_getlocation_implicit_pointer.c
@@ -63,7 +63,7 @@ dwarf_getlocation_implicit_pointer (Dwarf_Attribute *attr, const Dwarf_Op *op,
 
   Dwarf_Die die;
   if (__libdw_offdie (attr->cu->dbg, op->number, &die,
-		      attr->cu->type_offset != 0) == NULL)
+		      ISV4TU(attr->cu)) == NULL)
     return -1;
 
   if (INTUSE(dwarf_attr) (&die, DW_AT_location, result) == NULL
diff --git a/libdw/dwarf_nextcu.c b/libdw/dwarf_nextcu.c
index fa9b0af..4b394f3 100644
--- a/libdw/dwarf_nextcu.c
+++ b/libdw/dwarf_nextcu.c
@@ -1,5 +1,5 @@
 /* Advance to next CU header.
-   Copyright (C) 2002-2010 Red Hat, Inc.
+   Copyright (C) 2002-2010, 2016, 2017 Red Hat, Inc.
    This file is part of elfutils.
    Written by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -39,11 +39,31 @@ int
 dwarf_next_unit (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
 		 size_t *header_sizep, Dwarf_Half *versionp,
 		 Dwarf_Off *abbrev_offsetp, uint8_t *address_sizep,
-		 uint8_t *offset_sizep, uint64_t *type_signaturep,
-		 Dwarf_Off *type_offsetp)
+		 uint8_t *offset_sizep, uint64_t *v4_type_signaturep,
+		 Dwarf_Off *v4_type_offsetp)
 {
-  const bool debug_types = type_signaturep != NULL;
-  const size_t sec_idx = debug_types ? IDX_debug_types : IDX_debug_info;
+  const bool v4_debug_types = v4_type_signaturep != NULL;
+  return __libdw_next_unit (dwarf, v4_debug_types, off, next_off,
+			     header_sizep, versionp, NULL,
+			     abbrev_offsetp, address_sizep, offset_sizep,
+			     v4_type_signaturep, v4_type_offsetp);
+}
+INTDEF(dwarf_next_unit)
+
+int
+internal_function
+__libdw_next_unit (Dwarf *dwarf, bool v4_debug_types, Dwarf_Off off,
+		   Dwarf_Off *next_off, size_t *header_sizep,
+		   Dwarf_Half *versionp, uint8_t *unit_typep,
+		   Dwarf_Off *abbrev_offsetp, uint8_t *address_sizep,
+		   uint8_t *offset_sizep, uint64_t *unit_id8p,
+		   Dwarf_Off *subdie_offsetp)
+{
+  /* Note that debug_type units come from .debug_types in DWARF < 5 and
+     from .debug_info in DWARF >= 5.  If the user requested the
+     v4_type_signature we return from .debug_types always.  If no signature
+     is requested we return units (any type) from .debug_info.  */
+  const size_t sec_idx = v4_debug_types ? IDX_debug_types : IDX_debug_info;
 
   /* Maybe there has been an error before.  */
   if (dwarf == NULL)
@@ -61,12 +81,14 @@ dwarf_next_unit (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
       return 1;
     }
 
-  /* This points into the .debug_info section to the beginning of the
-     CU entry.  */
+  /* This points into the .debug_info or .debug_types section to the
+     beginning of the CU entry.  */
   const unsigned char *data = dwarf->sectiondata[sec_idx]->d_buf;
   const unsigned char *bytes = data + off;
 
-  /* The format of the CU header is described in dwarf2p1 7.5.1:
+  /* The format of the CU header is described in dwarf2p1 7.5.1 and
+     changed in DWARFv5 (to include unit type, switch location of some
+     fields and add some optional fields).
 
      1.  A 4-byte or 12-byte unsigned integer representing the length
 	 of the .debug_info contribution for that compilation unit, not
@@ -74,23 +96,58 @@ dwarf_next_unit (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
 	 this is a 4-byte unsigned integer (which must be less than
 	 0xfffffff0); in the 64-bit DWARF format, this consists of the
 	 4-byte value 0xffffffff followed by an 8-byte unsigned integer
-	 that gives the actual length (see Section 7.2.2).
+	 that gives the actual length (see Section 7.2.2). This field
+	 indicates whether this unit is 32-bit or 64-bit DWARF, which
+	 affects all other offset fields in this header.
 
       2. A 2-byte unsigned integer representing the version of the
 	 DWARF information for that compilation unit. For DWARF Version
-	 2.1, the value in this field is 2.
+	 2.1, the value in this field is 2 (3 for v3, 4 for v4, 5 for v5).
+	 This field determines the order of the next fields and whether
+	 there are any optional fields in this header.
 
-      3. A 4-byte or 8-byte unsigned offset into the .debug_abbrev
+      3. For DWARF 2, 3 and 4 (including v4 type units):
+         A 4-byte or 8-byte unsigned offset into the .debug_abbrev
 	 section. This offset associates the compilation unit with a
 	 particular set of debugging information entry abbreviations. In
 	 the 32-bit DWARF format, this is a 4-byte unsigned length; in
 	 the 64-bit DWARF format, this is an 8-byte unsigned length (see
 	 Section 7.4).
 
-      4. A 1-byte unsigned integer representing the size in bytes of
+	 For DWARF 5:
+	 A 1-byte unsigned integer representing the unit (header) type.
+	 This field determines what the optional fields in the header
+	 represent.  If this is an unknown unit type then we cannot
+	 assume anything about the rest of the unit (header).
+
+      4. For all DWARF versions (including v4 type units):
+         A 1-byte unsigned integer representing the size in bytes of
 	 an address on the target architecture. If the system uses
 	 segmented addressing, this value represents the size of the
-	 offset portion of an address.  */
+	 offset portion of an address. This is the last field in the header
+	 for DWARF versions 2, 3 and 4 (except for v4 type units).
+
+      5. For DWARF 5 only (this is field 3 for DWARF 2, 3, 4 and v4 types):
+         A 4-byte or 8-byte unsigned offset into the .debug_abbrev
+	 section. This offset associates the compilation unit with a
+	 particular set of debugging information entry abbreviations. In
+	 the 32-bit DWARF format, this is a 4-byte unsigned length; in
+	 the 64-bit DWARF format, this is an 8-byte unsigned length.
+
+      6. For v4 type units (this is really field 5 for v4 types) and
+         DWARF 5 optional (skeleton, split_compile, type and
+         split_type): An 8 byte (opaque) integer constant value. For
+         v4 and v5 type units this is the type signature. For skeleton
+         and split compile units this is the compilation ID.
+
+      7. For v4 type units (this is really field 6 for v4 types) and
+         DWARF 5 optional (type and split_type):
+         A 4-byte or 8-byte unsigned offset. In the 32-bit DWARF format,
+         this is a 4-byte unsigned length; in the 64-bit DWARF format,
+         this is an 8-byte unsigned length. This is the type DIE offset
+	 (which is not necessarily the first DIE in the unit).
+  */
+
   uint64_t length = read_4ubyte_unaligned_inc (dwarf, bytes);
   size_t offset_size = 4;
   /* Lengths of 0xfffffff0 - 0xffffffff are escape codes.  Oxffffffff is
@@ -106,14 +163,6 @@ dwarf_next_unit (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
       return -1;
     }
 
-  /* Now we know how large the header is.  */
-  if (unlikely (DIE_OFFSET_FROM_CU_OFFSET (off, offset_size, debug_types)
-		>= dwarf->sectiondata[sec_idx]->d_size))
-    {
-      *next_off = -1;
-      return 1;
-    }
-
   if (length == DWARF3_LENGTH_64_BIT)
     /* This is a 64-bit DWARF format.  */
     length = read_8ubyte_unaligned_inc (dwarf, bytes);
@@ -121,41 +170,99 @@ dwarf_next_unit (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
   /* Read the version stamp.  Always a 16-bit value.  */
   uint_fast16_t version = read_2ubyte_unaligned_inc (dwarf, bytes);
 
+  /* We keep unit_type at zero for older DWARF since we cannot
+     easily guess whether it is a compile or partial unit.  */
+  uint8_t unit_type = 0;
+  if (version >= 5)
+    unit_type = *bytes++;
+
+  /* All these are optional.  */
+  Dwarf_Off subdie_off = 0;
+  uint64_t sig_id = 0;
+  Dwarf_Off abbrev_offset = 0;
+  uint8_t address_size = 0;
+
+  if (version < 2 || version > 5
+      || (version == 5 && ! (unit_type == DW_UT_compile
+			     || unit_type == DW_UT_partial
+			     || unit_type == DW_UT_skeleton
+			     || unit_type == DW_UT_split_compile
+			     || unit_type == DW_UT_type
+			     || unit_type == DW_UT_split_type)))
+    {
+      /* We cannot really know more about the header.  Just report
+	 the length of the unit, version and unit type.  */
+      goto done;
+    }
+
+  /* We have to guess the unit_type. But we don't have a real CUDIE.  */
+  if (version < 5)
+    unit_type = v4_debug_types ? DW_UT_type : DW_UT_compile;
+
+  /* Now we know how large the header is (should be).  */
+  if (unlikely (__libdw_first_die_from_cu_start (off, offset_size, version,
+						 unit_type)
+		>= dwarf->sectiondata[sec_idx]->d_size))
+    {
+      *next_off = -1;
+      return 1;
+    }
+
+  /* The address size.  Always an 8-bit value.
+     Comes after abbrev_offset for version < 5, otherwise unit type
+     and address size (if a known unit type) comes before abbrev_offset.  */
+  if (version >= 5)
+    address_size = *bytes++;
+
   /* Get offset in .debug_abbrev.  Note that the size of the entry
      depends on whether this is a 32-bit or 64-bit DWARF definition.  */
-  uint64_t abbrev_offset;
   if (__libdw_read_offset_inc (dwarf, sec_idx, &bytes, offset_size,
 			       &abbrev_offset, IDX_debug_abbrev, 0))
     return -1;
 
-  /* The address size.  Always an 8-bit value.  */
-  uint8_t address_size = *bytes++;
+  if (version < 5)
+    address_size = *bytes++;
 
-  if (debug_types)
+  /* Extra fields, signature/id and type offset/padding.  */
+  if (v4_debug_types
+      || (version >= 5
+	  && (unit_type == DW_UT_skeleton || unit_type == DW_UT_split_compile
+	      || unit_type == DW_UT_type || unit_type == DW_UT_split_type)))
     {
-      uint64_t type_sig8 = read_8ubyte_unaligned_inc (dwarf, bytes);
-
-      Dwarf_Off type_offset;
-      if (__libdw_read_offset_inc (dwarf, sec_idx, &bytes, offset_size,
-				   &type_offset, sec_idx, 0))
-	return -1;
+      sig_id = read_8ubyte_unaligned_inc (dwarf, bytes);
+
+      if ((v4_debug_types
+	   || unit_type == DW_UT_type || unit_type == DW_UT_split_type))
+	{
+	  if (__libdw_read_offset_inc (dwarf, sec_idx, &bytes, offset_size,
+				       &subdie_off, sec_idx, 0))
+	    return -1;
+
+	  /* Validate that the TYPE_OFFSET points past the header.  */
+	  if (unlikely (subdie_off < (size_t) (bytes - (data + off))))
+	    goto invalid;
+	}
+    }
 
-      /* Validate that the TYPE_OFFSET points past the header.  */
-      if (unlikely (type_offset < (size_t) (bytes - (data + off))))
-	goto invalid;
+ done:
+  if (unit_id8p != NULL)
+    *unit_id8p = sig_id;
 
-      *type_signaturep = type_sig8;
-      if (type_offsetp != NULL)
-	*type_offsetp = type_offset;
-    }
+  if (subdie_offsetp != NULL)
+    *subdie_offsetp = subdie_off;
 
-  /* Store the header length.  */
+  /* Store the header length.  This is really how much we have read
+     from the header.  If we didn't recognize the unit type the
+     header might actually be bigger.  */
   if (header_sizep != NULL)
     *header_sizep = bytes - (data + off);
 
   if (versionp != NULL)
     *versionp = version;
 
+  if (unit_typep != NULL)
+    *unit_typep = unit_type;
+
   if (abbrev_offsetp != NULL)
     *abbrev_offsetp = abbrev_offset;
 
@@ -166,13 +273,13 @@ dwarf_next_unit (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
   if (offset_sizep != NULL)
     *offset_sizep = offset_size;
 
-  /* See definition of DIE_OFFSET_FROM_CU_OFFSET macro
-     for an explanation of the trick in this expression.  */
+  /* The length of the unit doesn't include the length field itself.
+     The length field is either, with offset == 4: 2 * 4 - 4 == 4,
+     or with offset == 8: 2 * 8 - 4 == 12.  */
   *next_off = off + 2 * offset_size - 4 + length;
 
   return 0;
 }
-INTDEF(dwarf_next_unit)
 
 int
 dwarf_nextcu (Dwarf *dwarf, Dwarf_Off off, Dwarf_Off *next_off,
diff --git a/libdw/libdwP.h b/libdw/libdwP.h
index a38dcfb..10d1a86 100644
--- a/libdw/libdwP.h
+++ b/libdw/libdwP.h
@@ -298,9 +298,15 @@ struct Dwarf_CU
 
   size_t sec_idx; /* Normally .debug_info, could be .debug_type or "fake". */
 
-  /* Zero if this is a normal CU.  Nonzero if it is a type unit.  */
-  size_t type_offset;
-  uint64_t type_sig8;
+  /* The unit type if version >= 5.  Otherwise DW_UT_type for v4 type
+     units (from .debug_types) or guessed from the CU DIE tag (.debug_info).  */
+  uint8_t unit_type;
+
+  /* Zero if the unit type doesn't support a die/type offset and/or id/sig.
+     Nonzero if it is a v4 type unit or for DWARFv5 units depending on
+     unit_type.  */
+  size_t subdie_offset;
+  uint64_t unit_id8;
 
   /* Hash table for the abbreviations.  */
   Dwarf_Abbrev_Hash abbrev_hash;
@@ -323,8 +329,34 @@ struct Dwarf_CU
   void *endp;
 };
 
-/* Compute the offset of a CU's first DIE from its offset.  This
-   is either:
+#define ISV4TU(cu) ((cu)->version == 4 && (cu)->sec_idx == IDX_debug_types)
+
+/* Compute the offset of a CU's first DIE from the CU offset.
+   CU must be a valid/known version/unit_type.  */
+static inline Dwarf_Off
+__libdw_first_die_from_cu_start (Dwarf_Off cu_start,
+				 uint8_t offset_size,
+				 uint16_t version,
+				 uint8_t unit_type)
+{
+/*
+  assert (offset_size == 4 || offset_size == 8);
+  assert (version >= 2 && version <= 5);
+  assert (version >= 5 || (unit_type == DW_UT_compile
+			   || unit_type == DW_UT_partial
+			   || unit_type == DW_UT_type));
+  assert (version != 5 || (unit_type == DW_UT_compile
+			   || unit_type == DW_UT_partial
+			   || unit_type == DW_UT_skeleton
+			   || unit_type == DW_UT_split_compile
+			   || unit_type == DW_UT_type
+			   || unit_type == DW_UT_split_type));
+*/
+
+  Dwarf_Off off = cu_start;
+  if (version < 5)
+    {
+   /*
         LEN       VER     OFFSET    ADDR
       4-bytes + 2-bytes + 4-bytes + 1-byte  for 32-bit dwarf
      12-bytes + 2-bytes + 8-bytes + 1-byte  for 64-bit dwarf
@@ -333,22 +365,61 @@ struct Dwarf_CU
      12-bytes + 2-bytes + 8-bytes + 1-byte + 8-bytes + 8-bytes  for 64-bit
 
    Note the trick in the computation.  If the offset_size is 4
-   the '- 4' term changes the '3 *' into a '2 *'.  If the
-   offset_size is 8 it accounts for the 4-byte escape value
+   the '- 4' term changes the '3 *' (or '4 *') into a '2 *' (or '3 *').
+   If the offset_size is 8 it accounts for the 4-byte escape value
    used at the start of the length.  */
-#define DIE_OFFSET_FROM_CU_OFFSET(cu_offset, offset_size, type_unit)	\
-  ((type_unit) ? ((cu_offset) + 4 * (offset_size) - 4 + 3 + 8)		\
-   : ((cu_offset) + 3 * (offset_size) - 4 + 3))
+      if (unit_type != DW_UT_type)
+	off += 3 * offset_size - 4 + 3;
+      else
+	off += 4 * offset_size - 4 + 3 + 8;
+    }
+  else
+    {
+     /*
+        LEN       VER      TYPE     ADDR     OFFSET   SIGNATURE  TYPE-OFFSET
+      4-bytes + 2-bytes + 1-byte + 1-byte + 4-bytes + 8-bytes + 4-bytes 32-bit
+     12-bytes + 2-bytes + 1-byte + 1-byte + 8-bytes + 8-bytes + 8-bytes 64-bit
+        Both signature and type offset are optional.
+
+        Note same 4/8 offset size trick as above.
+        We explicitly ignore unknown unit types (see asserts above).  */
+      off += 3 * offset_size - 4 + 4;
+      if (unit_type == DW_UT_skeleton || unit_type == DW_UT_split_compile
+	  || unit_type == DW_UT_type || unit_type == DW_UT_split_type)
+	{
+	  off += 8;
+	  if (unit_type == DW_UT_type || unit_type == DW_UT_split_type)
+	    off += offset_size;
+	}
+    }
+
+  return off;
+}
+
+static inline Dwarf_Off
+__libdw_first_die_off_from_cu (struct Dwarf_CU *cu)
+{
+  return __libdw_first_die_from_cu_start (cu->start,
+					  cu->offset_size,
+					  cu->version,
+					  cu->unit_type);
+}
 
 #define CUDIE(fromcu)							      \
   ((Dwarf_Die)								      \
    {									      \
      .cu = (fromcu),							      \
-     .addr = ((char *) fromcu->dbg->sectiondata[cu_sec_idx (fromcu)]->d_buf   \
-	      + DIE_OFFSET_FROM_CU_OFFSET ((fromcu)->start,		      \
-					   (fromcu)->offset_size,	      \
-					   (fromcu)->type_offset != 0))	      \
-   })									      \
+     .addr = ((char *) (fromcu)->dbg->sectiondata[cu_sec_idx (fromcu)]->d_buf \
+	      + __libdw_first_die_off_from_cu (fromcu))			      \
+   })
+
+#define SUBDIE(fromcu)							      \
+  ((Dwarf_Die)								      \
+   {									      \
+     .cu = (fromcu),							      \
+     .addr = ((char *) (fromcu)->dbg->sectiondata[cu_sec_idx (fromcu)]->d_buf \
+	      + (fromcu)->start + (fromcu)->subdie_offset)		      \
+   })
 
 
 /* Prototype of a single .debug_macro operator.  */
@@ -441,6 +512,18 @@ extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
 /* Default OOM handler.  */
 extern void __libdw_oom (void) __attribute ((noreturn)) attribute_hidden;
 
+/* Read next unit (or v4 debug type) and return next offset.  Doesn't
+   create an actual Dwarf_CU just provides necessary header fields.  */
+extern int
+internal_function
+__libdw_next_unit (Dwarf *dbg, bool v4_debug_types, Dwarf_Off off,
+		   Dwarf_Off *next_off, size_t *header_sizep,
+		   Dwarf_Half *versionp, uint8_t *unit_typep,
+		   Dwarf_Off *abbrev_offsetp, uint8_t *address_sizep,
+		   uint8_t *offset_sizep, uint64_t *unit_id8p,
+		   Dwarf_Off *subdie_offsetp)
+     __nonnull_attribute__ (4) internal_function;
+
 /* Allocate the internal data for a unit not seen before.  */
 extern struct Dwarf_CU *__libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
      __nonnull_attribute__ (1) internal_function;
diff --git a/libdw/libdw_findcu.c b/libdw/libdw_findcu.c
index 3ec1ce5..4d1d842 100644
--- a/libdw/libdw_findcu.c
+++ b/libdw/libdw_findcu.c
@@ -1,5 +1,5 @@
 /* Find CU for given offset.
-   Copyright (C) 2003-2010, 2014, 2018 Red Hat, Inc.
+   Copyright (C) 2003-2010, 2014, 2016, 2017, 2018 Red Hat, Inc.
    This file is part of elfutils.
    Written by Ulrich Drepper <drepper@redhat.com>, 2003.
 
@@ -71,22 +71,24 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
 
   Dwarf_Off oldoff = *offsetp;
   uint16_t version;
+  uint8_t unit_type;
   uint8_t address_size;
   uint8_t offset_size;
   Dwarf_Off abbrev_offset;
-  uint64_t type_sig8 = 0;
-  Dwarf_Off type_offset = 0;
-
-  if (INTUSE(dwarf_next_unit) (dbg, oldoff, offsetp, NULL,
-			       &version, &abbrev_offset,
-			       &address_size, &offset_size,
-			       debug_types ? &type_sig8 : NULL,
-			       debug_types ? &type_offset : NULL) != 0)
+  uint64_t unit_id8;
+  Dwarf_Off subdie_offset;
+
+  if (__libdw_next_unit (dbg, debug_types, oldoff, offsetp, NULL,
+			 &version, &unit_type, &abbrev_offset,
+			 &address_size, &offset_size,
+			 &unit_id8, &subdie_offset) != 0)
     /* No more entries.  */
     return NULL;
 
-  /* We only know how to handle the DWARF version 2 through 4 formats.  */
-  if (unlikely (version < 2) || unlikely (version > 4))
+  /* We only know how to handle the DWARF version 2 through 5 formats.
+     For v4 debug types we only handle version 4. */
+  if (unlikely (version < 2) || unlikely (version > 5)
+      || (debug_types && unlikely (version != 4)))
     {
       __libdw_seterrno (DWARF_E_INVALID_DWARF);
       return NULL;
@@ -108,19 +110,41 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
   newp->address_size = address_size;
   newp->offset_size = offset_size;
   newp->version = version;
-  newp->type_sig8 = type_sig8;
-  newp->type_offset = type_offset;
+  newp->unit_id8 = unit_id8;
+  newp->subdie_offset = subdie_offset;
   Dwarf_Abbrev_Hash_init (&newp->abbrev_hash, 41);
   newp->orig_abbrev_offset = newp->last_abbrev_offset = abbrev_offset;
   newp->lines = NULL;
   newp->locs = NULL;
 
-  if (debug_types)
-    Dwarf_Sig8_Hash_insert (&dbg->sig8_hash, type_sig8, newp);
-
   newp->startp = data->d_buf + newp->start;
   newp->endp = data->d_buf + newp->end;
 
+  /* v4 debug type units have version == 4 and unit_type == DW_UT_type.  */
+  if (debug_types)
+    newp->unit_type = DW_UT_type;
+  else if (version < 5)
+    {
+      /* This is a reasonable guess (and needed to get the CUDIE).  */
+      newp->unit_type = DW_UT_compile;
+
+      /* But set it correctly from the actual CUDIE tag.  */
+      Dwarf_Die cudie = CUDIE (newp);
+      int tag = dwarf_tag (&cudie);
+      if (tag == DW_TAG_compile_unit)
+	newp->unit_type = DW_UT_compile;
+      else if (tag == DW_TAG_partial_unit)
+	newp->unit_type = DW_UT_partial;
+      else if (tag == DW_TAG_type_unit)
+	newp->unit_type = DW_UT_type;
+    }
+  else
+    newp->unit_type = unit_type;
+
+  /* Store a reference to any type unit ids in the hash for quick lookup.  */
+  if (unit_type == DW_UT_type || unit_type == DW_UT_split_type)
+    Dwarf_Sig8_Hash_insert (&dbg->sig8_hash, unit_id8, newp);
+
   /* Add the new entry to the search tree.  */
   if (tsearch (newp, tree, findcu_cb) == NULL)
     {
@@ -135,11 +159,11 @@ __libdw_intern_next_unit (Dwarf *dbg, bool debug_types)
 
 struct Dwarf_CU *
 internal_function
-__libdw_findcu (Dwarf *dbg, Dwarf_Off start, bool debug_types)
+__libdw_findcu (Dwarf *dbg, Dwarf_Off start, bool v4_debug_types)
 {
-  void **tree = debug_types ? &dbg->tu_tree : &dbg->cu_tree;
+  void **tree = v4_debug_types ? &dbg->tu_tree : &dbg->cu_tree;
   Dwarf_Off *next_offset
-    = debug_types ? &dbg->next_tu_offset : &dbg->next_cu_offset;
+    = v4_debug_types ? &dbg->next_tu_offset : &dbg->next_cu_offset;
 
   /* Maybe we already know that CU.  */
   struct Dwarf_CU fake = { .start = start, .end = 0 };
@@ -156,13 +180,12 @@ __libdw_findcu (Dwarf *dbg, Dwarf_Off start, bool debug_types)
   /* No.  Then read more CUs.  */
   while (1)
     {
-      struct Dwarf_CU *newp = __libdw_intern_next_unit (dbg, debug_types);
+      struct Dwarf_CU *newp = __libdw_intern_next_unit (dbg, v4_debug_types);
       if (newp == NULL)
 	return NULL;
 
       /* Is this the one we are looking for?  */
-      if (start < *next_offset)
-	// XXX Match exact offset.
+      if (start < *next_offset || start == newp->start)
 	return newp;
     }
   /* NOTREACHED */
diff --git a/libdwfl/ChangeLog b/libdwfl/ChangeLog
index d487ec1..f0c6335 100644
--- a/libdwfl/ChangeLog
+++ b/libdwfl/ChangeLog
@@ -1,3 +1,10 @@
+2018-01-29  Mark Wielaard  <mark@klomp.org>
+
+	* cu.c (cudie_offset): Use __libdw_first_die_off_from_cu instead of
+	DIE_OFFSET_FROM_CU_OFFSET.
+	(intern_cu): Simply use a copy of the given die CU as key instead of
+	trying to construct a dummy one by hand.
+
 2017-11-20  Mark Wielaard  <mark@klomp.org>
 
 	* link_map.c (do_check64): Take a char * and calculate type and val
diff --git a/libdwfl/cu.c b/libdwfl/cu.c
index 7aa23b5..94bfad8 100644
--- a/libdwfl/cu.c
+++ b/libdwfl/cu.c
@@ -1,5 +1,5 @@
 /* Keeping track of DWARF compilation units in libdwfl.
-   Copyright (C) 2005-2010, 2015 Red Hat, Inc.
+   Copyright (C) 2005-2010, 2015, 2016, 2017 Red Hat, Inc.
    This file is part of elfutils.
 
    This file is free software; you can redistribute it and/or modify
@@ -155,12 +155,7 @@ less_lazy (Dwfl_Module *mod)
 static inline Dwarf_Off
 cudie_offset (const struct dwfl_cu *cu)
 {
-  /* These are real CUs, so there never is a type_sig8.  Note
-     initialization of dwkey.start and offset_size in intern_cu ()
-     to see why this calculates the same value for both key and
-     die.cu search items.  */
-  return DIE_OFFSET_FROM_CU_OFFSET (cu->die.cu->start, cu->die.cu->offset_size,
-				    0);
+  return __libdw_first_die_off_from_cu (cu->die.cu);
 }
 
 static int
@@ -198,11 +193,8 @@ intern_cu (Dwfl_Module *mod, Dwarf_Off cuoff, struct dwfl_cu **result)
   if (die == NULL)
     return DWFL_E_LIBDW;
 
-  struct Dwarf_CU dwkey;
   struct dwfl_cu key;
-  key.die.cu = &dwkey;
-  dwkey.offset_size = 0;
-  dwkey.start = cuoff - (3 * 0 - 4 + 3);
+  key.die.cu = die->cu;
   struct dwfl_cu **found = tsearch (&key, &mod->lazy_cu_root, &compare_cukey);
   if (unlikely (found == NULL))
     return DWFL_E_NOMEM;
-- 
1.8.3.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]