[PATCH] Mach-O: Follow Apple's dSYM files

shinichiro hamaji shinichiro.hamaji@gmail.com
Mon Dec 26 17:18:00 GMT 2011


Hi,

Here is a patch which reads debug information from .dSYM files in the
same directory as the executable, dylib, or bundle binary:
http://shinh.skr.jp/t/dsym.patch

To let find_line use the debug BFD (in the dSYM bundle) instead of the
original executable's BFD, I used a slightly different approach from
gnu_debuglink. I factored the part which reads debug info out of
find_line and call it from mach-o.c.

If we take an approach like gnu_debuglink, we may need to add a
function pointer to bfd_target, say _bfd_follow_debug_bfd. I think
this would be overkill, considering that this function would just
return NULL for all other formats.

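When I factored out _bfd_dwarf2_slurp_debug_info, I changed the order
of operations in find_line: the output pointers are now cleared, and
the debug info is loaded, before the address is computed. I hope this
change is OK.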

It seems Apple's GDB checks all files under the
"foo.dSYM/Contents/Resources/DWARF" directory for an executable "foo".
This patch checks only "foo.dSYM/Contents/Resources/DWARF/foo" and
leaves the rest as a TODO. This is because 1) Apple's dsymutil (a linker
for debug information) uses this filename by default and 2) dirent.h
is not a standard header, so I would need a few autoconf changes. Even
if we need it, I'd like to make that change in a separate patch, as this
patch is already getting large.

I ran "objdump -S" on a bunch of Mach-O/ELF files and ran "make check".
The results look OK. I'm not 100% sure that my decisions are the best.
Any kind of suggestion will be really appreciated.

bfd/
2011-12-27  Shinichiro Hamaji  <shinichiro.hamaji@gmail.com>

	* dwarf2.c (_bfd_dwarf2_slurp_debug_info): Factor out the part
	which reads DWARF2 and stores in stash from find_line.
	(find_line): Call _bfd_dwarf2_slurp_debug_info.
	* libbfd-in.h (_bfd_dwarf2_slurp_debug_info): Add declaration.
	* libbfd.h (_bfd_dwarf2_slurp_debug_info): Regenerate.
	* mach-o.c (dsym_subdir): The name of subdir where debug
	information may be stored.
	(dsym_subdir_len): The length of dsym_subdir.
	(bfd_mach_o_lookup_uuid_command): New. Lookup a load command whose
	type is UUID.
	(bfd_mach_o_dsym_p): New. Check whether the specified BFD
	corresponds to the executable.
	(bfd_mach_o_find_macosx_dsym_in_fat): New. Find a debug information
	BFD in a FAT binary.
	(bfd_mach_o_find_macosx_dsym): New. Find a debug information BFD in
	the specified binary file.
	(bfd_mach_o_follow_macosx_dsym): New. Find a debug information BFD
	for the original BFD.
	(bfd_mach_o_find_nearest_line): Check dSYM files for Mach-O
	executables, dylibs, and bundles.
	(bfd_mach_o_close_and_cleanup): Clean up BFDs for the dSYM file.
	* mach-o.h (debug_filename): The filename of the dSYM file.
	(debug_bfd): The BFD of the dSYM file.
	(debug_fat_bfd): The BFD of the fat binary containing debug_bfd.


diff --git a/bfd/dwarf2.c b/bfd/dwarf2.c
index 767fa52..66fd16f 100644
--- a/bfd/dwarf2.c
+++ b/bfd/dwarf2.c
@@ -3117,6 +3117,122 @@ stash_find_line_fast (struct dwarf2_debug *stash,
 				   filename_ptr, linenumber_ptr);
 }

+/* Read DWARF2 debug information and cache it in the stash at *PINFO.
+   If DEBUG_BFD is non-NULL it is read from there; otherwise it is read
+   from ABFD or, failing that, from ABFD's gnu_debuglink file.  Returns
+   TRUE if a stash already exists or was filled in, FALSE otherwise.  */
+
+bfd_boolean
+_bfd_dwarf2_slurp_debug_info (bfd *abfd, bfd *debug_bfd,
+                              const struct dwarf_debug_section *debug_sections,
+                              asymbol **symbols,
+                              void **pinfo)
+{
+  bfd_size_type amt = sizeof (struct dwarf2_debug);
+  bfd_size_type total_size;
+  asection *msec;
+  struct dwarf2_debug *stash = (struct dwarf2_debug *) *pinfo;
+
+  /* A stash already exists; callers check its info_ptr for validity.  */
+  if (stash != NULL)
+    return TRUE;
+
+  stash = (struct dwarf2_debug *) bfd_zalloc (abfd, amt);
+  if (! stash)
+    return FALSE;
+  stash->debug_sections = debug_sections;
+
+  *pinfo = stash;
+
+  if (debug_bfd == NULL)
+    debug_bfd = abfd;
+
+  msec = find_debug_info (debug_bfd, debug_sections, NULL);
+  if (msec == NULL && abfd == debug_bfd)
+    {
+      char * debug_filename = bfd_follow_gnu_debuglink (abfd, DEBUGDIR);
+
+      if (debug_filename == NULL)
+	/* No dwarf2 info, and no gnu_debuglink to follow.
+	   Note that at this point the stash has been allocated, but
+	   contains zeros.  This lets future calls to this function
+	   fail more quickly.  */
+	return FALSE;
+
+      if ((debug_bfd = bfd_openr (debug_filename, NULL)) == NULL
+	  || ! bfd_check_format (debug_bfd, bfd_object)
+	  || (msec = find_debug_info (debug_bfd,
+				      debug_sections, NULL)) == NULL)
+	{
+	  if (debug_bfd)
+	    bfd_close (debug_bfd);
+	  /* FIXME: Should we report our failure to follow the debuglink ?  */
+	  free (debug_filename);
+	  return FALSE;
+	}
+    }
+  else if (msec == NULL)
+    /* A caller-supplied DEBUG_BFD without any DWARF2 info section:
+       fail here rather than dereference a null MSEC below.  */
+    return FALSE;
+
+  /* There can be more than one DWARF2 info section in a BFD these
+     days.  When there is only one, read it directly.  Otherwise make
+     two passes: the first accumulates the section sizes so that the
+     buffer can be allocated once, without reallocs; the second reads
+     each section's relocated contents into place.  */
+
+  if (! find_debug_info (debug_bfd, debug_sections, msec))
+    {
+      /* Case 1: only one info section.  */
+      total_size = msec->size;
+      if (! read_section (debug_bfd, &stash->debug_sections[debug_info],
+			  symbols, 0,
+			  &stash->info_ptr_memory, &total_size))
+	return FALSE;
+    }
+  else
+    {
+      /* Case 2: multiple sections.  */
+      for (total_size = 0;
+	   msec;
+	   msec = find_debug_info (debug_bfd, debug_sections, msec))
+	total_size += msec->size;
+
+      stash->info_ptr_memory = (bfd_byte *) bfd_malloc (total_size);
+      if (stash->info_ptr_memory == NULL)
+	return FALSE;
+
+      total_size = 0;
+      for (msec = find_debug_info (debug_bfd, debug_sections, NULL);
+	   msec;
+	   msec = find_debug_info (debug_bfd, debug_sections, msec))
+	{
+	  bfd_size_type size;
+
+	  size = msec->size;
+	  if (size == 0)
+	    continue;
+
+	  if (!(bfd_simple_get_relocated_section_contents
+		(debug_bfd, msec, stash->info_ptr_memory + total_size,
+		 symbols)))
+	    return FALSE;
+
+	  total_size += size;
+	}
+    }
+
+  stash->info_ptr = stash->info_ptr_memory;
+  stash->info_ptr_end = stash->info_ptr + total_size;
+  stash->sec = find_debug_info (debug_bfd, debug_sections, NULL);
+  stash->sec_info_ptr = stash->info_ptr;
+  stash->syms = symbols;
+  stash->bfd_ptr = debug_bfd;
+
+  return TRUE;
+}
+
 /* Find the source code location of SYMBOL.  If SYMBOL is NULL
    then find the nearest source code location corresponding to
    the address SECTION + OFFSET.
@@ -3157,17 +3273,16 @@ find_line (bfd *abfd,
   bfd_vma found = FALSE;
   bfd_boolean do_line;

-  stash = (struct dwarf2_debug *) *pinfo;
+  *filename_ptr = NULL;
+  if (functionname_ptr != NULL)
+    *functionname_ptr = NULL;
+  *linenumber_ptr = 0;

-  if (! stash)
-    {
-      bfd_size_type amt = sizeof (struct dwarf2_debug);
+  if (! _bfd_dwarf2_slurp_debug_info (abfd, NULL,
+				      debug_sections, symbols, pinfo))
+    return FALSE;

-      stash = (struct dwarf2_debug *) bfd_zalloc (abfd, amt);
-      if (! stash)
-	return FALSE;
-      stash->debug_sections = debug_sections;
-    }
+  stash = (struct dwarf2_debug *) *pinfo;

   /* In a relocatable file, 2 functions may have the same address.
      We change the section vma so that they won't overlap.  */
@@ -3197,110 +3312,11 @@ find_line (bfd *abfd,
     addr += section->output_section->vma + section->output_offset;
   else
     addr += section->vma;
-  *filename_ptr = NULL;
-  if (! do_line)
-    *functionname_ptr = NULL;
-  *linenumber_ptr = 0;
-
-  if (! *pinfo)
-    {
-      bfd *debug_bfd;
-      bfd_size_type total_size;
-      asection *msec;
-
-      *pinfo = stash;
-
-      msec = find_debug_info (abfd, debug_sections, NULL);
-      if (msec == NULL)
-	{
-	  char * debug_filename = bfd_follow_gnu_debuglink (abfd, DEBUGDIR);
-
-	  if (debug_filename == NULL)
-	    /* No dwarf2 info, and no gnu_debuglink to follow.
-	       Note that at this point the stash has been allocated, but
-	       contains zeros.  This lets future calls to this function
-	       fail more quickly.  */
-	    goto done;
-
-	  if ((debug_bfd = bfd_openr (debug_filename, NULL)) == NULL
-	      || ! bfd_check_format (debug_bfd, bfd_object)
-	      || (msec = find_debug_info (debug_bfd,
-                                          debug_sections, NULL)) == NULL)
-	    {
-	      if (debug_bfd)
-		bfd_close (debug_bfd);
-	      /* FIXME: Should we report our failure to follow the debuglink ?  */
-	      free (debug_filename);
-	      goto done;
-	    }
-	}
-      else
-	debug_bfd = abfd;
-
-      /* There can be more than one DWARF2 info section in a BFD these
-	 days.  First handle the easy case when there's only one.  If
-	 there's more than one, try case two: none of the sections is
-	 compressed.  In that case, read them all in and produce one
-	 large stash.  We do this in two passes - in the first pass we
-	 just accumulate the section sizes, and in the second pass we
-	 read in the section's contents.  (The allows us to avoid
-	 reallocing the data as we add sections to the stash.)  If
-	 some or all sections are compressed, then do things the slow
-	 way, with a bunch of reallocs.  */
-
-      if (! find_debug_info (debug_bfd, debug_sections, msec))
-	{
-	  /* Case 1: only one info section.  */
-	  total_size = msec->size;
-	  if (! read_section (debug_bfd, &stash->debug_sections[debug_info],
-                              symbols, 0,
-			      &stash->info_ptr_memory, &total_size))
-	    goto done;
-	}
-      else
-	{
-	  /* Case 2: multiple sections.  */
-	  for (total_size = 0;
-               msec;
-               msec = find_debug_info (debug_bfd, debug_sections, msec))
-	    total_size += msec->size;
-
-	  stash->info_ptr_memory = (bfd_byte *) bfd_malloc (total_size);
-	  if (stash->info_ptr_memory == NULL)
-	    goto done;
-
-	  total_size = 0;
-	  for (msec = find_debug_info (debug_bfd, debug_sections, NULL);
-	       msec;
-	       msec = find_debug_info (debug_bfd, debug_sections, msec))
-	    {
-	      bfd_size_type size;
-
-	      size = msec->size;
-	      if (size == 0)
-		continue;
-
-	      if (!(bfd_simple_get_relocated_section_contents
-		    (debug_bfd, msec, stash->info_ptr_memory + total_size,
-		     symbols)))
-		goto done;
-
-	      total_size += size;
-	    }
-	}
-
-      stash->info_ptr = stash->info_ptr_memory;
-      stash->info_ptr_end = stash->info_ptr + total_size;
-      stash->sec = find_debug_info (debug_bfd, debug_sections, NULL);
-      stash->sec_info_ptr = stash->info_ptr;
-      stash->syms = symbols;
-      stash->bfd_ptr = debug_bfd;
-    }

   /* A null info_ptr indicates that there is no dwarf2 info
      (or that an error occured while setting up the stash).  */
   if (! stash->info_ptr)
-    goto done;
+    return FALSE;

   stash->inliner_chain = NULL;

diff --git a/bfd/libbfd-in.h b/bfd/libbfd-in.h
index 7db09e4..f7a9e21 100644
--- a/bfd/libbfd-in.h
+++ b/bfd/libbfd-in.h
@@ -549,6 +549,10 @@ bfd_boolean _bfd_generic_find_line
 extern bfd_boolean _bfd_dwarf2_find_inliner_info
   (bfd *, const char **, const char **, unsigned int *, void **);

+/* Read DWARF 2 debugging information. */
+extern bfd_boolean _bfd_dwarf2_slurp_debug_info
+  (bfd *, bfd *, const struct dwarf_debug_section *, asymbol **, void **);
+
 /* Clean up the data used to handle DWARF 2 debugging information. */
 extern void _bfd_dwarf2_cleanup_debug_info
   (bfd *, void **);
diff --git a/bfd/libbfd.h b/bfd/libbfd.h
index 0beddb6..a10a651 100644
--- a/bfd/libbfd.h
+++ b/bfd/libbfd.h
@@ -554,6 +554,10 @@ bfd_boolean _bfd_generic_find_line
 extern bfd_boolean _bfd_dwarf2_find_inliner_info
   (bfd *, const char **, const char **, unsigned int *, void **);

+/* Read DWARF 2 debugging information. */
+extern bfd_boolean _bfd_dwarf2_slurp_debug_info
+  (bfd *, bfd *, const struct dwarf_debug_section *, asymbol **, void **);
+
 /* Clean up the data used to handle DWARF 2 debugging information. */
 extern void _bfd_dwarf2_cleanup_debug_info
   (bfd *, void **);
diff --git a/bfd/mach-o.c b/bfd/mach-o.c
index cc68d89..f46f94d 100644
--- a/bfd/mach-o.c
+++ b/bfd/mach-o.c
@@ -277,6 +277,9 @@ static const mach_o_segment_name_xlat segsec_names_xlat[] =
     { NULL, NULL }
   };

+static const char dsym_subdir[] = ".dSYM/Contents/Resources/DWARF";
+static const int dsym_subdir_len = sizeof (dsym_subdir) - 1; /* Sans NUL.  */
+
 /* For both cases bfd-name => mach-o name and vice versa, the specific target
    is checked before the generic.  This allows a target (e.g. ppc for cstring)
    to override the generic definition with a more specific one.  */
@@ -3738,6 +3741,152 @@ bfd_mach_o_core_file_failing_signal (bfd *abfd
ATTRIBUTE_UNUSED)
   return 0;
 }

+/* Return ABFD's LC_UUID command, or NULL unless exactly one is found.  */
+static bfd_mach_o_uuid_command *
+bfd_mach_o_lookup_uuid_command (bfd *abfd)
+{
+  bfd_mach_o_load_command *uuid_cmd;
+  if (bfd_mach_o_lookup_command (abfd, BFD_MACH_O_LC_UUID, &uuid_cmd) != 1)
+    return NULL;
+  return &uuid_cmd->command.uuid;
+}
+
+/* Return TRUE iff DEBUG_BFD is a Mach-O object file whose LC_UUID
+   command holds the same UUID as UUID_CMD.  */
+
+static bfd_boolean
+bfd_mach_o_dsym_p (bfd *debug_bfd, bfd_mach_o_uuid_command *uuid_cmd)
+{
+  bfd_mach_o_uuid_command *dsym_uuid;
+
+  BFD_ASSERT (debug_bfd);
+  BFD_ASSERT (uuid_cmd);
+
+  /* Anything that is not a Mach-O object cannot match.  */
+  if (!bfd_check_format (debug_bfd, bfd_object)
+      || bfd_get_flavour (debug_bfd) != bfd_target_mach_o_flavour)
+    return FALSE;
+
+  dsym_uuid = bfd_mach_o_lookup_uuid_command (debug_bfd);
+  if (dsym_uuid == NULL)
+    return FALSE;
+
+  /* Both binaries must carry byte-identical UUIDs.  */
+  return memcmp (uuid_cmd->uuid, dsym_uuid->uuid,
+		 sizeof (uuid_cmd->uuid)) == 0;
+}
+
+/* Scan the members of FAT_BFD and return the first one accepted by
+   bfd_mach_o_dsym_p for UUID_CMD, or NULL if none is.  Every rejected
+   member is closed; the returned one is left open.  */
+
+static bfd *
+bfd_mach_o_find_macosx_dsym_in_fat (bfd *fat_bfd,
+                                    bfd_mach_o_uuid_command *uuid_cmd)
+{
+  bfd *slice, *rejected = NULL;
+
+  BFD_ASSERT (fat_bfd);
+  BFD_ASSERT (uuid_cmd);
+
+  for (slice = bfd_mach_o_openr_next_archived_file (fat_bfd, NULL);
+       slice != NULL && !bfd_mach_o_dsym_p (slice, uuid_cmd);
+       slice = bfd_mach_o_openr_next_archived_file (fat_bfd, slice))
+    {
+      /* SLICE stays open as the iteration cursor; close its predecessor.  */
+      if (rejected != NULL)
+        bfd_close (rejected);
+      rejected = slice;
+    }
+
+  if (rejected != NULL)
+    bfd_close (rejected);
+  return slice;
+}
+
+/* Open DEBUG_FILENAME and return the BFD in it (possibly a member of
+   a fat archive) whose UUID matches UUID_CMD, recording it and
+   DEBUG_FILENAME in ABFD's Mach-O data.  If nothing matches, close
+   what was opened here and return NULL.  */
+
+static bfd *
+bfd_mach_o_find_macosx_dsym (bfd *abfd,
+                             bfd_mach_o_uuid_command *uuid_cmd,
+                             char *debug_filename)
+{
+  bfd *debug_bfd, *found;
+  bfd_mach_o_data_struct *mdata;
+
+  BFD_ASSERT (abfd);
+  BFD_ASSERT (uuid_cmd);
+
+  debug_bfd = bfd_openr (debug_filename, NULL);
+  if (debug_bfd == NULL)
+    return NULL;
+
+  mdata = bfd_mach_o_get_data (abfd);
+  if (bfd_check_format (debug_bfd, bfd_archive))
+    {
+      found = bfd_mach_o_find_macosx_dsym_in_fat (debug_bfd, uuid_cmd);
+      if (found != NULL)
+        mdata->debug_fat_bfd = debug_bfd;
+    }
+  else
+    found = bfd_mach_o_dsym_p (debug_bfd, uuid_cmd) ? debug_bfd : NULL;
+
+  if (found == NULL)
+    {
+      /* No match: do not leak the file opened above, fat or thin.  */
+      bfd_close (debug_bfd);
+      return NULL;
+    }
+  mdata->debug_filename = debug_filename;
+  mdata->debug_bfd = found;
+  return found;
+}
+
+/* Return the BFD of the dSYM companion of ABFD (or of the archive
+   containing it), "<file>.dSYM/Contents/Resources/DWARF/<base>", or NULL.  */
+
+static bfd *
+bfd_mach_o_follow_macosx_dsym (bfd *abfd)
+{
+  char *debug_filename;
+  bfd_mach_o_uuid_command *uuid_cmd;
+  bfd *debug_bfd, *base_bfd;
+  const char *base_basename;
+
+  if (abfd == NULL || bfd_get_flavour (abfd) != bfd_target_mach_o_flavour)
+    return NULL;
+
+  base_bfd = abfd->my_archive ? abfd->my_archive : abfd;
+  /* BFD may have been opened from a stream. */
+  if (base_bfd->filename == NULL)
+    {
+      bfd_set_error (bfd_error_invalid_operation);
+      return NULL;
+    }
+  base_basename = lbasename (base_bfd->filename);
+
+  uuid_cmd = bfd_mach_o_lookup_uuid_command (abfd);
+  if (uuid_cmd == NULL)
+    return NULL;
+  /* TODO: We assume the DWARF file has the same name as the binary.
+     Apple's GDB seems to check all files in the dSYM bundle directory:
+     http://opensource.apple.com/source/gdb/gdb-1708/src/gdb/macosx/macosx-tdep.c  */
+  debug_filename = (char *)bfd_malloc (strlen (base_bfd->filename)
+                                       + dsym_subdir_len + 1
+                                       + strlen (base_basename) + 1);
+  if (debug_filename == NULL)
+    return NULL;
+  sprintf (debug_filename, "%s%s/%s",
+           base_bfd->filename, dsym_subdir, base_basename);
+  debug_bfd = bfd_mach_o_find_macosx_dsym (abfd, uuid_cmd, debug_filename);
+  if (debug_bfd == NULL)
+    free (debug_filename);
+  return debug_bfd;
+}
+
 bfd_boolean
 bfd_mach_o_find_nearest_line (bfd *abfd,
 			      asection *section,
@@ -3748,9 +3897,30 @@ bfd_mach_o_find_nearest_line (bfd *abfd,
 			      unsigned int *line_ptr)
 {
   bfd_mach_o_data_struct *mdata = bfd_mach_o_get_data (abfd);
-  /* TODO: Handle executables and dylibs by using dSYMs. */
-  if (mdata->header.filetype != BFD_MACH_O_MH_OBJECT)
+  if (mdata == NULL)
     return FALSE;
+  switch (mdata->header.filetype)
+    {
+    case BFD_MACH_O_MH_OBJECT:
+      break;
+    case BFD_MACH_O_MH_EXECUTE:
+    case BFD_MACH_O_MH_DYLIB:
+    case BFD_MACH_O_MH_BUNDLE:
+    case BFD_MACH_O_MH_KEXT_BUNDLE:
+      if (mdata->dwarf2_find_line_info == NULL)
+        {
+          bfd *debug_bfd = bfd_mach_o_follow_macosx_dsym (abfd);
+          if (! debug_bfd)
+            return FALSE;
+          if (! _bfd_dwarf2_slurp_debug_info (abfd, debug_bfd,
+                                              dwarf_debug_sections, symbols,
+                                              &mdata->dwarf2_find_line_info))
+            return FALSE;
+        }
+      break;
+    default:
+      return FALSE;
+    }
   if (_bfd_dwarf2_find_nearest_line (abfd, dwarf_debug_sections,
 				     section, symbols, offset,
 				     filename_ptr, functionname_ptr,
@@ -3768,6 +3938,21 @@ bfd_mach_o_close_and_cleanup (bfd *abfd)
     {
       _bfd_dwarf2_cleanup_debug_info (abfd, &mdata->dwarf2_find_line_info);
       bfd_mach_o_free_cached_info (abfd);
+      if (mdata->debug_bfd != NULL)
+        {
+          bfd_close (mdata->debug_bfd);
+          mdata->debug_bfd = NULL;
+        }
+      if (mdata->debug_fat_bfd != NULL)
+        {
+          bfd_close (mdata->debug_fat_bfd);
+          mdata->debug_fat_bfd = NULL;
+        }
+      if (mdata->debug_filename != NULL)
+        {
+          free (mdata->debug_filename);
+          mdata->debug_filename = NULL;
+        }
     }

   return _bfd_generic_close_and_cleanup (abfd);
diff --git a/bfd/mach-o.h b/bfd/mach-o.h
index 89dce1a..c94dd55 100644
--- a/bfd/mach-o.h
+++ b/bfd/mach-o.h
@@ -520,6 +520,13 @@ typedef struct mach_o_data_struct
   /* A place to stash dwarf2 info for this bfd.  */
   void *dwarf2_find_line_info;

+  /* Filename of .dSYM file. */
+  char *debug_filename;
+  /* BFD of .dSYM file. */
+  bfd *debug_bfd;
+  /* BFD of a fat binary which contains debug_bfd. */
+  bfd *debug_fat_bfd;
+
   /* Cache of dynamic relocs. */
   arelent *dyn_reloc_cache;
 }



More information about the Binutils mailing list