[gold][patch] Add support for reading compressed debug sections

Cary Coutant ccoutant@google.com
Fri Jul 9 23:02:00 GMT 2010


> This has been only minimally tested on x86_64, so I'm not looking for
> approval to commit just yet, but I wanted to make it available for
> review now. I'll be doing more testing tomorrow.

I removed a debug printf and fixed a couple of bugs. Here's an updated patch...

-cary

        * compressed_output.cc (zlib_decompress): New function.
        (get_uncompressed_size): New function.
        (decompress_input_section): New function.
        * compressed_output.h (get_uncompressed_size): New function.
        (decompress_input_section): New function.
        * dwarf_reader.cc (Sized_dwarf_line_info::Sized_dwarf_line_info):
        Handle compressed debug sections.
        * layout.cc (is_compressed_debug_section): New function.
        (Layout::output_section_name): Map compressed section names to
        canonical names.
        * layout.h (is_compressed_debug_section): New function.
        (is_debug_info_section): Recognize compressed debug sections.
        * merge.cc: Include compressed_output.h.
        (Output_merge_data::do_add_input_section): Handle compressed
        debug sections.
        (Output_merge_string::do_add_input_section): Handle compressed
        debug sections.
        * object.cc: Include compressed_output.h.
        (Sized_relobj::Sized_relobj): Initialize new data members.
        (build_compressed_section_map): New function.
        (Sized_relobj::do_read_symbols): Handle compressed debug sections.
        * object.h (Object::section_is_compressed): New method.
        (Object::do_section_is_compressed): New method.
        (Sized_relobj::Compressed_section_map): New type.
        (Sized_relobj::do_section_is_compressed): New method.
        (Sized_relobj::compressed_sections_): New data member.
        * output.cc (Output_section::add_input_section): Handle compressed
        debug sections.
        * reloc.cc: Include compressed_output.h.
        (Sized_relobj::write_sections): Handle compressed debug sections.
-------------- next part --------------
diff --git a/gold/compressed_output.cc b/gold/compressed_output.cc
index a0f8ed1..16d9129 100644
--- a/gold/compressed_output.cc
+++ b/gold/compressed_output.cc
@@ -1,6 +1,6 @@
-// compressed_output.cc -- manage compressed output sections for gold
+// compressed_output.cc -- manage compressed debug sections for gold
 
-// Copyright 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
 // Written by Ian Lance Taylor <iant@google.com>.
 
 // This file is part of gold.
@@ -33,6 +33,8 @@
 namespace gold
 {
 
+#ifdef HAVE_ZLIB_H
+
 // Compress UNCOMPRESSED_DATA of size UNCOMPRESSED_SIZE.  Returns true
 // if it successfully compressed, false if it failed for any reason
 // (including not having zlib support in the library).  If it returns
@@ -42,8 +44,6 @@ namespace gold
 // "ZLIB", and 8 bytes indicating the uncompressed size, in big-endian
 // order.
 
-#ifdef HAVE_ZLIB_H
-
 static bool
 zlib_compress(const unsigned char* uncompressed_data,
               unsigned long uncompressed_size,
@@ -81,6 +81,49 @@ zlib_compress(const unsigned char* uncompressed_data,
     }
 }
 
+// Decompress COMPRESSED_DATA of size COMPRESSED_SIZE, into a buffer
+// UNCOMPRESSED_DATA of size UNCOMPRESSED_SIZE.  Returns true if the
+// input inflated to exactly UNCOMPRESSED_SIZE bytes, false otherwise.
+// The buffer, of appropriate size, is provided by the caller, and is
+// typically part of the memory-mapped output file.
+
+static bool
+zlib_decompress(const unsigned char* compressed_data,
+		unsigned long compressed_size,
+		unsigned char* uncompressed_data,
+		unsigned long uncompressed_size)
+{
+  z_stream strm;
+
+  strm.zalloc = NULL;
+  strm.zfree = NULL;
+  strm.opaque = NULL;
+  strm.avail_in = compressed_size;
+  strm.next_in = const_cast<Bytef*>(compressed_data);
+  strm.avail_out = uncompressed_size;
+
+  // If initialization fails there is no zlib state to release.
+  if (inflateInit(&strm) != Z_OK)
+    return false;
+
+  /* It is possible the section consists of several compressed
+     buffers concatenated together, so we uncompress in a loop.  */
+  bool ok = true;
+  while (ok && strm.avail_in > 0)
+    {
+      strm.next_out = ((Bytef*) uncompressed_data
+                       + (uncompressed_size - strm.avail_out));
+      ok = (inflate(&strm, Z_FINISH) == Z_STREAM_END
+            && inflateReset(&strm) == Z_OK);
+    }
+
+  // Always release the inflate state, including on the failure paths,
+  // so that a corrupt section does not leak zlib's allocations.
+  if (inflateEnd(&strm) != Z_OK)
+    ok = false;
+  return ok && strm.avail_out == 0;
+}
+
 #else // !defined(HAVE_ZLIB_H)
 
 static bool
@@ -90,8 +133,62 @@ zlib_compress(const unsigned char*, unsigned long,
   return false;
 }
 
+static bool
+zlib_decompress(const unsigned char*, unsigned long,
+		unsigned char*, unsigned long)
+{
+  return false;  // Built without zlib: decompression always fails.
+}
+
 #endif // !defined(HAVE_ZLIB_H)
 
+// Read the compression header of a compressed debug section and return
+// the uncompressed size, or -1ULL if there is no valid "ZLIB" header.
+
+uint64_t
+get_uncompressed_size(const unsigned char* compressed_data,
+		      section_size_type compressed_size)
+{
+  const unsigned int zlib_header_size = 12;
+
+  /* Verify the compression header.  Currently, we support only zlib
+     compression, so it should be "ZLIB" followed by the uncompressed
+     section size, 8 bytes in big-endian order.  */
+  if (compressed_size >= zlib_header_size
+      && strncmp(reinterpret_cast<const char*>(compressed_data),
+		 "ZLIB", 4) == 0)
+    return elfcpp::Swap_unaligned<64, true>::readval(compressed_data + 4);
+  return -1ULL;
+}
+
+// Decompress a compressed debug section into the caller's buffer.
+
+bool
+decompress_input_section(const unsigned char* compressed_data,
+			 unsigned long compressed_size,
+			 unsigned char* uncompressed_data,
+			 unsigned long uncompressed_size)
+{
+  const unsigned int zlib_header_size = 12;
+
+  /* Verify the compression header.  Currently, we support only zlib
+     compression, so it should be "ZLIB" followed by the uncompressed
+     section size, 8 bytes in big-endian order.  */
+  if (compressed_size >= zlib_header_size
+      && strncmp(reinterpret_cast<const char*>(compressed_data),
+		 "ZLIB", 4) == 0)
+    {
+      unsigned long uncompressed_size_check =
+	  elfcpp::Swap_unaligned<64, true>::readval(compressed_data + 4);
+      gold_assert(uncompressed_size_check == uncompressed_size);
+      return zlib_decompress(compressed_data + zlib_header_size,
+			     compressed_size - zlib_header_size,
+			     uncompressed_data,
+			     uncompressed_size);
+    }
+  return false;
+}
+
 // Class Output_compressed_section.
 
 // Set the final data size of a compressed section.  This is where
diff --git a/gold/compressed_output.h b/gold/compressed_output.h
index 11b2762..2d6ebd9 100644
--- a/gold/compressed_output.h
+++ b/gold/compressed_output.h
@@ -1,6 +1,6 @@
 // compressed_output.h -- compressed output sections for gold  -*- C++ -*-
 
-// Copyright 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
 // Written by Ian Lance Taylor <iant@google.com>.
 
 // This file is part of gold.
@@ -37,6 +37,18 @@ namespace gold
 
 class General_options;
 
+// Read the compression header of a compressed debug section and return
+// the uncompressed size, or -1ULL if there is no valid "ZLIB" header.
+
+extern uint64_t
+get_uncompressed_size(const unsigned char*, section_size_type);
+
+// Decompress a compressed debug section into the caller's buffer.
+
+extern bool
+decompress_input_section(const unsigned char*, unsigned long, unsigned char*,
+			 unsigned long);
+
 // This is used for a section whose data should be compressed.  It is
 // a regular Output_section which computes its contents into a buffer
 // and then postprocesses it.
diff --git a/gold/dwarf_reader.cc b/gold/dwarf_reader.cc
index 4062fe6..c0188fb 100644
--- a/gold/dwarf_reader.cc
+++ b/gold/dwarf_reader.cc
@@ -32,6 +32,7 @@
 #include "reloc.h"
 #include "dwarf_reader.h"
 #include "int_encoding.h"
+#include "compressed_output.h"
 
 namespace gold {
 
@@ -80,6 +81,21 @@ Sized_dwarf_line_info<size, big_endian>::Sized_dwarf_line_info(Object* object,
   if (this->buffer_ == NULL)
     return;
 
+  section_size_type uncompressed_size = 0;
+  unsigned char* uncompressed_data = NULL;
+  if (object->section_is_compressed(debug_shndx, &uncompressed_size))
+    {
+      uncompressed_data = new unsigned char[uncompressed_size];
+      if (!decompress_input_section(this->buffer_,
+				    this->buffer_end_ - this->buffer_,
+				    uncompressed_data,
+				    uncompressed_size))
+	object->error(_("could not decompress section %s"),
+		      object->section_name(debug_shndx).c_str());
+      this->buffer_ = uncompressed_data;
+      this->buffer_end_ = this->buffer_ + uncompressed_size;
+    }
+
   // Find the relocation section for ".debug_line".
   // We expect these for relobjs (.o's) but not dynobjs (.so's).
   bool got_relocs = false;
diff --git a/gold/layout.cc b/gold/layout.cc
index 9310961..0c9b88a 100644
--- a/gold/layout.cc
+++ b/gold/layout.cc
@@ -947,6 +947,15 @@ is_compressible_debug_section(const char* secname)
   return (strncmp(secname, ".debug", sizeof(".debug") - 1) == 0);
 }
 
+// We may see compressed debug sections in input files.  Return TRUE
+// if this is the name of a compressed debug section.
+
+bool
+is_compressed_debug_section(const char* secname)
+{
+  return (strncmp(secname, ".zdebug", sizeof(".zdebug") - 1) == 0);
+}
+
 // Make a new Output_section, and attach it to segments as
 // appropriate.  IS_INTERP is true if this is the .interp section.
 // IS_DYNAMIC_LINKER_SECTION is true if this section is used by the
@@ -3772,6 +3781,22 @@ Layout::output_section_name(const char* name, size_t* plen)
 	}
     }
 
+  // Compressed debug sections should be mapped to the corresponding
+  // uncompressed section.
+  if (is_compressed_debug_section(name))
+    {
+      size_t len = strlen(name);
+      char *uncompressed_name = new char[len];
+      uncompressed_name[0] = '.';
+      gold_assert(name[0] == '.' && name[1] == 'z');
+      strncpy(&uncompressed_name[1], &name[2], len - 2);
+      uncompressed_name[len - 1] = '\0';
+      // The canonical name drops the 'z' and is one character shorter;
+      // report that through *PLEN (assumes *PLEN is the name length).
+      *plen = len - 1;
+      return uncompressed_name;
+    }
+
   return name;
 }
 
diff --git a/gold/layout.h b/gold/layout.h
index 55ad7a6..b3dc464 100644
--- a/gold/layout.h
+++ b/gold/layout.h
@@ -59,6 +59,10 @@ class Output_reduced_debug_info_section;
 class Eh_frame;
 class Target;
 
+// Return TRUE if SECNAME is the name of a compressed debug section.
+extern bool
+is_compressed_debug_section(const char* secname);
+
 // This task function handles mapping the input sections to output
 // sections and laying them out in memory.
 
diff --git a/gold/merge.cc b/gold/merge.cc
index 6e44ddd..ec11c90 100644
--- a/gold/merge.cc
+++ b/gold/merge.cc
@@ -1,6 +1,6 @@
 // merge.cc -- handle section merging for gold
 
-// Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
 // Written by Ian Lance Taylor <iant@google.com>.
 
 // This file is part of gold.
@@ -26,6 +26,7 @@
 #include <algorithm>
 
 #include "merge.h"
+#include "compressed_output.h"
 
 namespace gold
 {
@@ -404,12 +405,29 @@ bool
 Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
 {
   section_size_type len;
+  section_size_type uncompressed_size = 0;
+  unsigned char* uncompressed_data = NULL;
   const unsigned char* p = object->section_contents(shndx, &len, false);
 
+  if (object->section_is_compressed(shndx, &uncompressed_size))
+    {
+      uncompressed_data = new unsigned char[uncompressed_size];
+      if (!decompress_input_section(p, len, uncompressed_data,
+				    uncompressed_size))
+	object->error(_("could not decompress section %s"),
+		      object->section_name(shndx).c_str());
+      p = uncompressed_data;
+      len = uncompressed_size;
+    }
+
   section_size_type entsize = convert_to_section_size_type(this->entsize());
 
   if (len % entsize != 0)
-    return false;
+    {
+      if (uncompressed_data != NULL)
+	delete[] uncompressed_data;
+      return false;
+    }
 
   this->input_count_ += len / entsize;
 
@@ -438,6 +456,9 @@ Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
   if (this->keeps_input_sections())
     record_input_section(object, shndx);
 
+  if (uncompressed_data != NULL)
+    delete[] uncompressed_data;
+
   return true;
 }
 
@@ -495,8 +516,21 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
 						     unsigned int shndx)
 {
   section_size_type len;
+  section_size_type uncompressed_size = 0;
+  unsigned char* uncompressed_data = NULL;
   const unsigned char* pdata = object->section_contents(shndx, &len, false);
 
+  if (object->section_is_compressed(shndx, &uncompressed_size))
+    {
+      uncompressed_data = new unsigned char[uncompressed_size];
+      if (!decompress_input_section(pdata, len, uncompressed_data,
+				    uncompressed_size))
+	object->error(_("could not decompress section %s"),
+		      object->section_name(shndx).c_str());
+      pdata = uncompressed_data;
+      len = uncompressed_size;
+    }
+
   const Char_type* p = reinterpret_cast<const Char_type*>(pdata);
   const Char_type* pend = p + len / sizeof(Char_type);
 
@@ -504,6 +538,8 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
     {
       object->error(_("mergeable string section length not multiple of "
 		      "character size"));
+      if (uncompressed_data != NULL)
+	delete[] uncompressed_data;
       return false;
     }
 
@@ -545,6 +581,9 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
   if (this->keeps_input_sections())
     record_input_section(object, shndx);
 
+  if (uncompressed_data != NULL)
+    delete[] uncompressed_data;
+
   return true;
 }
 
diff --git a/gold/object.cc b/gold/object.cc
index b034ee2..b127a04 100644
--- a/gold/object.cc
+++ b/gold/object.cc
@@ -39,6 +39,7 @@
 #include "object.h"
 #include "dynobj.h"
 #include "plugin.h"
+#include "compressed_output.h"
 
 namespace gold
 {
@@ -367,7 +368,10 @@ Sized_relobj<size, big_endian>::Sized_relobj(
     local_got_offsets_(),
     kept_comdat_sections_(),
     has_eh_frame_(false),
-    discarded_eh_frame_shndx_(-1U)
+    discarded_eh_frame_shndx_(-1U),
+    deferred_layout_(),
+    deferred_layout_relocs_(),
+    compressed_sections_()
 {
 }
 
@@ -495,6 +499,52 @@ Sized_relobj<size, big_endian>::find_eh_frame(
   return false;
 }
 
+// Build a table for any compressed debug sections, mapping each section index
+// to the uncompressed size.
+
+template<int size, bool big_endian>
+Compressed_section_map*
+build_compressed_section_map(
+    const unsigned char* pshdrs,
+    unsigned int shnum,
+    const char* names,
+    section_size_type names_size,
+    Sized_relobj<size, big_endian>* obj)
+{
+  Compressed_section_map* uncompressed_sizes = new Compressed_section_map();
+  const unsigned int shdr_size = elfcpp::Elf_sizes<size>::shdr_size;
+  const unsigned char* p = pshdrs + shdr_size;
+  for (unsigned int i = 1; i < shnum; ++i, p += shdr_size)
+    {
+      typename elfcpp::Shdr<size, big_endian> shdr(p);
+      if (shdr.get_sh_type() == elfcpp::SHT_PROGBITS
+	  && (shdr.get_sh_flags() & elfcpp::SHF_ALLOC) == 0)
+	{
+	  if (shdr.get_sh_name() >= names_size)
+	    {
+	      obj->error(_("bad section name offset for section %u: %lu"),
+			 i, static_cast<unsigned long>(shdr.get_sh_name()));
+	      continue;
+	    }
+
+	  const char* name = names + shdr.get_sh_name();
+	  if (is_compressed_debug_section(name))
+	    {
+	      section_size_type len;
+	      const unsigned char* contents =
+		  obj->section_contents(i, &len, false);
+	      uint64_t uncompressed_size = get_uncompressed_size(contents, len);
+	      // Sections without a valid compression header are left out
+	      // of the map, so they are treated as uncompressed.
+	      if (uncompressed_size != -1ULL)
+		(*uncompressed_sizes)[i] =
+		    convert_to_section_size_type(uncompressed_size);
+	    }
+	}
+    }
+  return uncompressed_sizes;
+}
+
 // Read the sections and symbols from an object file.
 
 template<int size, bool big_endian>
@@ -514,6 +564,10 @@ Sized_relobj<size, big_endian>::do_read_symbols(Read_symbols_data* sd)
       if (this->find_eh_frame(pshdrs, names, sd->section_names_size))
         this->has_eh_frame_ = true;
     }
+  if (memmem(names, sd->section_names_size, ".zdebug_", 8) != NULL)
+    this->compressed_sections_ =
+        build_compressed_section_map(pshdrs, this->shnum(), names,
+				     sd->section_names_size, this);
 
   sd->symbols = NULL;
   sd->symbols_size = 0;
diff --git a/gold/object.h b/gold/object.h
index bc2b558..9efc28e 100644
--- a/gold/object.h
+++ b/gold/object.h
@@ -518,6 +518,13 @@ class Object
   set_no_export(bool value)
   { this->no_export_ = value; }
 
+  // Return TRUE if the section is a compressed debug section, and set
+  // *UNCOMPRESSED_SIZE to the size of the uncompressed data.
+  bool
+  section_is_compressed(unsigned int shndx,
+			section_size_type* uncompressed_size) const
+  { return do_section_is_compressed(shndx, uncompressed_size); }
+
  protected:
   // Returns NULL for Objects that are not plugin objects.  This method
   // is overridden in the Pluginobj class.
@@ -628,6 +635,12 @@ class Object
   bool
   handle_split_stack_section(const char* name);
 
+  // Return TRUE if the section is a compressed debug section, and set
+  // *UNCOMPRESSED_SIZE to the size of the uncompressed data.
+  virtual bool
+  do_section_is_compressed(unsigned int, section_size_type*) const
+  { return false; }
+
  private:
   // This class may not be copied.
   Object(const Object&);
@@ -1406,6 +1419,10 @@ class Reloc_symbol_changes
   std::vector<Symbol*> vec_;
 };
 
+// Type for mapping section index to uncompressed size.
+
+typedef std::map<unsigned int, section_size_type> Compressed_section_map;
+
 // A regular object file.  This is size and endian specific.
 
 template<int size, bool big_endian>
@@ -1781,7 +1798,26 @@ class Sized_relobj : public Relobj
   void
   set_output_local_symbol_count(unsigned int value)
   { this->output_local_symbol_count_ = value; }
-   
+
+  // Return TRUE if the section is a compressed debug section, and set
+  // *UNCOMPRESSED_SIZE to the size of the uncompressed data.
+  bool
+  do_section_is_compressed(unsigned int shndx,
+			   section_size_type* uncompressed_size) const
+  {
+    if (this->compressed_sections_ == NULL)
+      return false;
+    Compressed_section_map::const_iterator p =
+        this->compressed_sections_->find(shndx);
+    if (p != this->compressed_sections_->end())
+      {
+	if (uncompressed_size != NULL)
+	  *uncompressed_size = p->second;
+	return true;
+      }
+    return false;
+  }
+
  private:
   // For convenience.
   typedef Sized_relobj<size, big_endian> This;
@@ -2024,6 +2060,8 @@ class Sized_relobj : public Relobj
   std::vector<Deferred_layout> deferred_layout_;
   // The list of relocation sections whose layout was deferred.
   std::vector<Deferred_layout> deferred_layout_relocs_;
+  // For compressed debug sections, map section index to uncompressed size.
+  Compressed_section_map* compressed_sections_;
 };
 
 // A class to manage the list of all objects.
diff --git a/gold/output.cc b/gold/output.cc
index 329a4be..3ac8a3d 100644
--- a/gold/output.cc
+++ b/gold/output.cc
@@ -2086,8 +2086,13 @@ Output_section::add_input_section(Layout* layout,
         }
     }
 
+  section_size_type input_section_size = shdr.get_sh_size();
+  section_size_type uncompressed_size;
+  if (object->section_is_compressed(shndx, &uncompressed_size))
+    input_section_size = uncompressed_size;
+
   this->set_current_data_size_for_child(aligned_offset_in_section
-					+ shdr.get_sh_size());
+					+ input_section_size);
 
   // We need to keep track of this section if we are already keeping
   // track of sections, or if we are relaxing.  Also, if this is a
diff --git a/gold/reloc.cc b/gold/reloc.cc
index 8879f0a..9f7355e 100644
--- a/gold/reloc.cc
+++ b/gold/reloc.cc
@@ -32,6 +32,7 @@
 #include "target-reloc.h"
 #include "reloc.h"
 #include "icf.h"
+#include "compressed_output.h"
 
 namespace gold
 {
@@ -732,10 +733,17 @@ Sized_relobj<size, big_endian>::write_sections(const unsigned char* pshdrs,
 
       off_t view_start;
       section_size_type view_size;
+      bool must_decompress = false;
       if (output_offset != invalid_address)
 	{
 	  view_start = output_section_offset + output_offset;
 	  view_size = convert_to_section_size_type(shdr.get_sh_size());
+	  section_size_type uncompressed_size;
+	  if (this->section_is_compressed(i, &uncompressed_size))
+	    {
+	      view_size = uncompressed_size;
+	      must_decompress = true;
+	    }
 	}
       else
 	{
@@ -754,7 +762,7 @@ Sized_relobj<size, big_endian>::write_sections(const unsigned char* pshdrs,
 	{
 	  unsigned char* buffer = os->postprocessing_buffer();
 	  view = buffer + view_start;
-	  if (output_offset != invalid_address)
+	  if (output_offset != invalid_address && !must_decompress)
 	    {
 	      off_t sh_offset = shdr.get_sh_offset();
 	      if (!rm.empty() && rm.back().file_offset > sh_offset)
@@ -770,14 +778,27 @@ Sized_relobj<size, big_endian>::write_sections(const unsigned char* pshdrs,
 	  else
 	    {
 	      view = of->get_output_view(view_start, view_size);
-	      off_t sh_offset = shdr.get_sh_offset();
-	      if (!rm.empty() && rm.back().file_offset > sh_offset)
-		is_sorted = false;
-	      rm.push_back(File_read::Read_multiple_entry(sh_offset,
-							  view_size, view));
+	      if (!must_decompress)
+		{
+		  off_t sh_offset = shdr.get_sh_offset();
+		  if (!rm.empty() && rm.back().file_offset > sh_offset)
+		    is_sorted = false;
+		  rm.push_back(File_read::Read_multiple_entry(sh_offset,
+							      view_size, view));
+		}
 	    }
 	}
 
+      if (must_decompress)
+        {
+	  // Read and decompress the section.
+          section_size_type len;
+	  const unsigned char* p = this->section_contents(i, &len, false);
+	  if (!decompress_input_section(p, len, view, view_size))
+	    this->error(_("could not decompress section %s"),
+			this->section_name(i).c_str());
+        }
+
       pvs->view = view;
       pvs->address = os->address();
       if (output_offset != invalid_address)


More information about the Binutils mailing list