[gold][patch] Add support for reading compressed debug sections

Fri Jul 9 02:08:00 GMT 2010

This patch adds gold support for reading compressed debug sections
(generated by the new --compress-debug-sections option in gas).

This has been only minimally tested on x86_64, so I'm not looking for
approval to commit just yet, but I wanted to make it available for
review now. I'll be doing more testing tomorrow.

Is it OK to add test cases to the gold testsuite that depend on the
new functionality in gas? (I'd think so, given that the gold testsuite
requires as-new in the build tree.) If so, I'll add a test case and
resend the patch.

-cary

        * compressed_output.cc (zlib_decompress): New function.
        (get_uncompressed_size): New function.
        (decompress_input_section): New function.
        * compressed_output.h (get_uncompressed_size): New function.
        (decompress_input_section): New function.
        * dwarf_reader.cc: Include compressed_output.h.
        (Sized_dwarf_line_info::Sized_dwarf_line_info): Handle
        compressed debug sections.
        * layout.cc (is_compressed_debug_section): New function.
        (Layout::output_section_name): Map compressed section names to
        canonical names.
        * layout.h (is_compressed_debug_section): New function.
        * merge.cc: Include compressed_output.h.
        (Output_merge_data::do_add_input_section): Handle compressed
        debug sections.
        (Output_merge_string::do_add_input_section): Handle compressed
        debug sections.
        * object.cc: Include compressed_output.h.
        (Sized_relobj::Sized_relobj): Initialize new data members.
        (build_compressed_section_map): New function.
        (Sized_relobj::do_read_symbols): Handle compressed debug sections.
        * object.h (Object::section_is_compressed): New method.
        (Object::do_section_is_compressed): New method.
        (Compressed_section_map): New type.
        (Sized_relobj::do_section_is_compressed): New method.
        (Sized_relobj::compressed_sections_): New data member.
        * output.cc (Output_section::add_input_section): Handle compressed
        debug sections.
        * reloc.cc: Include compressed_output.h.
        (Sized_relobj::write_sections): Handle compressed debug sections.
-------------- next part --------------

diff --git a/gold/compressed_output.cc b/gold/compressed_output.cc
index a0f8ed1..16d9129 100644
--- a/gold/compressed_output.cc
+++ b/gold/compressed_output.cc
@@ -1,6 +1,6 @@
-// compressed_output.cc -- manage compressed output sections for gold
+// compressed_output.cc -- manage compressed debug sections for gold
 
-// Copyright 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
 // Written by Ian Lance Taylor <iant@google.com>.
 
 // This file is part of gold.
@@ -33,6 +33,8 @@
 namespace gold
 {
 
+#ifdef HAVE_ZLIB_H
+
 // Compress UNCOMPRESSED_DATA of size UNCOMPRESSED_SIZE.  Returns true
 // if it successfully compressed, false if it failed for any reason
 // (including not having zlib support in the library).  If it returns
@@ -42,8 +44,6 @@ namespace gold
 // "ZLIB", and 8 bytes indicating the uncompressed size, in big-endian
 // order.
 
-#ifdef HAVE_ZLIB_H
-
 static bool
 zlib_compress(const unsigned char* uncompressed_data,
               unsigned long uncompressed_size,
@@ -81,6 +81,49 @@ zlib_compress(const unsigned char* uncompressed_data,
     }
 }
 
+// Inflate the COMPRESSED_SIZE bytes at COMPRESSED_DATA into the
+// caller-provided buffer UNCOMPRESSED_DATA, which must hold exactly
+// UNCOMPRESSED_SIZE bytes.  Returns true only if all of the input
+// was consumed and the output buffer was filled exactly; returns
+// false on any zlib error or size mismatch.
+
+static bool
+zlib_decompress(const unsigned char* compressed_data,
+		unsigned long compressed_size,
+		unsigned char* uncompressed_data,
+		unsigned long uncompressed_size)
+{
+  z_stream strm;
+  strm.zalloc = NULL;
+  strm.zfree = NULL;
+  strm.opaque = NULL;
+  strm.avail_in = compressed_size;
+  strm.next_in = const_cast<Bytef*>(compressed_data);
+  strm.avail_out = uncompressed_size;
+
+  if (inflateInit(&strm) != Z_OK)
+    return false;
+
+  // It is possible the section consists of several compressed
+  // buffers concatenated together, so we uncompress in a loop.
+  bool ok = true;
+  while (ok && strm.avail_in > 0)
+    {
+      strm.next_out = (uncompressed_data
+                       + (uncompressed_size - strm.avail_out));
+      ok = (inflate(&strm, Z_FINISH) == Z_STREAM_END
+            && inflateReset(&strm) == Z_OK);
+    }
+
+  // Always release zlib's internal state, including after a failure
+  // in the loop above, so that a bad input section does not leak it.
+  if (inflateEnd(&strm) != Z_OK)
+    ok = false;
+
+  // Success also requires that the output buffer was filled exactly.
+  return ok && strm.avail_out == 0;
+}
+
 #else // !defined(HAVE_ZLIB_H)
 
 static bool
@@ -90,8 +133,62 @@ zlib_compress(const unsigned char*, unsigned long,
   return false;
 }
 
+static bool
+zlib_decompress(const unsigned char*, unsigned long,
+		unsigned char*, unsigned long)
+{
+  return false;  // No zlib support in this build: cannot decompress.
+}
+
 #endif // !defined(HAVE_ZLIB_H)
 
+// Return the uncompressed size recorded in the header of a compressed
+// debug section, or -1ULL if the data has no valid zlib header.
+
+uint64_t
+get_uncompressed_size(const unsigned char* compressed_data,
+		      section_size_type compressed_size)
+{
+  // The only supported format is zlib: the magic string "ZLIB"
+  // followed by the uncompressed size as 8 big-endian bytes.
+  const unsigned int header_len = 12;
+  const char* const magic = reinterpret_cast<const char*>(compressed_data);
+
+  if (compressed_size < header_len)
+    return -1ULL;
+  if (strncmp(magic, "ZLIB", 4) != 0)
+    return -1ULL;
+  return elfcpp::Swap_unaligned<64, true>::readval(compressed_data + 4);
+}
+
+// Decompress a compressed debug section into UNCOMPRESSED_DATA.
+
+bool
+decompress_input_section(const unsigned char* compressed_data,
+			 unsigned long compressed_size,
+			 unsigned char* uncompressed_data,
+			 unsigned long uncompressed_size)
+{
+  // Only zlib compression is supported.  The section must start with
+  // a 12-byte header: the magic string "ZLIB" followed by the
+  // uncompressed section size as 8 bytes in big-endian order.
+  const unsigned int header_len = 12;
+  const char* const magic = reinterpret_cast<const char*>(compressed_data);
+
+  if (compressed_size < header_len || strncmp(magic, "ZLIB", 4) != 0)
+    return false;
+
+  // The size recorded in the header must agree with the size of the
+  // buffer that the caller supplied.
+  unsigned long recorded_size =
+      elfcpp::Swap_unaligned<64, true>::readval(compressed_data + 4);
+  gold_assert(recorded_size == uncompressed_size);
+  const unsigned char* payload = compressed_data + header_len;
+  unsigned long payload_size = compressed_size - header_len;
+  return zlib_decompress(payload, payload_size, uncompressed_data,
+			 uncompressed_size);
+}
+
 // Class Output_compressed_section.
 
 // Set the final data size of a compressed section.  This is where
diff --git a/gold/compressed_output.h b/gold/compressed_output.h
index 11b2762..2d6ebd9 100644
--- a/gold/compressed_output.h
+++ b/gold/compressed_output.h
@@ -1,6 +1,6 @@
 // compressed_output.h -- compressed output sections for gold  -*- C++ -*-
 
-// Copyright 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
 // Written by Ian Lance Taylor <iant@google.com>.
 
 // This file is part of gold.
@@ -37,6 +37,18 @@ namespace gold
 
 class General_options;
 
+// Read the compression header of a compressed debug section and return
+// the uncompressed size.
+
+extern uint64_t
+get_uncompressed_size(const unsigned char*, section_size_type);
+
+// Decompress a compressed debug section directly into the output file.
+
+extern bool
+decompress_input_section(const unsigned char*, unsigned long, unsigned char*,
+			 unsigned long);
+
 // This is used for a section whose data should be compressed.  It is
 // a regular Output_section which computes its contents into a buffer
 // and then postprocesses it.
diff --git a/gold/dwarf_reader.cc b/gold/dwarf_reader.cc
index 4062fe6..c0188fb 100644
--- a/gold/dwarf_reader.cc
+++ b/gold/dwarf_reader.cc
@@ -32,6 +32,7 @@
 #include "reloc.h"
 #include "dwarf_reader.h"
 #include "int_encoding.h"
+#include "compressed_output.h"
 
 namespace gold {
 
@@ -80,6 +81,21 @@ Sized_dwarf_line_info<size, big_endian>::Sized_dwarf_line_info(Object* object,
   if (this->buffer_ == NULL)
     return;
 
+  section_size_type uncompressed_size = 0;
+  unsigned char* uncompressed_data = NULL;
+  if (object->section_is_compressed(debug_shndx, &uncompressed_size))
+    {
+      uncompressed_data = new unsigned char[uncompressed_size];
+      if (!decompress_input_section(this->buffer_,
+				    this->buffer_end_ - this->buffer_,
+				    uncompressed_data,
+				    uncompressed_size))
+	object->error(_("could not decompress section %s"),
+		      object->section_name(debug_shndx).c_str());
+      this->buffer_ = uncompressed_data;
+      this->buffer_end_ = this->buffer_ + uncompressed_size;
+    }
+
   // Find the relocation section for ".debug_line".
   // We expect these for relobjs (.o's) but not dynobjs (.so's).
   bool got_relocs = false;
diff --git a/gold/layout.cc b/gold/layout.cc
index 9310961..0c9b88a 100644
--- a/gold/layout.cc
+++ b/gold/layout.cc
@@ -947,6 +947,15 @@ is_compressible_debug_section(const char* secname)
   return (strncmp(secname, ".debug", sizeof(".debug") - 1) == 0);
 }
 
+// Return true if SECNAME names a compressed debug section (".zdebug*").
+
+bool
+is_compressed_debug_section(const char* secname)
+{
+  static const char prefix[] = ".zdebug";
+  return strncmp(secname, prefix, sizeof(prefix) - 1) == 0;
+}
+
 // Make a new Output_section, and attach it to segments as
 // appropriate.  IS_INTERP is true if this is the .interp section.
 // IS_DYNAMIC_LINKER_SECTION is true if this section is used by the
@@ -3772,6 +3781,19 @@ Layout::output_section_name(const char* name, size_t* plen)
 	}
     }
 
+  // Compressed debug sections should be mapped to the corresponding
+  // uncompressed section.
+  if (is_compressed_debug_section(name))
+    {
+      size_t len = strlen(name);
+      char *uncompressed_name = new char[len];
+      uncompressed_name[0] = '.';
+      gold_assert(name[0] == '.' && name[1] == 'z');
+      strncpy(&uncompressed_name[1], &name[2], len - 2);
+      uncompressed_name[len - 1] = '\0';
+      return uncompressed_name;
+    }
+
   return name;
 }
 
diff --git a/gold/layout.h b/gold/layout.h
index 55ad7a6..b3dc464 100644
--- a/gold/layout.h
+++ b/gold/layout.h
@@ -59,6 +59,10 @@ class Output_reduced_debug_info_section;
 class Eh_frame;
 class Target;
 
+// Return TRUE if SECNAME is the name of a compressed debug section.
+extern bool
+is_compressed_debug_section(const char* secname);
+
 // This task function handles mapping the input sections to output
 // sections and laying them out in memory.
 
diff --git a/gold/merge.cc b/gold/merge.cc
index 6e44ddd..ec11c90 100644
--- a/gold/merge.cc
+++ b/gold/merge.cc
@@ -1,6 +1,6 @@
 // merge.cc -- handle section merging for gold
 
-// Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
 // Written by Ian Lance Taylor <iant@google.com>.
 
 // This file is part of gold.
@@ -26,6 +26,7 @@
 #include <algorithm>
 
 #include "merge.h"
+#include "compressed_output.h"
 
 namespace gold
 {
@@ -404,12 +405,29 @@ bool
 Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
 {
   section_size_type len;
+  section_size_type uncompressed_size = 0;
+  unsigned char* uncompressed_data = NULL;
   const unsigned char* p = object->section_contents(shndx, &len, false);
 
+  if (object->section_is_compressed(shndx, &uncompressed_size))
+    {
+      uncompressed_data = new unsigned char[uncompressed_size];
+      if (!decompress_input_section(p, len, uncompressed_data,
+				    uncompressed_size))
+	object->error(_("could not decompress section %s"),
+		      object->section_name(shndx).c_str());
+      p = uncompressed_data;
+      len = uncompressed_size;
+    }
+
   section_size_type entsize = convert_to_section_size_type(this->entsize());
 
   if (len % entsize != 0)
-    return false;
+    {
+      if (uncompressed_data != NULL)
+	delete[] uncompressed_data;
+      return false;
+    }
 
   this->input_count_ += len / entsize;
 
@@ -438,6 +456,9 @@ Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
   if (this->keeps_input_sections())
     record_input_section(object, shndx);
 
+  if (uncompressed_data != NULL)
+    delete[] uncompressed_data;
+
   return true;
 }
 
@@ -495,8 +516,21 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
 						     unsigned int shndx)
 {
   section_size_type len;
+  section_size_type uncompressed_size = 0;
+  unsigned char* uncompressed_data = NULL;
   const unsigned char* pdata = object->section_contents(shndx, &len, false);
 
+  if (object->section_is_compressed(shndx, &uncompressed_size))
+    {
+      uncompressed_data = new unsigned char[uncompressed_size];
+      if (!decompress_input_section(pdata, len, uncompressed_data,
+				    uncompressed_size))
+	object->error(_("could not decompress section %s"),
+		      object->section_name(shndx).c_str());
+      pdata = uncompressed_data;
+      len = uncompressed_size;
+    }
+
   const Char_type* p = reinterpret_cast<const Char_type*>(pdata);
   const Char_type* pend = p + len / sizeof(Char_type);
 
@@ -504,6 +538,8 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
     {
       object->error(_("mergeable string section length not multiple of "
 		      "character size"));
+      if (uncompressed_data != NULL)
+	delete[] uncompressed_data;
       return false;
     }
 
@@ -545,6 +581,9 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
   if (this->keeps_input_sections())
     record_input_section(object, shndx);
 
+  if (uncompressed_data != NULL)
+    delete[] uncompressed_data;
+
   return true;
 }
 
diff --git a/gold/object.cc b/gold/object.cc
index b034ee2..b127a04 100644
--- a/gold/object.cc
+++ b/gold/object.cc
@@ -39,6 +39,7 @@
 #include "object.h"
 #include "dynobj.h"
 #include "plugin.h"
+#include "compressed_output.h"
 
 namespace gold
 {
@@ -367,7 +368,10 @@ Sized_relobj<size, big_endian>::Sized_relobj(
     local_got_offsets_(),
     kept_comdat_sections_(),
     has_eh_frame_(false),
-    discarded_eh_frame_shndx_(-1U)
+    discarded_eh_frame_shndx_(-1U),
+    deferred_layout_(),
+    deferred_layout_relocs_(),
+    compressed_sections_()
 {
 }
 
@@ -495,6 +499,55 @@ Sized_relobj<size, big_endian>::find_eh_frame(
   return false;
 }
 
+// Build a table for any compressed debug sections, mapping each section
+// index to the uncompressed size.  PSHDRS points to the SHNUM section
+// headers and NAMES to the NAMES_SIZE bytes of section names.  Sections
+// whose contents lack a valid compression header are left out.  The map
+// is allocated with new; nothing in this function frees it.
+
+template<int size, bool big_endian>
+Compressed_section_map*
+build_compressed_section_map(
+    const unsigned char* pshdrs,
+    unsigned int shnum,
+    const char* names,
+    section_size_type names_size,
+    Sized_relobj<size, big_endian>* obj)
+{
+  Compressed_section_map* uncompressed_sizes = new Compressed_section_map();
+  const unsigned int shdr_size = elfcpp::Elf_sizes<size>::shdr_size;
+  // Start at section 1; section 0 is skipped.
+  const unsigned char* p = pshdrs + shdr_size;
+  for (unsigned int i = 1; i < shnum; ++i, p += shdr_size)
+    {
+      typename elfcpp::Shdr<size, big_endian> shdr(p);
+      if (shdr.get_sh_type() == elfcpp::SHT_PROGBITS
+	  && (shdr.get_sh_flags() & elfcpp::SHF_ALLOC) == 0)
+	{
+	  if (shdr.get_sh_name() >= names_size)
+	    {
+	      obj->error(_("bad section name offset for section %u: %lu"),
+			 i, static_cast<unsigned long>(shdr.get_sh_name()));
+	      continue;
+	    }
+
+	  const char* name = names + shdr.get_sh_name();
+	  if (is_compressed_debug_section(name))
+	    {
+	      section_size_type len;
+	      const unsigned char* contents =
+		  obj->section_contents(i, &len, false);
+	      uint64_t uncompressed_size = get_uncompressed_size(contents, len);
+	      // -1ULL means there was no valid compression header.
+	      if (uncompressed_size != -1ULL)
+		(*uncompressed_sizes)[i] =
+		    convert_to_section_size_type(uncompressed_size);
+	    }
+	}
+    }
+  return uncompressed_sizes;
+}
+
 // Read the sections and symbols from an object file.
 
 template<int size, bool big_endian>
@@ -514,6 +567,10 @@ Sized_relobj<size, big_endian>::do_read_symbols(Read_symbols_data* sd)
       if (this->find_eh_frame(pshdrs, names, sd->section_names_size))
         this->has_eh_frame_ = true;
     }
+  if (memmem(names, sd->section_names_size, ".zdebug_", 8) != NULL)
+    this->compressed_sections_ =
+        build_compressed_section_map(pshdrs, this->shnum(), names,
+				     sd->section_names_size, this);
 
   sd->symbols = NULL;
   sd->symbols_size = 0;
diff --git a/gold/object.h b/gold/object.h
index bc2b558..9efc28e 100644
--- a/gold/object.h
+++ b/gold/object.h
@@ -518,6 +518,13 @@ class Object
   set_no_export(bool value)
   { this->no_export_ = value; }
 
+  // Return whether section SHNDX is a compressed debug section; if so,
+  // *UNCOMPRESSED_SIZE receives the size of the uncompressed data.
+  bool
+  section_is_compressed(unsigned int shndx,
+			section_size_type* uncompressed_size) const
+  { return this->do_section_is_compressed(shndx, uncompressed_size); }
+
  protected:
   // Returns NULL for Objects that are not plugin objects.  This method
   // is overridden in the Pluginobj class.
@@ -628,6 +635,12 @@ class Object
   bool
   handle_split_stack_section(const char* name);
 
+  // Default implementation: report that no section is compressed and
+  // leave the size argument untouched.  Subclasses may override this.
+  virtual bool
+  do_section_is_compressed(unsigned int, section_size_type*) const
+  { return false; }
+
  private:
   // This class may not be copied.
   Object(const Object&);
@@ -1406,6 +1419,10 @@ class Reloc_symbol_changes
   std::vector<Symbol*> vec_;
 };
 
+// Maps the index of a compressed debug section to its uncompressed size.
+
+typedef std::map<unsigned int, section_size_type> Compressed_section_map;
+
 // A regular object file.  This is size and endian specific.
 
 template<int size, bool big_endian>
@@ -1781,7 +1798,26 @@ class Sized_relobj : public Relobj
   void
   set_output_local_symbol_count(unsigned int value)
   { this->output_local_symbol_count_ = value; }
-   
+
+  // Look SHNDX up in the table of compressed debug sections.  If it is
+  // present, return true and, when UNCOMPRESSED_SIZE is not NULL, store
+  // the uncompressed size there.  Otherwise return false.
+  bool
+  do_section_is_compressed(unsigned int shndx,
+			   section_size_type* uncompressed_size) const
+  {
+    const Compressed_section_map* sections = this->compressed_sections_;
+    if (sections == NULL)
+      return false;
+    Compressed_section_map::const_iterator entry = sections->find(shndx);
+    if (entry == sections->end())
+      return false;
+
+    if (uncompressed_size != NULL)
+      *uncompressed_size = entry->second;
+    return true;
+  }
+
  private:
   // For convenience.
   typedef Sized_relobj<size, big_endian> This;
@@ -2024,6 +2060,8 @@ class Sized_relobj : public Relobj
   std::vector<Deferred_layout> deferred_layout_;
   // The list of relocation sections whose layout was deferred.
   std::vector<Deferred_layout> deferred_layout_relocs_;
+  // For compressed debug sections, map section index to uncompressed size.
+  Compressed_section_map* compressed_sections_;
 };
 
 // A class to manage the list of all objects.
diff --git a/gold/output.cc b/gold/output.cc
index 329a4be..3ac8a3d 100644
--- a/gold/output.cc
+++ b/gold/output.cc
@@ -2086,8 +2086,13 @@ Output_section::add_input_section(Layout* layout,
         }
     }
 
+  section_size_type input_section_size = shdr.get_sh_size();
+  section_size_type uncompressed_size;
+  if (object->section_is_compressed(shndx, &uncompressed_size))
+    input_section_size = uncompressed_size;
+
   this->set_current_data_size_for_child(aligned_offset_in_section
-					+ shdr.get_sh_size());
+					+ input_section_size);
 
   // We need to keep track of this section if we are already keeping
   // track of sections, or if we are relaxing.  Also, if this is a
diff --git a/gold/reloc.cc b/gold/reloc.cc
index 8879f0a..9f7355e 100644
--- a/gold/reloc.cc
+++ b/gold/reloc.cc
@@ -32,6 +32,7 @@
 #include "target-reloc.h"
 #include "reloc.h"
 #include "icf.h"
+#include "compressed_output.h"
 
 namespace gold
 {
@@ -732,10 +733,17 @@ Sized_relobj<size, big_endian>::write_sections(const unsigned char* pshdrs,
 
       off_t view_start;
       section_size_type view_size;
+      bool must_decompress = false;
       if (output_offset != invalid_address)
 	{
 	  view_start = output_section_offset + output_offset;
 	  view_size = convert_to_section_size_type(shdr.get_sh_size());
+	  section_size_type uncompressed_size;
+	  if (this->section_is_compressed(i, &uncompressed_size))
+	    {
+	      view_size = uncompressed_size;
+	      must_decompress = true;
+	    }
 	}
       else
 	{
@@ -754,7 +762,7 @@ Sized_relobj<size, big_endian>::write_sections(const unsigned char* pshdrs,
 	{
 	  unsigned char* buffer = os->postprocessing_buffer();
 	  view = buffer + view_start;
-	  if (output_offset != invalid_address)
+	  if (output_offset != invalid_address && !must_decompress)
 	    {
 	      off_t sh_offset = shdr.get_sh_offset();
 	      if (!rm.empty() && rm.back().file_offset > sh_offset)
@@ -770,14 +778,27 @@ Sized_relobj<size, big_endian>::write_sections(const unsigned char* pshdrs,
 	  else
 	    {
 	      view = of->get_output_view(view_start, view_size);
-	      off_t sh_offset = shdr.get_sh_offset();
-	      if (!rm.empty() && rm.back().file_offset > sh_offset)
-		is_sorted = false;
-	      rm.push_back(File_read::Read_multiple_entry(sh_offset,
-							  view_size, view));
+	      if (!must_decompress)
+		{
+		  off_t sh_offset = shdr.get_sh_offset();
+		  if (!rm.empty() && rm.back().file_offset > sh_offset)
+		    is_sorted = false;
+		  rm.push_back(File_read::Read_multiple_entry(sh_offset,
+							      view_size, view));
+		}
 	    }
 	}
 
+      if (must_decompress)
+        {
+	  // Read and decompress the section.
+          section_size_type len;
+	  const unsigned char* p = this->section_contents(i, &len, false);
+	  if (!decompress_input_section(p, len, view, view_size))
+	    this->error(_("could not decompress section %s"),
+			this->section_name(i).c_str());
+        }
+
       pvs->view = view;
       pvs->address = os->address();
       if (output_offset != invalid_address)