[gold][patch] Reduce heap usage for string merge sections
Cary Coutant
ccoutant@google.com
Tue Jul 20 23:56:00 GMT 2010
This time with the patch attached.
On Tue, Jul 20, 2010 at 4:09 PM, Cary Coutant <ccoutant@google.com> wrote:
> I used tcmalloc to profile the heap usage of gold while building a
> large binary compiled with -O2 and -g, and found that the
> Merged_string structures were responsible for just over 50% of the
> total heap used by gold. That structure has 6 fields: a pointer to the
> object, the input section index, the input offset, a pointer to the
> string itself, the length of the string, and the stringpool key. The
> string field was never used, so I removed it. The pointer to the
> object and the input section index were common across fairly large
> lists of strings, so I separated the one list per output section into
> separate lists for each (object, shndx), and removed those two fields
> from the Merged_string struct. The length field was easily recomputed
> by subtracting adjacent input offsets, so I removed it, too. The
> result is a 67% reduction in the size of the Merged_string structure,
> and the net effect was to reduce the total heap used for my benchmark
> from 3.45 GiB to 2.56 GiB (25% less heap).
>
> The Object_merge_map structure is now responsible for more heap than
> anything else; I'll be taking a look at reducing its memory footprint
> next.
>
> Tested on x86_64 Linux. OK?
>
> -cary
>
> * merge.h (Output_merge_string::Merged_string): Remove object, shndx,
> string, and length fields.
> (Output_merge_string::Merged_strings_list): New type.
> (Output_merge_string::Merged_strings_lists): New typedef.
> (Output_merge_string): Replace merged_strings_ with
> merged_strings_lists_.
> * merge.cc (Output_merge_string::do_add_input_section): Allocate new
> Merged_strings_list per input object and section. Don't store pointer
> to the string. Don't store length with each merged string entry.
> (Output_merge_string::finalize_merged_data): Loop over list of merged
> strings lists. Recompute length of each merged string.
>
-------------- next part --------------
Index: compressed_output.cc
===================================================================
RCS file: /cvs/src/src/gold/compressed_output.cc,v
retrieving revision 1.11
diff -u -p -r1.11 compressed_output.cc
--- compressed_output.cc 14 Dec 2009 19:53:04 -0000 1.11
+++ compressed_output.cc 12 Jul 2010 17:56:41 -0000
@@ -1,6 +1,6 @@
-// compressed_output.cc -- manage compressed output sections for gold
+// compressed_output.cc -- manage compressed debug sections for gold
-// Copyright 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant@google.com>.
// This file is part of gold.
@@ -33,6 +33,8 @@
namespace gold
{
+#ifdef HAVE_ZLIB_H
+
// Compress UNCOMPRESSED_DATA of size UNCOMPRESSED_SIZE. Returns true
// if it successfully compressed, false if it failed for any reason
// (including not having zlib support in the library). If it returns
@@ -42,8 +44,6 @@ namespace gold
// "ZLIB", and 8 bytes indicating the uncompressed size, in big-endian
// order.
-#ifdef HAVE_ZLIB_H
-
static bool
zlib_compress(const unsigned char* uncompressed_data,
unsigned long uncompressed_size,
@@ -81,6 +81,49 @@ zlib_compress(const unsigned char* uncom
}
}
+// Decompress COMPRESSED_DATA of size COMPRESSED_SIZE, into a buffer
+// UNCOMPRESSED_DATA of size UNCOMPRESSED_SIZE.  Returns true only if
+// every stream inflated and the buffer was filled exactly, false
+// otherwise.  The buffer, of appropriate size, is provided by the
+// caller, and is typically part of the memory-mapped output file.
+
+static bool
+zlib_decompress(const unsigned char* compressed_data,
+                unsigned long compressed_size,
+                unsigned char* uncompressed_data,
+                unsigned long uncompressed_size)
+{
+  // zlib requires these fields to be set before inflateInit; NULL
+  // allocator hooks select its default allocator.
+  z_stream strm;
+  strm.zalloc = NULL;
+  strm.zfree = NULL;
+  strm.opaque = NULL;
+  strm.avail_in = compressed_size;
+  strm.next_in = const_cast<Bytef*>(compressed_data);
+  strm.avail_out = uncompressed_size;
+
+  if (inflateInit(&strm) != Z_OK)
+    return false;
+
+  // It is possible the section consists of several compressed
+  // buffers concatenated together, so we uncompress in a loop.
+  bool ok = true;
+  while (ok && strm.avail_in > 0)
+    {
+      // Continue writing where the previous stream stopped.
+      strm.next_out = (uncompressed_data
+                       + (uncompressed_size - strm.avail_out));
+      ok = (inflate(&strm, Z_FINISH) == Z_STREAM_END
+            && inflateReset(&strm) == Z_OK);
+    }
+  // Release the inflate state on every path, including failures, so
+  // that a corrupt section does not leak zlib's internal buffers.
+  if (inflateEnd(&strm) != Z_OK)
+    ok = false;
+  return ok && strm.avail_out == 0;
+}
+
#else // !defined(HAVE_ZLIB_H)
static bool
@@ -90,8 +133,62 @@ zlib_compress(const unsigned char*, unsi
return false;
}
+// Without zlib there is nothing we can decompress; always fail.  The
+// parameter list must match the HAVE_ZLIB_H version, because
+// decompress_input_section calls this with a plain output pointer and
+// size, not with pointers to them.
+
+static bool
+zlib_decompress(const unsigned char*, unsigned long,
+                unsigned char*, unsigned long)
+{
+  return false;
+}
+
#endif // !defined(HAVE_ZLIB_H)
+// Parse the header at the start of a compressed debug section.
+// Returns the uncompressed size recorded there, or -1ULL when the
+// data is too short or does not begin with the "ZLIB" magic.
+
+uint64_t
+get_uncompressed_size(const unsigned char* compressed_data,
+                      section_size_type compressed_size)
+{
+  // Header layout: 4 bytes of magic, then the uncompressed section
+  // size as 8 bytes in big-endian order.  Currently, we support only
+  // zlib compression.
+  const unsigned int magic_len = 4;
+  const unsigned int header_len = magic_len + 8;
+
+  if (compressed_size < header_len)
+    return -1ULL;
+
+  const char* magic = reinterpret_cast<const char*>(compressed_data);
+  if (strncmp(magic, "ZLIB", magic_len) != 0)
+    return -1ULL;
+
+  return elfcpp::Swap_unaligned<64, true>::readval(compressed_data
+                                                   + magic_len);
+}
+
+// Decompress a compressed debug section directly into the output file.
+// COMPRESSED_DATA and COMPRESSED_SIZE describe the raw section
+// contents, including the 12-byte header; UNCOMPRESSED_DATA is a
+// caller-provided buffer of UNCOMPRESSED_SIZE bytes.  Returns false if
+// the header is too short or lacks the "ZLIB" magic, or if
+// zlib_decompress fails.
+
+bool
+decompress_input_section(const unsigned char* compressed_data,
+                         unsigned long compressed_size,
+                         unsigned char* uncompressed_data,
+                         unsigned long uncompressed_size)
+{
+  const unsigned int zlib_header_size = 12;
+
+  /* Verify the compression header.  Currently, we support only zlib
+     compression, so it should be "ZLIB" followed by the uncompressed
+     section size, 8 bytes in big-endian order.  */
+  if (compressed_size < zlib_header_size
+      || strncmp(reinterpret_cast<const char*>(compressed_data),
+                 "ZLIB", 4) != 0)
+    return false;
+
+  // Keep the recorded size in a 64-bit variable: unsigned long is only
+  // 32 bits on some hosts, and truncating the header value there would
+  // let a mismatch in the upper bits slip past the check below.
+  const uint64_t uncompressed_size_check =
+    elfcpp::Swap_unaligned<64, true>::readval(compressed_data + 4);
+  gold_assert(uncompressed_size_check == uncompressed_size);
+
+  // The compressed stream starts right after the header.
+  return zlib_decompress(compressed_data + zlib_header_size,
+                         compressed_size - zlib_header_size,
+                         uncompressed_data,
+                         uncompressed_size);
+}
+
// Class Output_compressed_section.
// Set the final data size of a compressed section. This is where
Index: compressed_output.h
===================================================================
RCS file: /cvs/src/src/gold/compressed_output.h,v
retrieving revision 1.6
diff -u -p -r1.6 compressed_output.h
--- compressed_output.h 14 Dec 2009 19:53:04 -0000 1.6
+++ compressed_output.h 12 Jul 2010 17:56:41 -0000
@@ -1,6 +1,6 @@
// compressed_output.h -- compressed output sections for gold -*- C++ -*-
-// Copyright 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant@google.com>.
// This file is part of gold.
@@ -37,6 +37,18 @@ namespace gold
class General_options;
+// Read the compression header of a compressed debug section and return
+// the uncompressed size.
+
+extern uint64_t
+get_uncompressed_size(const unsigned char*, section_size_type);
+
+// Decompress a compressed debug section directly into the output file.
+
+extern bool
+decompress_input_section(const unsigned char*, unsigned long, unsigned char*,
+ unsigned long);
+
// This is used for a section whose data should be compressed. It is
// a regular Output_section which computes its contents into a buffer
// and then postprocesses it.
Index: dwarf_reader.cc
===================================================================
RCS file: /cvs/src/src/gold/dwarf_reader.cc,v
retrieving revision 1.26
diff -u -p -r1.26 dwarf_reader.cc
--- dwarf_reader.cc 14 Dec 2009 19:53:04 -0000 1.26
+++ dwarf_reader.cc 12 Jul 2010 17:56:41 -0000
@@ -32,6 +32,7 @@
#include "reloc.h"
#include "dwarf_reader.h"
#include "int_encoding.h"
+#include "compressed_output.h"
namespace gold {
@@ -80,6 +81,21 @@ Sized_dwarf_line_info<size, big_endian>:
if (this->buffer_ == NULL)
return;
+ section_size_type uncompressed_size = 0;
+ unsigned char* uncompressed_data = NULL;
+ if (object->section_is_compressed(debug_shndx, &uncompressed_size))
+ {
+ uncompressed_data = new unsigned char[uncompressed_size];
+ if (!decompress_input_section(this->buffer_,
+ this->buffer_end_ - this->buffer_,
+ uncompressed_data,
+ uncompressed_size))
+ object->error(_("could not decompress section %s"),
+ object->section_name(debug_shndx).c_str());
+ this->buffer_ = uncompressed_data;
+ this->buffer_end_ = this->buffer_ + uncompressed_size;
+ }
+
// Find the relocation section for ".debug_line".
// We expect these for relobjs (.o's) but not dynobjs (.so's).
bool got_relocs = false;
Index: layout.cc
===================================================================
RCS file: /cvs/src/src/gold/layout.cc,v
retrieving revision 1.171
diff -u -p -r1.171 layout.cc
--- layout.cc 19 Jun 2010 21:11:55 -0000 1.171
+++ layout.cc 12 Jul 2010 17:56:41 -0000
@@ -944,7 +944,16 @@ Layout::section_flags_to_segment(elfcpp:
static bool
is_compressible_debug_section(const char* secname)
{
- return (strncmp(secname, ".debug", sizeof(".debug") - 1) == 0);
+ return (is_prefix_of(".debug", secname));
+}
+
+// Input files may already contain compressed debug sections.  Return
+// TRUE if SECNAME names one, i.e. if it begins with ".zdebug".
+
+bool
+is_compressed_debug_section(const char* secname)
+{
+  return is_prefix_of(".zdebug", secname);
}
// Make a new Output_section, and attach it to segments as
@@ -3772,6 +3781,20 @@ Layout::output_section_name(const char*
}
}
+ // Compressed debug sections should be mapped to the corresponding
+ // uncompressed section.
+ if (is_compressed_debug_section(name))
+ {
+ size_t len = strlen(name);
+ char *uncompressed_name = new char[len];
+ uncompressed_name[0] = '.';
+ gold_assert(name[0] == '.' && name[1] == 'z');
+ strncpy(&uncompressed_name[1], &name[2], len - 2);
+ uncompressed_name[len - 1] = '\0';
+ *plen = len - 1;
+ return uncompressed_name;
+ }
+
return name;
}
Index: layout.h
===================================================================
RCS file: /cvs/src/src/gold/layout.h,v
retrieving revision 1.81
diff -u -p -r1.81 layout.h
--- layout.h 1 Jun 2010 23:37:57 -0000 1.81
+++ layout.h 12 Jul 2010 17:56:41 -0000
@@ -59,6 +59,10 @@ class Output_reduced_debug_info_section;
class Eh_frame;
class Target;
+// Return TRUE if SECNAME is the name of a compressed debug section.
+extern bool
+is_compressed_debug_section(const char* secname);
+
// This task function handles mapping the input sections to output
// sections and laying them out in memory.
@@ -451,6 +455,7 @@ class Layout
{
// Debugging sections can only be recognized by name.
return (strncmp(name, ".debug", sizeof(".debug") - 1) == 0
+ || strncmp(name, ".zdebug", sizeof(".zdebug") - 1) == 0
|| strncmp(name, ".gnu.linkonce.wi.",
sizeof(".gnu.linkonce.wi.") - 1) == 0
|| strncmp(name, ".line", sizeof(".line") - 1) == 0
Index: merge.cc
===================================================================
RCS file: /cvs/src/src/gold/merge.cc,v
retrieving revision 1.35
diff -u -p -r1.35 merge.cc
--- merge.cc 23 May 2010 07:43:39 -0000 1.35
+++ merge.cc 12 Jul 2010 17:56:41 -0000
@@ -1,6 +1,6 @@
// merge.cc -- handle section merging for gold
-// Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+// Copyright 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant@google.com>.
// This file is part of gold.
@@ -26,6 +26,7 @@
#include <algorithm>
#include "merge.h"
+#include "compressed_output.h"
namespace gold
{
@@ -404,12 +405,29 @@ bool
Output_merge_data::do_add_input_section(Relobj* object, unsigned int shndx)
{
section_size_type len;
+ section_size_type uncompressed_size = 0;
+ unsigned char* uncompressed_data = NULL;
const unsigned char* p = object->section_contents(shndx, &len, false);
+ if (object->section_is_compressed(shndx, &uncompressed_size))
+ {
+ uncompressed_data = new unsigned char[uncompressed_size];
+ if (!decompress_input_section(p, len, uncompressed_data,
+ uncompressed_size))
+ object->error(_("could not decompress section %s"),
+ object->section_name(shndx).c_str());
+ p = uncompressed_data;
+ len = uncompressed_size;
+ }
+
section_size_type entsize = convert_to_section_size_type(this->entsize());
if (len % entsize != 0)
- return false;
+ {
+ if (uncompressed_data != NULL)
+ delete[] uncompressed_data;
+ return false;
+ }
this->input_count_ += len / entsize;
@@ -438,6 +456,9 @@ Output_merge_data::do_add_input_section(
if (this->keeps_input_sections())
record_input_section(object, shndx);
+ if (uncompressed_data != NULL)
+ delete[] uncompressed_data;
+
return true;
}
@@ -495,8 +516,21 @@ Output_merge_string<Char_type>::do_add_i
unsigned int shndx)
{
section_size_type len;
+ section_size_type uncompressed_size = 0;
+ unsigned char* uncompressed_data = NULL;
const unsigned char* pdata = object->section_contents(shndx, &len, false);
+ if (object->section_is_compressed(shndx, &uncompressed_size))
+ {
+ uncompressed_data = new unsigned char[uncompressed_size];
+ if (!decompress_input_section(pdata, len, uncompressed_data,
+ uncompressed_size))
+ object->error(_("could not decompress section %s"),
+ object->section_name(shndx).c_str());
+ pdata = uncompressed_data;
+ len = uncompressed_size;
+ }
+
const Char_type* p = reinterpret_cast<const Char_type*>(pdata);
const Char_type* pend = p + len / sizeof(Char_type);
@@ -504,6 +538,8 @@ Output_merge_string<Char_type>::do_add_i
{
object->error(_("mergeable string section length not multiple of "
"character size"));
+ if (uncompressed_data != NULL)
+ delete[] uncompressed_data;
return false;
}
@@ -545,6 +581,9 @@ Output_merge_string<Char_type>::do_add_i
if (this->keeps_input_sections())
record_input_section(object, shndx);
+ if (uncompressed_data != NULL)
+ delete[] uncompressed_data;
+
return true;
}
Index: object.cc
===================================================================
RCS file: /cvs/src/src/gold/object.cc,v
retrieving revision 1.125
diff -u -p -r1.125 object.cc
--- object.cc 10 Jun 2010 17:20:26 -0000 1.125
+++ object.cc 12 Jul 2010 17:56:41 -0000
@@ -39,6 +39,7 @@
#include "object.h"
#include "dynobj.h"
#include "plugin.h"
+#include "compressed_output.h"
namespace gold
{
@@ -367,7 +368,10 @@ Sized_relobj<size, big_endian>::Sized_re
local_got_offsets_(),
kept_comdat_sections_(),
has_eh_frame_(false),
- discarded_eh_frame_shndx_(-1U)
+ discarded_eh_frame_shndx_(-1U),
+ deferred_layout_(),
+ deferred_layout_relocs_(),
+ compressed_sections_()
{
}
@@ -495,6 +499,50 @@ Sized_relobj<size, big_endian>::find_eh_
return false;
}
+// Scan the section headers for compressed debug sections and return a
+// newly allocated table mapping each such section's index to its
+// uncompressed size.  Sections whose contents do not carry a valid
+// compression header are left out of the table.
+
+template<int size, bool big_endian>
+Compressed_section_map*
+build_compressed_section_map(
+    const unsigned char* pshdrs,
+    unsigned int shnum,
+    const char* names,
+    section_size_type names_size,
+    Sized_relobj<size, big_endian>* obj)
+{
+  const unsigned int shdr_size = elfcpp::Elf_sizes<size>::shdr_size;
+  Compressed_section_map* table = new Compressed_section_map();
+
+  // Start at index 1; the header at index 0 is not examined.
+  const unsigned char* cur = pshdrs + shdr_size;
+  for (unsigned int shndx = 1; shndx < shnum; ++shndx, cur += shdr_size)
+    {
+      typename elfcpp::Shdr<size, big_endian> shdr(cur);
+
+      // Only non-allocated PROGBITS sections are considered.
+      if (shdr.get_sh_type() != elfcpp::SHT_PROGBITS
+          || (shdr.get_sh_flags() & elfcpp::SHF_ALLOC) != 0)
+        continue;
+
+      // Reject name offsets that point outside the name table.
+      if (shdr.get_sh_name() >= names_size)
+        {
+          obj->error(_("bad section name offset for section %u: %lu"),
+                     shndx,
+                     static_cast<unsigned long>(shdr.get_sh_name()));
+          continue;
+        }
+
+      if (!is_compressed_debug_section(names + shdr.get_sh_name()))
+        continue;
+
+      // Read the header from the section contents; -1ULL means that
+      // no valid compression header was found.
+      section_size_type contents_size;
+      const unsigned char* contents =
+        obj->section_contents(shndx, &contents_size, false);
+      const uint64_t recorded_size =
+        get_uncompressed_size(contents, contents_size);
+      if (recorded_size == -1ULL)
+        continue;
+
+      (*table)[shndx] = convert_to_section_size_type(recorded_size);
+    }
+  return table;
+}
+
// Read the sections and symbols from an object file.
template<int size, bool big_endian>
@@ -514,6 +562,10 @@ Sized_relobj<size, big_endian>::do_read_
if (this->find_eh_frame(pshdrs, names, sd->section_names_size))
this->has_eh_frame_ = true;
}
+ if (memmem(names, sd->section_names_size, ".zdebug_", 8) != NULL)
+ this->compressed_sections_ =
+ build_compressed_section_map(pshdrs, this->shnum(), names,
+ sd->section_names_size, this);
sd->symbols = NULL;
sd->symbols_size = 0;
Index: object.h
===================================================================
RCS file: /cvs/src/src/gold/object.h,v
retrieving revision 1.96
diff -u -p -r1.96 object.h
--- object.h 10 Jun 2010 17:20:26 -0000 1.96
+++ object.h 12 Jul 2010 17:56:41 -0000
@@ -518,6 +518,13 @@ class Object
set_no_export(bool value)
{ this->no_export_ = value; }
+ // Return TRUE if section SHNDX is a compressed debug section, and set
+ // *UNCOMPRESSED_SIZE to the size of the uncompressed data.  This is
+ // a thin wrapper that forwards both arguments to the virtual
+ // do_section_is_compressed and returns its result.
+ bool
+ section_is_compressed(unsigned int shndx,
+ section_size_type* uncompressed_size) const
+ { return this->do_section_is_compressed(shndx, uncompressed_size); }
+
protected:
// Returns NULL for Objects that are not plugin objects. This method
// is overridden in the Pluginobj class.
@@ -628,6 +635,12 @@ class Object
bool
handle_split_stack_section(const char* name);
+ // Virtual hook called by section_is_compressed.  This default
+ // version ignores both arguments and always returns FALSE, so
+ // *UNCOMPRESSED_SIZE is left untouched.
+ virtual bool
+ do_section_is_compressed(unsigned int, section_size_type*) const
+ { return false; }
+
private:
// This class may not be copied.
Object(const Object&);
@@ -1406,6 +1419,10 @@ class Reloc_symbol_changes
std::vector<Symbol*> vec_;
};
+// Type for mapping section index to uncompressed size.
+
+typedef std::map<unsigned int, section_size_type> Compressed_section_map;
+
// A regular object file. This is size and endian specific.
template<int size, bool big_endian>
@@ -1781,7 +1798,26 @@ class Sized_relobj : public Relobj
void
set_output_local_symbol_count(unsigned int value)
{ this->output_local_symbol_count_ = value; }
-
+
+  // Look SHNDX up in the table of compressed debug sections.  Returns
+  // FALSE if there is no table or no entry for SHNDX; otherwise
+  // returns TRUE and, when UNCOMPRESSED_SIZE is not NULL, stores the
+  // section's uncompressed size through it.
+  bool
+  do_section_is_compressed(unsigned int shndx,
+                           section_size_type* uncompressed_size) const
+  {
+    const Compressed_section_map* table = this->compressed_sections_;
+    if (table == NULL)
+      return false;
+
+    Compressed_section_map::const_iterator entry = table->find(shndx);
+    if (entry == table->end())
+      return false;
+
+    if (uncompressed_size != NULL)
+      *uncompressed_size = entry->second;
+    return true;
+  }
+
private:
// For convenience.
typedef Sized_relobj<size, big_endian> This;
@@ -2024,6 +2060,8 @@ class Sized_relobj : public Relobj
std::vector<Deferred_layout> deferred_layout_;
// The list of relocation sections whose layout was deferred.
std::vector<Deferred_layout> deferred_layout_relocs_;
+ // For compressed debug sections, map section index to uncompressed size.
+ Compressed_section_map* compressed_sections_;
};
// A class to manage the list of all objects.
Index: output.cc
===================================================================
RCS file: /cvs/src/src/gold/output.cc,v
retrieving revision 1.128
diff -u -p -r1.128 output.cc
--- output.cc 3 Jun 2010 18:01:19 -0000 1.128
+++ output.cc 12 Jul 2010 17:56:42 -0000
@@ -2086,8 +2086,13 @@ Output_section::add_input_section(Layout
}
}
+ section_size_type input_section_size = shdr.get_sh_size();
+ section_size_type uncompressed_size;
+ if (object->section_is_compressed(shndx, &uncompressed_size))
+ input_section_size = uncompressed_size;
+
this->set_current_data_size_for_child(aligned_offset_in_section
- + shdr.get_sh_size());
+ + input_section_size);
// We need to keep track of this section if we are already keeping
// track of sections, or if we are relaxing. Also, if this is a
Index: reloc.cc
===================================================================
RCS file: /cvs/src/src/gold/reloc.cc,v
retrieving revision 1.56
diff -u -p -r1.56 reloc.cc
--- reloc.cc 11 Mar 2010 01:10:53 -0000 1.56
+++ reloc.cc 12 Jul 2010 17:56:42 -0000
@@ -32,6 +32,7 @@
#include "target-reloc.h"
#include "reloc.h"
#include "icf.h"
+#include "compressed_output.h"
namespace gold
{
@@ -732,10 +733,17 @@ Sized_relobj<size, big_endian>::write_se
off_t view_start;
section_size_type view_size;
+ bool must_decompress = false;
if (output_offset != invalid_address)
{
view_start = output_section_offset + output_offset;
view_size = convert_to_section_size_type(shdr.get_sh_size());
+ section_size_type uncompressed_size;
+ if (this->section_is_compressed(i, &uncompressed_size))
+ {
+ view_size = uncompressed_size;
+ must_decompress = true;
+ }
}
else
{
@@ -754,7 +762,7 @@ Sized_relobj<size, big_endian>::write_se
{
unsigned char* buffer = os->postprocessing_buffer();
view = buffer + view_start;
- if (output_offset != invalid_address)
+ if (output_offset != invalid_address && !must_decompress)
{
off_t sh_offset = shdr.get_sh_offset();
if (!rm.empty() && rm.back().file_offset > sh_offset)
@@ -770,14 +778,27 @@ Sized_relobj<size, big_endian>::write_se
else
{
view = of->get_output_view(view_start, view_size);
- off_t sh_offset = shdr.get_sh_offset();
- if (!rm.empty() && rm.back().file_offset > sh_offset)
- is_sorted = false;
- rm.push_back(File_read::Read_multiple_entry(sh_offset,
- view_size, view));
+ if (!must_decompress)
+ {
+ off_t sh_offset = shdr.get_sh_offset();
+ if (!rm.empty() && rm.back().file_offset > sh_offset)
+ is_sorted = false;
+ rm.push_back(File_read::Read_multiple_entry(sh_offset,
+ view_size, view));
+ }
}
}
+ if (must_decompress)
+ {
+ // Read and decompress the section.
+ section_size_type len;
+ const unsigned char* p = this->section_contents(i, &len, false);
+ if (!decompress_input_section(p, len, view, view_size))
+ this->error(_("could not decompress section %s"),
+ this->section_name(i).c_str());
+ }
+
pvs->view = view;
pvs->address = os->address();
if (output_offset != invalid_address)
More information about the Binutils mailing list