This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch/rfc][pr17902] GC unused parts of mergeable sections


Attached is an incomplete patch for removing unused parts of mergeable sections.

It uses the walk over relocations done by --gc-sections to collect
which offsets in a mergeable section are used. It then stores that
information in a (section -> offset set) map in Relobj.

The map is then used by other parts of the linker to avoid including
the unreferenced parts of mergeable sections in the output. In this
patch, that is implemented only for strings.

The obvious missing parts are marked with a FIXME (and it is missing a
test). For now it would be nice to just know if I am going in the
right direction or if there is a better place to plug in this
optimization.

I tested it when linking chrome and it cuts the rodata section from
6985174 to 6852054 bytes.

Cheers,
Rafael
diff --git a/gold/gc.cc b/gold/gc.cc
index 843b2b8..634bda9 100644
--- a/gold/gc.cc
+++ b/gold/gc.cc
@@ -68,6 +68,21 @@ Garbage_collection::do_transitive_closure()
         }
     }
   this->worklist_ready();
+  for (Sections_reachable::iterator I = this->referenced_list().begin(),
+                                    E = this->referenced_list().end();
+       I != E; ++I) {
+    Atom_reachable &atoms = this->atom_reloc_map()[*I];
+    for (Atom_reachable::iterator I2 = atoms.begin(), E2 = atoms.end();
+         I2 != E2; ++I2) {
+      const Atom &atom = *I2;
+      const Section_id id = atom.first;
+      Object* obj = id.first;
+      unsigned int sec_num = id.second;
+      // FIXME: what is the safe way to do this cast?
+      Relobj* r_obj = static_cast<Relobj*>(obj);
+      r_obj->referenced_offsets()[sec_num].insert(atom.second);
+    }
+  }
 }
 
 } // End namespace gold.
diff --git a/gold/gc.h b/gold/gc.h
index 2db7cb9..fe0ca70 100644
--- a/gold/gc.h
+++ b/gold/gc.h
@@ -58,6 +58,16 @@ class Garbage_collection
   // Different object files can have cident sections with the same name.
   typedef std::map<std::string, Sections_reachable> Cident_section_map;
 
+  typedef std::pair<Section_id, uint64_t> Atom;
+  struct Atom_hash {
+    size_t operator()(const Atom &a) const {
+      const Section_id &s = a.first;
+      return reinterpret_cast<uintptr_t>(s.first) ^ s.second ^ a.second;
+    }
+  };
+  typedef Unordered_set<Atom, Atom_hash> Atom_reachable;
+  typedef std::map<Section_id, Atom_reachable> Atom_ref;
+
   Garbage_collection()
   : is_worklist_ready_(false)
   { }
@@ -72,6 +82,10 @@ class Garbage_collection
   section_reloc_map()
   { return this->section_reloc_map_; }
 
+  Atom_ref&
+  atom_reloc_map()
+  { return this->atom_reloc_map_; }
+
   Worklist_type&
   worklist()
   { return this->work_list_; }
@@ -116,6 +130,17 @@ class Garbage_collection
       p->second.insert(dst_id);
   }
 
+  void add_reference_to_merge_section(Object *src_object,
+                                      unsigned int src_shndx,
+                                      Object *dst_object,
+                                      unsigned int dst_shndx, uint64_t offset) {
+    add_reference(src_object, src_shndx, dst_object, dst_shndx);
+    Section_id src_id(src_object, src_shndx);
+    Section_id dst_id(dst_object, dst_shndx);
+    Atom atom(dst_id, offset);
+    this->atom_reloc_map_[src_id].insert(atom);
+  }
+
  private:
 
   Worklist_type work_list_;
@@ -123,6 +148,8 @@ class Garbage_collection
   Section_ref section_reloc_map_;
   Sections_reachable referenced_list_;
   Cident_section_map cident_sections_;
+
+  Atom_ref atom_reloc_map_;
 };
 
 // Data to pass between successive invocations of do_layout
@@ -238,6 +265,10 @@ gc_process_relocs(
       typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
       Address dst_off;
 
+      // If the relocation points to a section, this includes the addend.
+      // Otherwise it doesn't.
+      Address gc_ref_off;
+
       if (r_sym < local_count)
         {
           gold_assert(plocal_syms != NULL);
@@ -247,7 +278,10 @@ gc_process_relocs(
           bool is_ordinary;
 	  dst_indx = src_obj->adjust_sym_shndx(r_sym, dst_indx, &is_ordinary);
           dst_obj = src_obj;
-	  dst_off = lsym.get_st_value() + addend;
+	  gc_ref_off = dst_off = lsym.get_st_value();
+	  dst_off += addend;
+	  if (lsym.get_st_type() == elfcpp::STT_SECTION)
+	    gc_ref_off += addend;
 
           if (is_icf_tracked)
             {
@@ -300,6 +334,7 @@ gc_process_relocs(
             }
 	  dst_off = static_cast<const Sized_symbol<size>*>(gsym)->value();
 	  dst_off += addend;
+	  gc_ref_off = dst_off;
 
 	  // When doing safe folding, check to see if this relocation is that
 	  // of a function pointer being taken.
@@ -351,7 +386,14 @@ gc_process_relocs(
         }
       if (parameters->options().gc_sections())
         {
-	  symtab->gc()->add_reference(src_obj, src_indx, dst_obj, dst_indx);
+	  Garbage_collection* gc = symtab->gc();
+	  uint64_t dst_flags = dst_obj->section_flags(dst_indx);
+	  if (dst_flags & elfcpp::SHF_MERGE)
+	    gc->add_reference_to_merge_section(src_obj, src_indx, dst_obj,
+					       dst_indx, gc_ref_off);
+	  else
+	    gc->add_reference(src_obj, src_indx, dst_obj, dst_indx);
+
 	  parameters->sized_target<size, big_endian>()
 	    ->gc_add_reference(symtab, src_obj, src_indx,
 			       dst_obj, dst_indx, dst_off);
diff --git a/gold/merge.cc b/gold/merge.cc
index 269e6bf..fc5f5e0 100644
--- a/gold/merge.cc
+++ b/gold/merge.cc
@@ -574,10 +574,18 @@ Output_merge_string<Char_type>::do_add_input_section(Relobj* object,
 	      != init_align_modulo))
 	  has_misaligned_strings = true;
 
-      Stringpool::Key key;
-      this->stringpool_.add_with_length(p, len, true, &key);
-
-      merged_strings.push_back(Merged_string(i, key));
+      uintptr_t offset =
+        reinterpret_cast<uintptr_t>(p) - reinterpret_cast<uintptr_t>(pdata);
+
+      // FIXME: we have to handle an offset in [p, p + len).
+      // FIXME: handle the non-gc case
+      if (((object->section_flags(shndx) & elfcpp::SHF_ALLOC) == 0) ||
+          object->referenced_offsets()[shndx].count(offset)) {
+        Stringpool::Key key;
+        this->stringpool_.add_with_length(p, len, true, &key);
+
+        merged_strings.push_back(Merged_string(i, key));
+      }
       p += len + 1;
       i += (len + 1) * sizeof(Char_type);
     }
diff --git a/gold/object.h b/gold/object.h
index cce6c8c..202c949 100644
--- a/gold/object.h
+++ b/gold/object.h
@@ -322,6 +322,8 @@ class Object
 {
  public:
   typedef std::vector<Symbol*> Symbols;
+  typedef Unordered_set<uint64_t> Offsets_reachable;
+  typedef std::map<unsigned int, Offsets_reachable> Offset_ref;
 
   // NAME is the name of the object as we would report it to the user
   // (e.g., libfoo.a(bar.o) if this is in an archive.  INPUT_FILE is
@@ -1255,6 +1257,10 @@ class Relobj : public Object
   is_big_endian() const
   { return this->do_is_big_endian(); }
 
+  Offset_ref&
+  referenced_offsets()
+  { return this->referenced_offsets_; }
+
  protected:
   // The output section to be used for each input section, indexed by
   // the input section number.  The output section is NULL if the
@@ -1454,6 +1460,8 @@ class Relobj : public Object
   unsigned int first_dyn_reloc_;
   // Count of dynamic relocations for this object.
   unsigned int dyn_reloc_count_;
+
+  std::map<unsigned int, Offsets_reachable> referenced_offsets_;
 };
 
 // This class is used to handle relocations against a section symbol

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]