This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] fix PR gold/13320, inline thumb stubs in PLT entries


For calls to functions that require PLT entries, GNU ld recognizes that such calls from Thumb code do not need a large, out-of-line stub that transitions to ARM mode before jumping to the PLT entry.  Instead, a small instruction sequence:

  bx pc
  nop

is inserted at the beginning of the PLT entry and calls through that PLT entry are adjusted appropriately.  This is a code-size win and improves the performance of code layout optimizations, as there are fewer stubs to consider (and the linker may not really account for the presence of the stubs when laying out code anyway).

The patch below brings the same optimization to gold.

I have not run the testsuite, as I do not have the test infrastructure to run gold's testsuite.  However, we have successfully compiled (for Thumb, both ARMv7-A and ARMv5t), linked and run Firefox for Android when using the patch below.

OK?

-Nathan

	PR gold/13320
	* arm.cc (Output_data_plt_arm): Move backwards in the file.
	(Output_data_plt_arm::calculate_offsets): New function.
	(Output_data_plt_arm::thumb_plt_entry_stub): New member.
	(Output_data_plt_arm::thumb_stub_size): New function.
	(Output_data_plt_arm::plt_reference): New struct.
	(Output_data_plt_arm::stub_map_): New member.
	(Output_data_plt_arm::stub_list_): New member.
	(Output_data_plt_arm::entry_count): Query this->stub_list_.
	(Output_data_plt_arm::count_): Delete.
	(Output_data_plt_arm::Output_data_plt_arm): Don't initialize count_.
	(Output_data_plt_arm::add_entry): Add boolean parameters indicating
	the source of the jump.  Record the target for later use.
	(Output_data_plt_arm::do_write): Iterate over this->stub_list_ for
	writing.
	(Reloc_stub::stub_type_for_reloc): Adjust the branch target if
	necessary.  Add gsym argument.
	(Arm_relocate_functions::arm_branch_common): Update call to it.
	(Target_arm::scan_reloc_for_stub): Likewise.
	(Arm_relocate_functions::thumb_branch_common): Likewise.
	Adjust the branch target if we are using an inline thumb stub.
	(Target_arm::make_plt_entry): Add boolean parameters indicating the
	source of the jump.  Pass them to add_entry.
	(Target_arm::Scan::global): Adjust calls to make_plt_entry.
	(Target_arm::do_finalize_sections): Call calculate_offsets if
	necessary.
commit ab22e2854145dada13eb34d9d31d735b4f442139
Author: Nathan Froyd <froydnj@gmail.com>
Date:   Fri Mar 30 15:14:11 2012 -0400

    commits

diff --git a/gold/arm.cc b/gold/arm.cc
index dc6e64a..2411995 100644
--- a/gold/arm.cc
+++ b/gold/arm.cc
@@ -33,6 +33,7 @@
 #include <map>
 #include <utility>
 #include <set>
+#include <vector>
 
 #include "elfcpp.h"
 #include "parameters.h"
@@ -57,8 +58,110 @@ namespace
 
 using namespace gold;
 
+// A class to handle the PLT data.
+
 template<bool big_endian>
-class Output_data_plt_arm;
+class Output_data_plt_arm : public Output_section_data
+{
+ public:
+  typedef Output_data_reloc<elfcpp::SHT_REL, true, 32, big_endian>
+    Reloc_section;
+
+  Output_data_plt_arm(Layout*, Output_data_space*);
+
+  // Add an entry to the PLT.
+  void
+  add_entry(Symbol* gsym, bool, bool);
+
+  // Calculate offsets for PLT entries.
+  void
+  calculate_offsets(bool);
+
+  // Return the .rel.plt section data.
+  const Reloc_section*
+  rel_plt() const
+  { return this->rel_; }
+
+  // Return the number of PLT entries.
+  unsigned int
+  entry_count() const
+  { return this->stub_list_.size(); }
+
+  // Return the offset of the first non-reserved PLT entry.
+  static unsigned int
+  first_plt_entry_offset()
+  { return sizeof(first_plt_entry); }
+
+  // Return the size of a PLT entry.
+  static unsigned int
+  get_plt_entry_size()
+  { return sizeof(plt_entry); }
+
+  static unsigned int
+  thumb_stub_size()
+  { return sizeof(thumb_plt_entry_stub); }
+
+ protected:
+  void
+  do_adjust_output_section(Output_section* os);
+
+  // Write to a map file.
+  void
+  do_print_to_mapfile(Mapfile* mapfile) const
+  { mapfile->print_output_data(this, _("** PLT")); }
+
+ private:
+  // Template for the first PLT entry.
+  static const uint32_t first_plt_entry[5];
+
+  // Template for subsequent PLT entries. 
+  static const uint32_t plt_entry[3];
+
+  // Template for Thumb stub in PLT entries.
+  static const uint16_t thumb_plt_entry_stub[2];
+
+  // Set the final size.
+  void
+  set_final_data_size()
+  { gold_assert(this->total_size_ != 0);
+    this->set_data_size(this->total_size_); }
+
+  // Write out the PLT data.
+  void
+  do_write(Output_file*);
+
+  // Information about where PLT entries are referenced from.
+  struct plt_reference
+  {
+    plt_reference()
+      : from_thumb_calls_(false),
+	from_thumb_jumps_(false),
+	needs_thumb_stub_(false)
+    { }
+
+    plt_reference(bool from_thumb_calls, bool from_thumb_jumps)
+      : from_thumb_calls_(from_thumb_calls),
+	from_thumb_jumps_(from_thumb_jumps),
+	needs_thumb_stub_(false)
+    { }
+
+    bool from_thumb_calls_;
+    bool from_thumb_jumps_;
+    bool needs_thumb_stub_;
+  };
+
+  // The reloc section.
+  Reloc_section* rel_;
+  // The .got.plt section.
+  Output_data_space* got_plt_;
+  // Collected information for determining necessity of Thumb stubs.
+  typedef std::vector<Symbol *> Thumb_stub_list;
+  typedef Unordered_map<Symbol *, plt_reference> Thumb_stub_map;
+  Thumb_stub_map stub_map_;
+  Thumb_stub_list stub_list_;
+  // Total size in bytes of all the entries.
+  size_t total_size_;
+};
 
 template<bool big_endian>
 class Stub_table;
@@ -492,7 +595,8 @@ class Reloc_stub : public Stub
   // the branch target is a thumb instruction.  TARGET is used for look
   // up ARM-specific linker settings.
   static Stub_type
-  stub_type_for_reloc(unsigned int r_type, Arm_address branch_address,
+  stub_type_for_reloc(unsigned int r_type, const Sized_symbol<32>* gsym,
+		      Arm_address branch_address,
 		      Arm_address branch_target, bool target_is_thumb);
 
   // Reloc_stub key.  A key is logically a triplet of a stub type, a symbol
@@ -2702,7 +2806,7 @@ class Target_arm : public Sized_target<32, big_endian>
 
   // Create a PLT entry for a global symbol.
   void
-  make_plt_entry(Symbol_table*, Layout*, Symbol*);
+  make_plt_entry(Symbol_table*, Layout*, Symbol*, bool, bool);
 
   // Define the _TLS_MODULE_BASE_ symbol in the TLS segment.
   void
@@ -3920,7 +4024,7 @@ Arm_relocate_functions<big_endian>::arm_branch_common(
       Valtype unadjusted_branch_target = psymval->value(object, 0);
 
       Stub_type stub_type =
-	Reloc_stub::stub_type_for_reloc(r_type, address,
+	Reloc_stub::stub_type_for_reloc(r_type, gsym, address,
 					unadjusted_branch_target,
 					(thumb_bit != 0));
       if (stub_type != arm_stub_none)
@@ -4039,28 +4143,50 @@ Arm_relocate_functions<big_endian>::thumb_branch_common(
  
   int32_t addend = This::thumb32_branch_offset(upper_insn, lower_insn);
   Arm_address branch_target = psymval->value(object, addend);
+  bool may_use_blx = arm_target->may_use_v5t_interworking();
+
+  // For direct Thumb jumps to PLT entries, adjust the branch target
+  // address to be the Thumb stub in the PLT entry.  For Thumb calls to
+  // PLT entries, use those stubs if we can't BLX to the entry.
+  if (gsym
+      && gsym->has_plt_offset()
+      && (r_type == elfcpp::R_ARM_THM_JUMP24
+	  || (r_type == elfcpp::R_ARM_THM_CALL && !may_use_blx)))
+    {
+      branch_target -= Output_data_plt_arm<big_endian>::thumb_stub_size();
+      thumb_bit = 1;
+    }
 
   // For BLX, bit 1 of target address comes from bit 1 of base address.
-  bool may_use_blx = arm_target->may_use_v5t_interworking();
   if (thumb_bit == 0 && may_use_blx)
     branch_target = Bits<32>::bit_select32(branch_target, address, 0x2);
 
   int32_t branch_offset = branch_target - address;
 
   // We need a stub if the branch offset is too large or if we need
-  // to switch mode.
+  // to switch mode.  If we are jumping to a PLT entry, however, we
+  // can use the Thumb stub in the PLT entry itself rather than
+  // creating a separate, out-of-line stub.
   bool thumb2 = arm_target->using_thumb2();
   if (!parameters->options().relocatable()
       && ((!thumb2 && Bits<23>::has_overflow32(branch_offset))
 	  || (thumb2 && Bits<25>::has_overflow32(branch_offset))
 	  || ((thumb_bit == 0)
 	      && (((r_type == elfcpp::R_ARM_THM_CALL) && !may_use_blx)
-		  || r_type == elfcpp::R_ARM_THM_JUMP24))))
+		  || (r_type == elfcpp::R_ARM_THM_JUMP24
+		      && !gsym->has_plt_offset())))))
     {
       Arm_address unadjusted_branch_target = psymval->value(object, 0);
 
+      // If we're branching to a PLT entry, force a switch to ARM mode.
+      // This ensures that any stub we might have considered necessary
+      // while scanning relocs is found.  It's also going to be more
+      // efficient, since we don't go Thumb -> ARM -> Thumb -> ARM.
+      if (gsym->has_plt_offset())
+	thumb_bit = 0;
+
       Stub_type stub_type =
-	Reloc_stub::stub_type_for_reloc(r_type, address,
+	Reloc_stub::stub_type_for_reloc(r_type, gsym, address,
 					unadjusted_branch_target,
 					(thumb_bit != 0));
 
@@ -4419,6 +4545,7 @@ Reloc_stub::Key::name() const
 Stub_type
 Reloc_stub::stub_type_for_reloc(
    unsigned int r_type,
+   const Sized_symbol<32>* gsym,
    Arm_address location,
    Arm_address destination,
    bool target_is_thumb)
@@ -4431,6 +4558,7 @@ Reloc_stub::stub_type_for_reloc(
   bool should_force_pic_veneer;
   bool thumb2;
   bool thumb_only;
+  unsigned int thumb_stub_size;
   if (parameters->target().is_big_endian())
     {
       const Target_arm<true>* big_endian_target =
@@ -4439,6 +4567,7 @@ Reloc_stub::stub_type_for_reloc(
       should_force_pic_veneer = big_endian_target->should_force_pic_veneer();
       thumb2 = big_endian_target->using_thumb2();
       thumb_only = big_endian_target->using_thumb_only();
+      thumb_stub_size = Output_data_plt_arm<true>::thumb_stub_size();
     }
   else
     {
@@ -4448,6 +4577,7 @@ Reloc_stub::stub_type_for_reloc(
       should_force_pic_veneer = little_endian_target->should_force_pic_veneer();
       thumb2 = little_endian_target->using_thumb2();
       thumb_only = little_endian_target->using_thumb_only();
+      thumb_stub_size = Output_data_plt_arm<false>::thumb_stub_size();
     }
 
   int64_t branch_offset;
@@ -4459,8 +4589,15 @@ Reloc_stub::stub_type_for_reloc(
       // base address (instruction address + 4).
       if ((r_type == elfcpp::R_ARM_THM_CALL) && may_use_blx && !target_is_thumb)
 	destination = Bits<32>::bit_select32(destination, location, 0x2);
+
+      bool use_plt = (gsym && gsym->has_plt_offset());
+      if (use_plt
+	  && (r_type == elfcpp::R_ARM_THM_JUMP24
+	      || (r_type == elfcpp::R_ARM_THM_CALL && !may_use_blx)))
+	destination -= thumb_stub_size;
+
       branch_offset = static_cast<int64_t>(destination) - location;
-	
+
       // Handle cases where:
       // - this call goes too far (different Thumb/Thumb2 max
       //   distance)
@@ -4474,7 +4611,8 @@ Reloc_stub::stub_type_for_reloc(
 		  || (branch_offset < THM2_MAX_BWD_BRANCH_OFFSET)))
 	  || ((!target_is_thumb)
 	      && (((r_type == elfcpp::R_ARM_THM_CALL) && !may_use_blx)
-		  || (r_type == elfcpp::R_ARM_THM_JUMP24))))
+		  || (r_type == elfcpp::R_ARM_THM_JUMP24))
+	      && !use_plt))
 	{
 	  if (target_is_thumb)
 	    {
@@ -7188,77 +7326,6 @@ Arm_output_data_got<big_endian>::do_write(Output_file* of)
   of->write_output_view(offset, oview_size, oview);
 }
 
-// A class to handle the PLT data.
-
-template<bool big_endian>
-class Output_data_plt_arm : public Output_section_data
-{
- public:
-  typedef Output_data_reloc<elfcpp::SHT_REL, true, 32, big_endian>
-    Reloc_section;
-
-  Output_data_plt_arm(Layout*, Output_data_space*);
-
-  // Add an entry to the PLT.
-  void
-  add_entry(Symbol* gsym);
-
-  // Return the .rel.plt section data.
-  const Reloc_section*
-  rel_plt() const
-  { return this->rel_; }
-
-  // Return the number of PLT entries.
-  unsigned int
-  entry_count() const
-  { return this->count_; }
-
-  // Return the offset of the first non-reserved PLT entry.
-  static unsigned int
-  first_plt_entry_offset()
-  { return sizeof(first_plt_entry); }
-
-  // Return the size of a PLT entry.
-  static unsigned int
-  get_plt_entry_size()
-  { return sizeof(plt_entry); }
-
- protected:
-  void
-  do_adjust_output_section(Output_section* os);
-
-  // Write to a map file.
-  void
-  do_print_to_mapfile(Mapfile* mapfile) const
-  { mapfile->print_output_data(this, _("** PLT")); }
-
- private:
-  // Template for the first PLT entry.
-  static const uint32_t first_plt_entry[5];
-
-  // Template for subsequent PLT entries. 
-  static const uint32_t plt_entry[3];
-
-  // Set the final size.
-  void
-  set_final_data_size()
-  {
-    this->set_data_size(sizeof(first_plt_entry)
-			+ this->count_ * sizeof(plt_entry));
-  }
-
-  // Write out the PLT data.
-  void
-  do_write(Output_file*);
-
-  // The reloc section.
-  Reloc_section* rel_;
-  // The .got.plt section.
-  Output_data_space* got_plt_;
-  // The number of PLT entries.
-  unsigned int count_;
-};
-
 // Create the PLT section.  The ordinary .got section is an argument,
 // since we need to refer to the start.  We also create our own .got
 // section just for PLT entries.
@@ -7266,7 +7333,7 @@ class Output_data_plt_arm : public Output_section_data
 template<bool big_endian>
 Output_data_plt_arm<big_endian>::Output_data_plt_arm(Layout* layout,
 						     Output_data_space* got_plt)
-  : Output_section_data(4), got_plt_(got_plt), count_(0)
+  : Output_section_data(4), got_plt_(got_plt)
 {
   this->rel_ = new Reloc_section(false);
   layout->add_output_section_data(".rel.plt", elfcpp::SHT_REL,
@@ -7285,16 +7352,27 @@ Output_data_plt_arm<big_endian>::do_adjust_output_section(Output_section* os)
 
 template<bool big_endian>
 void
-Output_data_plt_arm<big_endian>::add_entry(Symbol* gsym)
+Output_data_plt_arm<big_endian>::add_entry(Symbol* gsym,
+					   bool from_thumb_call,
+					   bool from_thumb_jump)
 {
   gold_assert(!gsym->has_plt_offset());
 
-  // Note that when setting the PLT offset we skip the initial
-  // reserved PLT entry.
-  gsym->set_plt_offset((this->count_) * sizeof(plt_entry)
-		       + sizeof(first_plt_entry));
+  // We want to record information about how the PLT entries are
+  // referenced.  We'll use this information in do_finalize_sections to
+  // assign offsets to all the symbols.
+  typename Thumb_stub_map::iterator it = this->stub_map_.find(gsym);
+  if (it != this->stub_map_.end())
+    {
+      plt_reference& pr = it->second;
+      pr.from_thumb_calls_ = pr.from_thumb_calls_ || from_thumb_call;
+      pr.from_thumb_jumps_ = pr.from_thumb_jumps_ || from_thumb_jump;
+      return;
+    }
 
-  ++this->count_;
+  struct plt_reference pr(from_thumb_call, from_thumb_jump);
+  this->stub_map_[gsym] = pr;
+  this->stub_list_.push_back(gsym);
 
   section_offset_type got_offset = this->got_plt_->current_data_size();
 
@@ -7313,6 +7391,40 @@ Output_data_plt_arm<big_endian>::add_entry(Symbol* gsym)
   // appear in the relocations.
 }
 
+// Calculate offsets for all the PLT symbols.
+
+template<bool big_endian>
+void
+Output_data_plt_arm<big_endian>::calculate_offsets(bool may_use_blx)
+{
+  size_t offset = sizeof(first_plt_entry);
+
+  for(typename Thumb_stub_list::iterator it = this->stub_list_.begin();
+      it != this->stub_list_.end();
+      ++it, offset += sizeof(plt_entry))
+    {
+      Symbol *sym = *it;
+      typename Thumb_stub_map::iterator it = this->stub_map_.find(sym);
+      gold_assert(it != this->stub_map_.end());
+      plt_reference& pr = it->second;
+      bool needs_thumb_stub = (pr.from_thumb_jumps_
+			       || (!may_use_blx
+				   && pr.from_thumb_calls_));
+      pr.needs_thumb_stub_ = needs_thumb_stub;
+
+      // Note that when setting the PLT offset we skip the initial
+      // reserved PLT entry.
+      //
+      // Make the entry point to the start of the ARM code, rather than
+      // the Thumb stub.
+      if (needs_thumb_stub)
+	offset += thumb_stub_size();
+      sym->set_plt_offset(offset);
+    }
+
+  this->total_size_ = offset;
+}
+
 // ARM PLTs.
 // FIXME:  This is not very flexible.  Right now this has only been tested
 // on armv5te.  If we are to support additional architecture features like
@@ -7339,6 +7451,15 @@ const uint32_t Output_data_plt_arm<big_endian>::plt_entry[3] =
   0xe5bcf000,	// ldr   pc, [ip, #0xNNN]!
 };
 
+// Thumb stub for PLT entries.
+
+template<bool big_endian>
+const uint16_t Output_data_plt_arm<big_endian>::thumb_plt_entry_stub[2] =
+{
+  0x4778,	// bx pc
+  0x46c0	// nop
+};
+
 // Write out the PLT.  This uses the hand-coded instructions above,
 // and adjusts them as needed.  This is all specified by the arm ELF
 // Processor Supplement.
@@ -7377,20 +7498,35 @@ Output_data_plt_arm<big_endian>::do_write(Output_file* of)
   memset(got_pov, 0, 12);
   got_pov += 12;
 
+  // Write the rest of the PLT entries.
   const int rel_size = elfcpp::Elf_sizes<32>::rel_size;
   unsigned int plt_offset = sizeof(first_plt_entry);
   unsigned int plt_rel_offset = 0;
   unsigned int got_offset = 12;
-  const unsigned int count = this->count_;
-  for (unsigned int i = 0;
-       i < count;
-       ++i,
+  for (typename Thumb_stub_list::const_iterator it = this->stub_list_.begin();
+       it != this->stub_list_.end();
+       ++it,
 	 pov += sizeof(plt_entry),
 	 got_pov += 4,
 	 plt_offset += sizeof(plt_entry),
 	 plt_rel_offset += rel_size,
 	 got_offset += 4)
     {
+      Symbol* sym = *it;
+      typename Thumb_stub_map::const_iterator mapit = this->stub_map_.find(sym);
+      gold_assert(mapit != this->stub_map_.end());
+      const plt_reference& pr = mapit->second;
+      // Write the thumb stub first if necessary.
+      if (pr.needs_thumb_stub_)
+	{
+	  elfcpp::Swap<16, big_endian>::writeval(pov,
+						 thumb_plt_entry_stub[0]);
+	  elfcpp::Swap<16, big_endian>::writeval(pov + 2,
+						 thumb_plt_entry_stub[1]);
+	  pov += sizeof(thumb_plt_entry_stub);
+	  plt_offset += sizeof(thumb_plt_entry_stub);
+	}
+
       // Set and adjust the PLT entry itself.
       int32_t offset = ((got_address + got_offset)
 			 - (plt_address + plt_offset + 8));
@@ -7419,7 +7555,8 @@ Output_data_plt_arm<big_endian>::do_write(Output_file* of)
 template<bool big_endian>
 void
 Target_arm<big_endian>::make_plt_entry(Symbol_table* symtab, Layout* layout,
-				       Symbol* gsym)
+				       Symbol* gsym, bool from_thumb_call,
+				       bool from_thumb_jump)
 {
   if (gsym->has_plt_offset())
     return;
@@ -7435,7 +7572,7 @@ Target_arm<big_endian>::make_plt_entry(Symbol_table* symtab, Layout* layout,
 				       | elfcpp::SHF_EXECINSTR),
 				      this->plt_, ORDER_PLT, false);
     }
-  this->plt_->add_entry(gsym);
+  this->plt_->add_entry(gsym, from_thumb_call, from_thumb_jump);
 }
 
 // Return the number of entries in the PLT.
@@ -8170,7 +8307,7 @@ Target_arm<big_endian>::Scan::global(Symbol_table* symtab,
         // Make a PLT entry if necessary.
         if (this->symbol_needs_plt_entry(gsym))
           {
-            target->make_plt_entry(symtab, layout, gsym);
+            target->make_plt_entry(symtab, layout, gsym, false, false);
             // Since this is not a PC-relative relocation, we may be
             // taking the address of a function. In that case we need to
             // set the entry in the dynamic symbol table to the address of
@@ -8309,7 +8446,10 @@ Target_arm<big_endian>::Scan::global(Symbol_table* symtab,
 	  && !gsym->is_from_dynobj()
 	  && !gsym->is_preemptible())
 	break;
-      target->make_plt_entry(symtab, layout, gsym);
+      target->make_plt_entry(symtab, layout, gsym,
+			     r_type == elfcpp::R_ARM_THM_CALL,
+			     (r_type == elfcpp::R_ARM_THM_JUMP24
+			      || r_type == elfcpp::R_ARM_THM_JUMP19));
       break;
 
     case elfcpp::R_ARM_GOT_BREL:
@@ -8605,6 +8745,9 @@ Target_arm<big_endian>::do_finalize_sections(
     gold_error(_("unable to provide V4BX reloc interworking fix up; "
 	         "the target profile does not support BX instruction"));
 
+  if (this->plt_ != NULL)
+    this->plt_->calculate_offsets(this->may_use_v5t_interworking());
+
   // Fill in some more dynamic tags.
   const Reloc_section* rel_plt = (this->plt_ == NULL
 				  ? NULL
@@ -10898,7 +11041,7 @@ Target_arm<big_endian>::scan_reloc_for_stub(
 	  symval.set_output_value(this->plt_section()->address()
 				  + gsym->plt_offset());
 	  psymval = &symval;
-	  target_is_thumb = false;
+	  target_is_thumb = 0;
 	}
       else if (gsym->is_undefined())
 	// There is no need to generate a stub symbol is undefined.
@@ -10959,7 +11102,7 @@ Target_arm<big_endian>::scan_reloc_for_stub(
 
   Reloc_stub* stub = NULL;
   Stub_type stub_type =
-    Reloc_stub::stub_type_for_reloc(r_type, address, destination,
+    Reloc_stub::stub_type_for_reloc(r_type, gsym, address, destination,
 				    target_is_thumb);
   if (stub_type != arm_stub_none)
     {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]