This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
Re: [PATCH] fix PR gold/13320, inline thumb stubs in PLT entries
- From: Nathan Froyd <froydnj at mozilla dot com>
- To: Doug Kwan (éæå) <dougkwan at google dot com>
- Cc: binutils at sourceware dot org
- Date: Mon, 9 Apr 2012 08:33:38 -0700 (PDT)
- Subject: Re: [PATCH] fix PR gold/13320, inline thumb stubs in PLT entries
----- Original Message -----
> Patch?
Sigh, missed the mail client error message the first time around...
-Nathan
diff --git a/gold/arm.cc b/gold/arm.cc
index dc6e64a..867c0e3 100644
--- a/gold/arm.cc
+++ b/gold/arm.cc
@@ -33,6 +33,7 @@
#include <map>
#include <utility>
#include <set>
+#include <vector>
#include "elfcpp.h"
#include "parameters.h"
@@ -57,8 +58,110 @@ namespace
using namespace gold;
+// A class to handle the PLT data.
+
template<bool big_endian>
-class Output_data_plt_arm;
+class Output_data_plt_arm : public Output_section_data
+{
+ public:
+ typedef Output_data_reloc<elfcpp::SHT_REL, true, 32, big_endian>
+ Reloc_section;
+
+ Output_data_plt_arm(Layout*, Output_data_space*);
+
+ // Add an entry to the PLT.
+ void
+ add_entry(Symbol* gsym, bool, bool);
+
+ // Calculate offsets for PLT entries.
+ void
+ calculate_offsets(bool);
+
+ // Return the .rel.plt section data.
+ const Reloc_section*
+ rel_plt() const
+ { return this->rel_; }
+
+ // Return the number of PLT entries.
+ unsigned int
+ entry_count() const
+ { return this->stub_list_.size(); }
+
+ // Return the offset of the first non-reserved PLT entry.
+ static unsigned int
+ first_plt_entry_offset()
+ { return sizeof(first_plt_entry); }
+
+ // Return the size of a PLT entry.
+ static unsigned int
+ get_plt_entry_size()
+ { return sizeof(plt_entry); }
+
+ static unsigned int
+ thumb_stub_size()
+ { return sizeof(thumb_plt_entry_stub); }
+
+ protected:
+ void
+ do_adjust_output_section(Output_section* os);
+
+ // Write to a map file.
+ void
+ do_print_to_mapfile(Mapfile* mapfile) const
+ { mapfile->print_output_data(this, _("** PLT")); }
+
+ private:
+ // Template for the first PLT entry.
+ static const uint32_t first_plt_entry[5];
+
+ // Template for subsequent PLT entries.
+ static const uint32_t plt_entry[3];
+
+ // Template for Thumb stub in PLT entries.
+ static const uint16_t thumb_plt_entry_stub[2];
+
+ // Set the final size.
+ void
+ set_final_data_size()
+ { gold_assert(this->total_size_ != 0);
+ this->set_data_size(this->total_size_); }
+
+ // Write out the PLT data.
+ void
+ do_write(Output_file*);
+
+ // Information about where PLT entries are referenced from.
+ struct plt_reference
+ {
+ plt_reference()
+ : from_thumb_calls_(false),
+ from_thumb_jumps_(false),
+ needs_thumb_stub_(false)
+ { }
+
+ plt_reference(bool from_thumb_calls, bool from_thumb_jumps)
+ : from_thumb_calls_(from_thumb_calls),
+ from_thumb_jumps_(from_thumb_jumps),
+ needs_thumb_stub_(false)
+ { }
+
+ bool from_thumb_calls_;
+ bool from_thumb_jumps_;
+ bool needs_thumb_stub_;
+ };
+
+ // The reloc section.
+ Reloc_section* rel_;
+ // The .got.plt section.
+ Output_data_space* got_plt_;
+ // Collected information for determining necessity of Thumb stubs.
+ typedef std::vector<Symbol *> Thumb_stub_list;
+ typedef Unordered_map<Symbol *, plt_reference> Thumb_stub_map;
+ Thumb_stub_map stub_map_;
+ Thumb_stub_list stub_list_;
+ // Total size in bytes of all the entries.
+ size_t total_size_;
+};
template<bool big_endian>
class Stub_table;
@@ -492,7 +595,8 @@ class Reloc_stub : public Stub
// the branch target is a thumb instruction. TARGET is used for look
// up ARM-specific linker settings.
static Stub_type
- stub_type_for_reloc(unsigned int r_type, Arm_address branch_address,
+ stub_type_for_reloc(unsigned int r_type, const Sized_symbol<32>* gsym,
+ Arm_address branch_address,
Arm_address branch_target, bool target_is_thumb);
// Reloc_stub key. A key is logically a triplet of a stub type, a symbol
@@ -2702,7 +2806,7 @@ class Target_arm : public Sized_target<32, big_endian>
// Create a PLT entry for a global symbol.
void
- make_plt_entry(Symbol_table*, Layout*, Symbol*);
+ make_plt_entry(Symbol_table*, Layout*, Symbol*, bool, bool);
// Define the _TLS_MODULE_BASE_ symbol in the TLS segment.
void
@@ -3920,7 +4024,7 @@ Arm_relocate_functions<big_endian>::arm_branch_common(
Valtype unadjusted_branch_target = psymval->value(object, 0);
Stub_type stub_type =
- Reloc_stub::stub_type_for_reloc(r_type, address,
+ Reloc_stub::stub_type_for_reloc(r_type, gsym, address,
unadjusted_branch_target,
(thumb_bit != 0));
if (stub_type != arm_stub_none)
@@ -4039,28 +4143,50 @@ Arm_relocate_functions<big_endian>::thumb_branch_common(
int32_t addend = This::thumb32_branch_offset(upper_insn, lower_insn);
Arm_address branch_target = psymval->value(object, addend);
+ bool may_use_blx = arm_target->may_use_v5t_interworking();
+
+ // For direct Thumb jumps to PLT entries, adjust the branch target
+ // address to be the Thumb stub in the PLT entry. For Thumb calls to
+ // PLT entries, use those stubs if we can't BLX to the entry.
+ if (gsym
+ && gsym->has_plt_offset()
+ && (r_type == elfcpp::R_ARM_THM_JUMP24
+ || (r_type == elfcpp::R_ARM_THM_CALL && !may_use_blx)))
+ {
+ branch_target -= Output_data_plt_arm<big_endian>::thumb_stub_size();
+ thumb_bit = 1;
+ }
// For BLX, bit 1 of target address comes from bit 1 of base address.
- bool may_use_blx = arm_target->may_use_v5t_interworking();
if (thumb_bit == 0 && may_use_blx)
branch_target = Bits<32>::bit_select32(branch_target, address, 0x2);
int32_t branch_offset = branch_target - address;
// We need a stub if the branch offset is too large or if we need
- // to switch mode.
+ // to switch mode. If we are jumping to a PLT entry, however, we
+ // can use the Thumb stub in the PLT entry itself rather than
+ // creating a separate, out-of-line stub.
bool thumb2 = arm_target->using_thumb2();
if (!parameters->options().relocatable()
&& ((!thumb2 && Bits<23>::has_overflow32(branch_offset))
|| (thumb2 && Bits<25>::has_overflow32(branch_offset))
|| ((thumb_bit == 0)
&& (((r_type == elfcpp::R_ARM_THM_CALL) && !may_use_blx)
- || r_type == elfcpp::R_ARM_THM_JUMP24))))
+ || (r_type == elfcpp::R_ARM_THM_JUMP24
+ && !gsym->has_plt_offset())))))
{
Arm_address unadjusted_branch_target = psymval->value(object, 0);
+ // If we're branching to a PLT entry, force a switch to ARM mode.
+ // This ensures that any stub we might have considered necessary
+ // while scanning relocs is found. It's also going to be more
+ // efficient, since we don't go Thumb -> ARM -> Thumb -> ARM.
+ if (gsym && gsym->has_plt_offset())
+ thumb_bit = 0;
+
Stub_type stub_type =
- Reloc_stub::stub_type_for_reloc(r_type, address,
+ Reloc_stub::stub_type_for_reloc(r_type, gsym, address,
unadjusted_branch_target,
(thumb_bit != 0));
@@ -4419,6 +4545,7 @@ Reloc_stub::Key::name() const
Stub_type
Reloc_stub::stub_type_for_reloc(
unsigned int r_type,
+ const Sized_symbol<32>* gsym,
Arm_address location,
Arm_address destination,
bool target_is_thumb)
@@ -4431,6 +4558,7 @@ Reloc_stub::stub_type_for_reloc(
bool should_force_pic_veneer;
bool thumb2;
bool thumb_only;
+ unsigned int thumb_stub_size;
if (parameters->target().is_big_endian())
{
const Target_arm<true>* big_endian_target =
@@ -4439,6 +4567,7 @@ Reloc_stub::stub_type_for_reloc(
should_force_pic_veneer = big_endian_target->should_force_pic_veneer();
thumb2 = big_endian_target->using_thumb2();
thumb_only = big_endian_target->using_thumb_only();
+ thumb_stub_size = Output_data_plt_arm<true>::thumb_stub_size();
}
else
{
@@ -4448,6 +4577,7 @@ Reloc_stub::stub_type_for_reloc(
should_force_pic_veneer = little_endian_target->should_force_pic_veneer();
thumb2 = little_endian_target->using_thumb2();
thumb_only = little_endian_target->using_thumb_only();
+ thumb_stub_size = Output_data_plt_arm<false>::thumb_stub_size();
}
int64_t branch_offset;
@@ -4459,8 +4589,15 @@ Reloc_stub::stub_type_for_reloc(
// base address (instruction address + 4).
if ((r_type == elfcpp::R_ARM_THM_CALL) && may_use_blx && !target_is_thumb)
destination = Bits<32>::bit_select32(destination, location, 0x2);
+
+ bool use_plt = (gsym && gsym->has_plt_offset());
+ if (use_plt
+ && (r_type == elfcpp::R_ARM_THM_JUMP24
+ || (r_type == elfcpp::R_ARM_THM_CALL && !may_use_blx)))
+ destination -= thumb_stub_size;
+
branch_offset = static_cast<int64_t>(destination) - location;
-
+
// Handle cases where:
// - this call goes too far (different Thumb/Thumb2 max
// distance)
@@ -4474,7 +4611,8 @@ Reloc_stub::stub_type_for_reloc(
|| (branch_offset < THM2_MAX_BWD_BRANCH_OFFSET)))
|| ((!target_is_thumb)
&& (((r_type == elfcpp::R_ARM_THM_CALL) && !may_use_blx)
- || (r_type == elfcpp::R_ARM_THM_JUMP24))))
+ || (r_type == elfcpp::R_ARM_THM_JUMP24))
+ && !use_plt))
{
if (target_is_thumb)
{
@@ -7188,77 +7326,6 @@ Arm_output_data_got<big_endian>::do_write(Output_file* of)
of->write_output_view(offset, oview_size, oview);
}
-// A class to handle the PLT data.
-
-template<bool big_endian>
-class Output_data_plt_arm : public Output_section_data
-{
- public:
- typedef Output_data_reloc<elfcpp::SHT_REL, true, 32, big_endian>
- Reloc_section;
-
- Output_data_plt_arm(Layout*, Output_data_space*);
-
- // Add an entry to the PLT.
- void
- add_entry(Symbol* gsym);
-
- // Return the .rel.plt section data.
- const Reloc_section*
- rel_plt() const
- { return this->rel_; }
-
- // Return the number of PLT entries.
- unsigned int
- entry_count() const
- { return this->count_; }
-
- // Return the offset of the first non-reserved PLT entry.
- static unsigned int
- first_plt_entry_offset()
- { return sizeof(first_plt_entry); }
-
- // Return the size of a PLT entry.
- static unsigned int
- get_plt_entry_size()
- { return sizeof(plt_entry); }
-
- protected:
- void
- do_adjust_output_section(Output_section* os);
-
- // Write to a map file.
- void
- do_print_to_mapfile(Mapfile* mapfile) const
- { mapfile->print_output_data(this, _("** PLT")); }
-
- private:
- // Template for the first PLT entry.
- static const uint32_t first_plt_entry[5];
-
- // Template for subsequent PLT entries.
- static const uint32_t plt_entry[3];
-
- // Set the final size.
- void
- set_final_data_size()
- {
- this->set_data_size(sizeof(first_plt_entry)
- + this->count_ * sizeof(plt_entry));
- }
-
- // Write out the PLT data.
- void
- do_write(Output_file*);
-
- // The reloc section.
- Reloc_section* rel_;
- // The .got.plt section.
- Output_data_space* got_plt_;
- // The number of PLT entries.
- unsigned int count_;
-};
-
// Create the PLT section. The ordinary .got section is an argument,
// since we need to refer to the start. We also create our own .got
// section just for PLT entries.
@@ -7266,7 +7333,7 @@ class Output_data_plt_arm : public Output_section_data
template<bool big_endian>
Output_data_plt_arm<big_endian>::Output_data_plt_arm(Layout* layout,
Output_data_space* got_plt)
- : Output_section_data(4), got_plt_(got_plt), count_(0)
+ : Output_section_data(4), got_plt_(got_plt)
{
this->rel_ = new Reloc_section(false);
layout->add_output_section_data(".rel.plt", elfcpp::SHT_REL,
@@ -7285,16 +7352,27 @@ Output_data_plt_arm<big_endian>::do_adjust_output_section(Output_section* os)
template<bool big_endian>
void
-Output_data_plt_arm<big_endian>::add_entry(Symbol* gsym)
+Output_data_plt_arm<big_endian>::add_entry(Symbol* gsym,
+ bool from_thumb_call,
+ bool from_thumb_jump)
{
gold_assert(!gsym->has_plt_offset());
- // Note that when setting the PLT offset we skip the initial
- // reserved PLT entry.
- gsym->set_plt_offset((this->count_) * sizeof(plt_entry)
- + sizeof(first_plt_entry));
+ // We want to record information about how the PLT entries are
+ // referenced. We'll use this information in do_finalize_sections to
+ // assign offsets to all the symbols.
+ typename Thumb_stub_map::iterator it = this->stub_map_.find(gsym);
+ if (it != this->stub_map_.end())
+ {
+ plt_reference& pr = it->second;
+ pr.from_thumb_calls_ = pr.from_thumb_calls_ || from_thumb_call;
+ pr.from_thumb_jumps_ = pr.from_thumb_jumps_ || from_thumb_jump;
+ return;
+ }
- ++this->count_;
+ struct plt_reference pr(from_thumb_call, from_thumb_jump);
+ this->stub_map_[gsym] = pr;
+ this->stub_list_.push_back(gsym);
section_offset_type got_offset = this->got_plt_->current_data_size();
@@ -7313,6 +7391,40 @@ Output_data_plt_arm<big_endian>::add_entry(Symbol* gsym)
// appear in the relocations.
}
+// Calculate offsets for all the PLT symbols.
+
+template<bool big_endian>
+void
+Output_data_plt_arm<big_endian>::calculate_offsets(bool may_use_blx)
+{
+ size_t offset = sizeof(first_plt_entry);
+
+ for(typename Thumb_stub_list::iterator it = this->stub_list_.begin();
+ it != this->stub_list_.end();
+ ++it, offset += sizeof(plt_entry))
+ {
+ Symbol *sym = *it;
+ typename Thumb_stub_map::iterator it = this->stub_map_.find(sym);
+ gold_assert(it != this->stub_map_.end());
+ plt_reference& pr = it->second;
+ bool needs_thumb_stub = (pr.from_thumb_jumps_
+ || (!may_use_blx
+ && pr.from_thumb_calls_));
+ pr.needs_thumb_stub_ = needs_thumb_stub;
+
+ // Note that when setting the PLT offset we skip the initial
+ // reserved PLT entry.
+ //
+ // Make the entry point to the start of the ARM code, rather than
+ // the Thumb stub.
+ if (needs_thumb_stub)
+ offset += thumb_stub_size();
+ sym->set_plt_offset(offset);
+ }
+
+ this->total_size_ = offset;
+}
+
// ARM PLTs.
// FIXME: This is not very flexible. Right now this has only been tested
// on armv5te. If we are to support additional architecture features like
@@ -7339,6 +7451,15 @@ const uint32_t Output_data_plt_arm<big_endian>::plt_entry[3] =
0xe5bcf000, // ldr pc, [ip, #0xNNN]!
};
+// Thumb stub for PLT entries.
+
+template<bool big_endian>
+const uint16_t Output_data_plt_arm<big_endian>::thumb_plt_entry_stub[2] =
+{
+ 0x4778, // bx pc
+ 0x46c0 // nop
+};
+
// Write out the PLT. This uses the hand-coded instructions above,
// and adjusts them as needed. This is all specified by the arm ELF
// Processor Supplement.
@@ -7377,20 +7498,35 @@ Output_data_plt_arm<big_endian>::do_write(Output_file* of)
memset(got_pov, 0, 12);
got_pov += 12;
+ // Write the rest of the PLT entries.
const int rel_size = elfcpp::Elf_sizes<32>::rel_size;
unsigned int plt_offset = sizeof(first_plt_entry);
unsigned int plt_rel_offset = 0;
unsigned int got_offset = 12;
- const unsigned int count = this->count_;
- for (unsigned int i = 0;
- i < count;
- ++i,
+ for (typename Thumb_stub_list::const_iterator it = this->stub_list_.begin();
+ it != this->stub_list_.end();
+ ++it,
pov += sizeof(plt_entry),
got_pov += 4,
plt_offset += sizeof(plt_entry),
plt_rel_offset += rel_size,
got_offset += 4)
{
+ Symbol* sym = *it;
+ typename Thumb_stub_map::const_iterator mapit = this->stub_map_.find(sym);
+ gold_assert(mapit != this->stub_map_.end());
+ const plt_reference& pr = mapit->second;
+ // Write the thumb stub first if necessary.
+ if (pr.needs_thumb_stub_)
+ {
+ elfcpp::Swap<16, big_endian>::writeval(pov,
+ thumb_plt_entry_stub[0]);
+ elfcpp::Swap<16, big_endian>::writeval(pov + 2,
+ thumb_plt_entry_stub[1]);
+ pov += sizeof(thumb_plt_entry_stub);
+ plt_offset += sizeof(thumb_plt_entry_stub);
+ }
+
// Set and adjust the PLT entry itself.
int32_t offset = ((got_address + got_offset)
- (plt_address + plt_offset + 8));
@@ -7419,7 +7555,8 @@ Output_data_plt_arm<big_endian>::do_write(Output_file* of)
template<bool big_endian>
void
Target_arm<big_endian>::make_plt_entry(Symbol_table* symtab, Layout* layout,
- Symbol* gsym)
+ Symbol* gsym, bool from_thumb_call,
+ bool from_thumb_jump)
{
if (gsym->has_plt_offset())
return;
@@ -7435,7 +7572,7 @@ Target_arm<big_endian>::make_plt_entry(Symbol_table* symtab, Layout* layout,
| elfcpp::SHF_EXECINSTR),
this->plt_, ORDER_PLT, false);
}
- this->plt_->add_entry(gsym);
+ this->plt_->add_entry(gsym, from_thumb_call, from_thumb_jump);
}
// Return the number of entries in the PLT.
@@ -8170,7 +8307,7 @@ Target_arm<big_endian>::Scan::global(Symbol_table* symtab,
// Make a PLT entry if necessary.
if (this->symbol_needs_plt_entry(gsym))
{
- target->make_plt_entry(symtab, layout, gsym);
+ target->make_plt_entry(symtab, layout, gsym, false, false);
// Since this is not a PC-relative relocation, we may be
// taking the address of a function. In that case we need to
// set the entry in the dynamic symbol table to the address of
@@ -8309,7 +8446,10 @@ Target_arm<big_endian>::Scan::global(Symbol_table* symtab,
&& !gsym->is_from_dynobj()
&& !gsym->is_preemptible())
break;
- target->make_plt_entry(symtab, layout, gsym);
+ target->make_plt_entry(symtab, layout, gsym,
+ r_type == elfcpp::R_ARM_THM_CALL,
+ (r_type == elfcpp::R_ARM_THM_JUMP24
+ || r_type == elfcpp::R_ARM_THM_JUMP19));
break;
case elfcpp::R_ARM_GOT_BREL:
@@ -8605,6 +8745,9 @@ Target_arm<big_endian>::do_finalize_sections(
gold_error(_("unable to provide V4BX reloc interworking fix up; "
"the target profile does not support BX instruction"));
+ if (this->plt_ != NULL)
+ this->plt_->calculate_offsets(this->may_use_v5t_interworking());
+
// Fill in some more dynamic tags.
const Reloc_section* rel_plt = (this->plt_ == NULL
? NULL
@@ -10959,7 +11102,7 @@ Target_arm<big_endian>::scan_reloc_for_stub(
Reloc_stub* stub = NULL;
Stub_type stub_type =
- Reloc_stub::stub_type_for_reloc(r_type, address, destination,
+ Reloc_stub::stub_type_for_reloc(r_type, gsym, address, destination,
target_is_thumb);
if (stub_type != arm_stub_none)
{
diff --git a/gold/symtab.h b/gold/symtab.h
index feed245..904fffd 100644
--- a/gold/symtab.h
+++ b/gold/symtab.h
@@ -666,13 +666,13 @@ class Symbol
// A function call that can branch to a local PLT entry does not need
// a dynamic relocation.
- if ((flags & FUNCTION_CALL) && this->has_plt_offset())
+ if ((flags & FUNCTION_CALL) && this->needs_plt_entry())
return false;
// A reference to any PLT entry in a non-position-independent executable
// does not need a dynamic relocation.
if (!parameters->options().output_is_position_independent()
- && this->has_plt_offset())
+ && this->needs_plt_entry())
return false;
// A reference to a symbol defined in a dynamic object or to a