This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[GOLD] PowerPC64 toc code optimization
- From: Alan Modra <amodra at gmail dot com>
- To: binutils at sourceware dot org
- Date: Thu, 6 Dec 2012 23:50:51 +1030
- Subject: [GOLD] PowerPC64 toc code optimization
This implements an optimization for PowerPC64 -mcmodel=medium/large
code. It's not quite as robust as the same for BFD ld. Gold will
error out rather than disabling the optimization for an input file
that has incorrect code. It didn't seem worthwhile slowing down gold
for the latter behaviour. OK to apply?
* options.h (General_options): Add no_toc_optimize.
* powerpc.cc (ok_lo_toc_insn): New function.
(Target_powerpc::Relocate::relocate): Optimize toc access sequences.
Index: gold/options.h
===================================================================
RCS file: /cvs/src/src/gold/options.h,v
retrieving revision 1.178
diff -u -p -r1.178 options.h
--- gold/options.h 6 Dec 2012 03:13:16 -0000 1.178
+++ gold/options.h 6 Dec 2012 12:23:21 -0000
@@ -1109,6 +1109,9 @@ class General_options
DEFINE_uint64(Ttext, options::ONE_DASH, '\0', -1U,
N_("Set the address of the text segment"), N_("ADDRESS"));
+ DEFINE_bool(no_toc_optimize, options::TWO_DASHES, '\0', false,
+ N_("(PowerPC64 only) Don't optimize TOC code sequences"), NULL);
+
DEFINE_set(undefined, options::TWO_DASHES, 'u',
N_("Create undefined reference to SYMBOL"), N_("SYMBOL"));
Index: gold/powerpc.cc
===================================================================
RCS file: /cvs/src/src/gold/powerpc.cc,v
retrieving revision 1.74
diff -u -p -r1.74 powerpc.cc
--- gold/powerpc.cc 6 Dec 2012 03:13:17 -0000 1.74
+++ gold/powerpc.cc 6 Dec 2012 12:23:21 -0000
@@ -5441,6 +5441,33 @@ Target_powerpc<size, big_endian>::do_fin
this->copy_relocs_.emit(this->rela_dyn_section(layout));
}
+// Return TRUE iff INSN is one we expect on a _LO variety toc/got
+// reloc.
+
+static bool
+ok_lo_toc_insn(uint32_t insn)
+{
+ return ((insn & (0x3f << 26)) == 14u << 26 /* addi */
+ || (insn & (0x3f << 26)) == 32u << 26 /* lwz */
+ || (insn & (0x3f << 26)) == 34u << 26 /* lbz */
+ || (insn & (0x3f << 26)) == 36u << 26 /* stw */
+ || (insn & (0x3f << 26)) == 38u << 26 /* stb */
+ || (insn & (0x3f << 26)) == 40u << 26 /* lhz */
+ || (insn & (0x3f << 26)) == 42u << 26 /* lha */
+ || (insn & (0x3f << 26)) == 44u << 26 /* sth */
+ || (insn & (0x3f << 26)) == 46u << 26 /* lmw */
+ || (insn & (0x3f << 26)) == 47u << 26 /* stmw */
+ || (insn & (0x3f << 26)) == 48u << 26 /* lfs */
+ || (insn & (0x3f << 26)) == 50u << 26 /* lfd */
+ || (insn & (0x3f << 26)) == 52u << 26 /* stfs */
+ || (insn & (0x3f << 26)) == 54u << 26 /* stfd */
+ || ((insn & (0x3f << 26)) == 58u << 26 /* lwa,ld,lmd */
+ && (insn & 3) != 1)
+ || ((insn & (0x3f << 26)) == 62u << 26 /* std, stmd */
+ && ((insn & 3) == 0 || (insn & 3) == 3))
+ || (insn & (0x3f << 26)) == 12u << 26 /* addic */);
+}
+
// Return the value to use for a branch relocation.
template<int size, bool big_endian>
@@ -6021,6 +6048,75 @@ Target_powerpc<size, big_endian>::Reloca
break;
}
+ if (size == 64)
+ {
+ // Multi-instruction sequences that access the TOC can be
+ // optimized, eg. addis ra,r2,0; addi rb,ra,x;
+ // to nop; addi rb,r2,x;
+ switch (r_type)
+ {
+ default:
+ break;
+
+ case elfcpp::R_POWERPC_GOT_TLSLD16_HA:
+ case elfcpp::R_POWERPC_GOT_TLSGD16_HA:
+ case elfcpp::R_POWERPC_GOT_TPREL16_HA:
+ case elfcpp::R_POWERPC_GOT_DTPREL16_HA:
+ case elfcpp::R_POWERPC_GOT16_HA:
+ case elfcpp::R_PPC64_TOC16_HA:
+ if (!parameters->options().no_toc_optimize())
+ {
+ Insn* iview = reinterpret_cast<Insn*>(view - 2 * big_endian);
+ Insn insn = elfcpp::Swap<32, big_endian>::readval(iview);
+ if ((insn & ((0x3f << 26) | 0x1f << 16))
+ != ((15u << 26) | (2 << 16)) /* addis rt,2,imm */)
+ gold_error_at_location(relinfo, relnum, rela.get_r_offset(),
+ _("toc optimization is not supported "
+ "for %#08x instruction"), insn);
+ else if (value + 0x8000 < 0x10000)
+ {
+ elfcpp::Swap<32, big_endian>::writeval(iview, nop);
+ return true;
+ }
+ }
+ break;
+
+ case elfcpp::R_POWERPC_GOT_TLSLD16_LO:
+ case elfcpp::R_POWERPC_GOT_TLSGD16_LO:
+ case elfcpp::R_POWERPC_GOT_TPREL16_LO:
+ case elfcpp::R_POWERPC_GOT_DTPREL16_LO:
+ case elfcpp::R_POWERPC_GOT16_LO:
+ case elfcpp::R_PPC64_GOT16_LO_DS:
+ case elfcpp::R_PPC64_TOC16_LO:
+ case elfcpp::R_PPC64_TOC16_LO_DS:
+ if (!parameters->options().no_toc_optimize())
+ {
+ Insn* iview = reinterpret_cast<Insn*>(view - 2 * big_endian);
+ Insn insn = elfcpp::Swap<32, big_endian>::readval(iview);
+ if (!ok_lo_toc_insn(insn))
+ gold_error_at_location(relinfo, relnum, rela.get_r_offset(),
+ _("toc optimization is not supported "
+ "for %#08x instruction"), insn);
+ else if (value + 0x8000 < 0x10000)
+ {
+ if ((insn & (0x3f << 26)) == 12u << 26 /* addic */)
+ {
+ // Transform addic to addi when we change reg.
+ insn &= ~((0x3f << 26) | (0x1f << 16));
+ insn |= (14u << 26) | (2 << 16);
+ }
+ else
+ {
+ insn &= ~(0x1f << 16);
+ insn |= 2 << 16;
+ }
+ elfcpp::Swap<32, big_endian>::writeval(iview, insn);
+ }
+ }
+ break;
+ }
+ }
+
typename Reloc::Overflow_check overflow = Reloc::CHECK_NONE;
switch (r_type)
{
--
Alan Modra
Australia Development Lab, IBM