This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[GOLD] PowerPC64 toc code optimization


This implements an optimization of TOC access sequences for PowerPC64
-mcmodel=medium/large code.  It's not quite as robust as the
equivalent optimization in BFD ld: for an input file that has
incorrect code, gold will error out rather than disabling the
optimization for that file.  It didn't seem worthwhile slowing down
gold to implement that fallback.  OK to apply?

	* options.h (General_options): Add no_toc_optimize.
	* powerpc.cc (ok_lo_toc_insn): New function.
	(Target_powerpc::Relocate::relocate): Optimize toc access sequences.

Index: gold/options.h
===================================================================
RCS file: /cvs/src/src/gold/options.h,v
retrieving revision 1.178
diff -u -p -r1.178 options.h
--- gold/options.h	6 Dec 2012 03:13:16 -0000	1.178
+++ gold/options.h	6 Dec 2012 12:23:21 -0000
@@ -1109,6 +1109,9 @@ class General_options
   DEFINE_uint64(Ttext, options::ONE_DASH, '\0', -1U,
                 N_("Set the address of the text segment"), N_("ADDRESS"));
 
+  DEFINE_bool(no_toc_optimize, options::TWO_DASHES, '\0', false,
+	      N_("(PowerPC64 only) Don't optimize TOC code sequences"), NULL);
+
   DEFINE_set(undefined, options::TWO_DASHES, 'u',
 	     N_("Create undefined reference to SYMBOL"), N_("SYMBOL"));
 
Index: gold/powerpc.cc
===================================================================
RCS file: /cvs/src/src/gold/powerpc.cc,v
retrieving revision 1.74
diff -u -p -r1.74 powerpc.cc
--- gold/powerpc.cc	6 Dec 2012 03:13:17 -0000	1.74
+++ gold/powerpc.cc	6 Dec 2012 12:23:21 -0000
@@ -5441,6 +5441,33 @@ Target_powerpc<size, big_endian>::do_fin
     this->copy_relocs_.emit(this->rela_dyn_section(layout));
 }
 
+// Return TRUE iff INSN is one we expect on a _LO variety toc/got
+// reloc.
+
+static bool
+ok_lo_toc_insn(uint32_t insn)
+{
+  return ((insn & (0x3f << 26)) == 14u << 26 /* addi */
+	  || (insn & (0x3f << 26)) == 32u << 26 /* lwz */
+	  || (insn & (0x3f << 26)) == 34u << 26 /* lbz */
+	  || (insn & (0x3f << 26)) == 36u << 26 /* stw */
+	  || (insn & (0x3f << 26)) == 38u << 26 /* stb */
+	  || (insn & (0x3f << 26)) == 40u << 26 /* lhz */
+	  || (insn & (0x3f << 26)) == 42u << 26 /* lha */
+	  || (insn & (0x3f << 26)) == 44u << 26 /* sth */
+	  || (insn & (0x3f << 26)) == 46u << 26 /* lmw */
+	  || (insn & (0x3f << 26)) == 47u << 26 /* stmw */
+	  || (insn & (0x3f << 26)) == 48u << 26 /* lfs */
+	  || (insn & (0x3f << 26)) == 50u << 26 /* lfd */
+	  || (insn & (0x3f << 26)) == 52u << 26 /* stfs */
+	  || (insn & (0x3f << 26)) == 54u << 26 /* stfd */
+	  || ((insn & (0x3f << 26)) == 58u << 26 /* lwa,ld,lmd */
+	      && (insn & 3) != 1)
+	  || ((insn & (0x3f << 26)) == 62u << 26 /* std, stmd */
+	      && ((insn & 3) == 0 || (insn & 3) == 3))
+	  || (insn & (0x3f << 26)) == 12u << 26 /* addic */);
+}
+
 // Return the value to use for a branch relocation.
 
 template<int size, bool big_endian>
@@ -6021,6 +6048,75 @@ Target_powerpc<size, big_endian>::Reloca
       break;
     }
 
+  if (size == 64)
+    {
+      // Multi-instruction sequences that access the TOC can be
+      // optimized, eg. addis ra,r2,0; addi rb,ra,x;
+      // to             nop;           addi rb,r2,x;
+      switch (r_type)
+	{
+	default:
+	  break;
+
+	case elfcpp::R_POWERPC_GOT_TLSLD16_HA:
+	case elfcpp::R_POWERPC_GOT_TLSGD16_HA:
+	case elfcpp::R_POWERPC_GOT_TPREL16_HA:
+	case elfcpp::R_POWERPC_GOT_DTPREL16_HA:
+	case elfcpp::R_POWERPC_GOT16_HA:
+	case elfcpp::R_PPC64_TOC16_HA:
+	  if (!parameters->options().no_toc_optimize())
+	    {
+	      Insn* iview = reinterpret_cast<Insn*>(view - 2 * big_endian);
+	      Insn insn = elfcpp::Swap<32, big_endian>::readval(iview);
+	      if ((insn & ((0x3f << 26) | 0x1f << 16))
+		  != ((15u << 26) | (2 << 16)) /* addis rt,2,imm */)
+		gold_error_at_location(relinfo, relnum, rela.get_r_offset(),
+				       _("toc optimization is not supported "
+					 "for %#08x instruction"), insn);
+	      else if (value + 0x8000 < 0x10000)
+		{
+		  elfcpp::Swap<32, big_endian>::writeval(iview, nop);
+		  return true;
+		}
+	    }
+	  break;
+
+	case elfcpp::R_POWERPC_GOT_TLSLD16_LO:
+	case elfcpp::R_POWERPC_GOT_TLSGD16_LO:
+	case elfcpp::R_POWERPC_GOT_TPREL16_LO:
+	case elfcpp::R_POWERPC_GOT_DTPREL16_LO:
+	case elfcpp::R_POWERPC_GOT16_LO:
+	case elfcpp::R_PPC64_GOT16_LO_DS:
+	case elfcpp::R_PPC64_TOC16_LO:
+	case elfcpp::R_PPC64_TOC16_LO_DS:
+	  if (!parameters->options().no_toc_optimize())
+	    {
+	      Insn* iview = reinterpret_cast<Insn*>(view - 2 * big_endian);
+	      Insn insn = elfcpp::Swap<32, big_endian>::readval(iview);
+	      if (!ok_lo_toc_insn(insn))
+		gold_error_at_location(relinfo, relnum, rela.get_r_offset(),
+				       _("toc optimization is not supported "
+					 "for %#08x instruction"), insn);
+	      else if (value + 0x8000 < 0x10000)
+		{
+		  if ((insn & (0x3f << 26)) == 12u << 26 /* addic */)
+		    {
+		      // Transform addic to addi when we change reg.
+		      insn &= ~((0x3f << 26) | (0x1f << 16));
+		      insn |= (14u << 26) | (2 << 16);
+		    }
+		  else
+		    {
+		      insn &= ~(0x1f << 16);
+		      insn |= 2 << 16;
+		    }
+		  elfcpp::Swap<32, big_endian>::writeval(iview, insn);
+		}
+	    }
+	  break;
+	}
+    }
+
   typename Reloc::Overflow_check overflow = Reloc::CHECK_NONE;
   switch (r_type)
     {

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]