This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
PowerPC64 bigtoc code optimization
- From: Alan Modra <amodra at gmail dot com>
- To: binutils at sourceware dot org
- Date: Tue, 16 Mar 2010 11:09:53 +1030
- Subject: PowerPC64 bigtoc code optimization
Applied.
* elf64-ppc.c (struct ppc_link_hash_table): Add do_toc_opt.
(ppc64_elf_edit_toc): Set it here.
(ha_reloc_match): New function.
(ppc64_elf_relocate_section): Optimize bigtoc insn sequences.
Index: bfd/elf64-ppc.c
===================================================================
RCS file: /cvs/src/src/bfd/elf64-ppc.c,v
retrieving revision 1.322
diff -u -p -r1.322 elf64-ppc.c
--- bfd/elf64-ppc.c 15 Mar 2010 01:59:56 -0000 1.322
+++ bfd/elf64-ppc.c 15 Mar 2010 23:57:39 -0000
@@ -3788,6 +3788,7 @@ struct ppc_link_hash_table
unsigned int do_multi_toc:1;
unsigned int multi_toc_needed:1;
unsigned int second_toc_pass:1;
+ unsigned int do_toc_opt:1;
/* Set on error. */
unsigned int stub_error:1;
@@ -7882,7 +7883,9 @@ ppc64_elf_edit_toc (struct bfd_link_info
{
bfd *ibfd;
struct adjust_toc_info toc_inf;
+ struct ppc_link_hash_table *htab = ppc_hash_table (info);
+ htab->do_toc_opt = 1;
toc_inf.global_toc_syms = TRUE;
for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
{
@@ -11213,6 +11216,58 @@ ppc64_elf_action_discarded (asection *se
return _bfd_elf_default_action_discarded (sec);
}
+/* REL points to a low-part reloc on a bigtoc instruction sequence.  Search backwards from REL towards RELOCS
+ for the nearest high-part reloc with the same symbol and addend, and verify that the instruction it applies to
+ is addis REG,r2,x. If so, return a pointer to the high-part reloc, otherwise return NULL. */
+
+static const Elf_Internal_Rela *
+ha_reloc_match (const Elf_Internal_Rela *relocs,
+ const Elf_Internal_Rela *rel,
+ unsigned int reg,
+ const bfd *input_bfd,
+ const bfd_byte *contents)
+{
+ enum elf_ppc64_reloc_type r_type, r_type_ha;
+ bfd_vma r_info_ha, r_addend;
+
+ r_type = ELF64_R_TYPE (rel->r_info);
+ switch (r_type) /* Select the high-part (_HA) reloc type that pairs with REL's low-part type. */
+ {
+ case R_PPC64_GOT_TLSLD16_LO:
+ case R_PPC64_GOT_TLSGD16_LO:
+ case R_PPC64_GOT_TPREL16_LO_DS:
+ case R_PPC64_GOT_DTPREL16_LO_DS:
+ case R_PPC64_GOT16_LO:
+ case R_PPC64_TOC16_LO:
+ r_type_ha = r_type + 2; /* Relies on each of these _HA relocs being numbered _LO + 2. */
+ break;
+ case R_PPC64_GOT16_LO_DS:
+ r_type_ha = R_PPC64_GOT16_HA; /* The _DS variant pairs with the plain GOT16_HA reloc. */
+ break;
+ case R_PPC64_TOC16_LO_DS:
+ r_type_ha = R_PPC64_TOC16_HA; /* The _DS variant pairs with the plain TOC16_HA reloc. */
+ break;
+ default:
+ abort (); /* Any other reloc type is treated as a caller error. */
+ }
+ r_info_ha = ELF64_R_INFO (ELF64_R_SYM (rel->r_info), r_type_ha); /* Same symbol as REL, high-part type. */
+ r_addend = rel->r_addend;
+
+ while (--rel >= relocs) /* Scan backwards, starting at the reloc immediately before REL. */
+ if (rel->r_info == r_info_ha
+ && rel->r_addend == r_addend)
+ {
+ const bfd_byte *p = contents + (rel->r_offset & ~3); /* Round the reloc offset down to a multiple of 4. */
+ unsigned int insn = bfd_get_32 (input_bfd, p);
+ if ((insn & ((0x3f << 26) | (0x1f << 16)))
+ == ((15u << 26) | (2 << 16)) /* addis rt,r2,x */
+ && (insn & (0x1f << 21)) == (reg << 21)) /* ... and the rt field equals REG. */
+ return rel;
+ break; /* Only the nearest matching reloc is examined; stop even if its insn did not qualify. */
+ }
+ return NULL;
+}
+
/* The RELOCATE_SECTION function is called by the ELF backend linker
to handle the relocations for a section.
@@ -11307,7 +11362,8 @@ ppc64_elf_relocate_section (bfd *output_
bfd_vma relocation;
bfd_boolean unresolved_reloc;
bfd_boolean warned;
- unsigned long insn, mask;
+ unsigned int insn;
+ bfd_vma mask;
struct ppc_stub_hash_entry *stub_entry;
bfd_vma max_br_offset;
bfd_vma from;
@@ -12665,6 +12721,81 @@ ppc64_elf_relocate_section (bfd *output_
continue;
}
+ /* Multi-instruction sequences that access the TOC can be
+ optimized, eg. addis ra,r2,0; addi rb,ra,x;
+ to nop; addi rb,r2,x; */
+ switch (r_type)
+ {
+ default:
+ break;
+
+ case R_PPC64_GOT_TLSLD16_HI:
+ case R_PPC64_GOT_TLSGD16_HI:
+ case R_PPC64_GOT_TPREL16_HI:
+ case R_PPC64_GOT_DTPREL16_HI:
+ case R_PPC64_GOT16_HI:
+ case R_PPC64_TOC16_HI:
+ /* These relocs would only be useful if building up an
+ offset to later add to r2, perhaps in an indexed
+ addressing mode instruction. Don't try to optimize.
+ Unfortunately, the possibility of someone building up an
+ offset like this or even with the HA relocs, means that
+ we need to check the high insn when optimizing the low
+ insn. */
+ break;
+
+ case R_PPC64_GOT_TLSLD16_HA:
+ case R_PPC64_GOT_TLSGD16_HA:
+ case R_PPC64_GOT_TPREL16_HA:
+ case R_PPC64_GOT_DTPREL16_HA:
+ case R_PPC64_GOT16_HA:
+ case R_PPC64_TOC16_HA:
+ /* For now we don't nop out the first instruction. */
+ break;
+
+ case R_PPC64_GOT_TLSLD16_LO:
+ case R_PPC64_GOT_TLSGD16_LO:
+ case R_PPC64_GOT_TPREL16_LO_DS:
+ case R_PPC64_GOT_DTPREL16_LO_DS:
+ case R_PPC64_GOT16_LO:
+ case R_PPC64_GOT16_LO_DS:
+ case R_PPC64_TOC16_LO:
+ case R_PPC64_TOC16_LO_DS:
+ if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000)
+ {
+ bfd_byte *p = contents + (rel->r_offset & ~3);
+ insn = bfd_get_32 (input_bfd, p);
+ if ((insn & (0x3f << 26)) == 14u << 26 /* addi */
+ || (insn & (0x3f << 26)) == 32u << 26 /* lwz */
+ || (insn & (0x3f << 26)) == 34u << 26 /* lbz */
+ || (insn & (0x3f << 26)) == 36u << 26 /* stw */
+ || (insn & (0x3f << 26)) == 38u << 26 /* stb */
+ || (insn & (0x3f << 26)) == 40u << 26 /* lhz */
+ || (insn & (0x3f << 26)) == 42u << 26 /* lha */
+ || (insn & (0x3f << 26)) == 44u << 26 /* sth */
+ || (insn & (0x3f << 26)) == 46u << 26 /* lmw */
+ || (insn & (0x3f << 26)) == 47u << 26 /* stmw */
+ || (insn & (0x3f << 26)) == 48u << 26 /* lfs */
+ || (insn & (0x3f << 26)) == 50u << 26 /* lfd */
+ || (insn & (0x3f << 26)) == 52u << 26 /* stfs */
+ || (insn & (0x3f << 26)) == 54u << 26 /* stfd */
+ || ((insn & (0x3f << 26)) == 58u << 26 /* lwa,ld,lmd */
+ && (insn & 3) != 1)
+ || ((insn & (0x3f << 26)) == 62u << 26 /* std, stmd */
+ && ((insn & 3) == 0 || (insn & 3) == 3)))
+ {
+ unsigned int reg = (insn >> 16) & 0x1f;
+ if (ha_reloc_match (relocs, rel, reg, input_bfd, contents))
+ {
+ insn &= ~(0x1f << 16);
+ insn |= 2 << 16;
+ bfd_put_32 (input_bfd, insn, p);
+ }
+ }
+ }
+ break;
+ }
+
/* Do any further special processing. */
switch (r_type)
{
--
Alan Modra
Australia Development Lab, IBM