PowerPC PLT stub tidy

Alan Modra amodra@gmail.com
Wed Jan 17 08:18:00 GMT 2018


This is in preparation for the next patch adding Spectre variant 2
mitigation for PowerPC and PowerPC64.  Besides tidying code involved
in stub output (to reduce the number of places where bctr is output),
the patch adds some user-visible features:

1) PowerPC64 ELFv2 global entry stubs are now aligned under the
   control of --plt-align, with a default alignment of 32 bytes.
2) PowerPC64 __glink_PLTresolve is no longer padded out with nops.
3) PowerPC32 PLT stubs are aligned under the control of --plt-align,
   with the default alignment being 16 bytes as before.
4) The PowerPC32 branch/nop table emitted before __glink_PLTresolve
   is now smaller in many cases.  It was sized incorrectly when the
   __tls_get_addr_opt stub was used, and unnecessarily included space
   for local ifuncs.

bfd/
	* elf32-ppc.c (GLINK_ENTRY_SIZE): Add parameters, handle
	__tls_get_addr_opt, and alignment sizing.
	(TLS_GET_ADDR_GLINK_SIZE): Delete.
	(is_nonpic_glink_stub): Don't use GLINK_ENTRY_SIZE.
	(ppc_elf_get_synthetic_symtab): Recognize stubs spaced at 4, 6,
	or 8 insns.
	(ppc_elf_link_hash_table_create): Init new ppc_elf_params field.
	(allocate_dynrelocs): Use new GLINK_ENTRY_SIZE.
	(ppc_elf_size_dynamic_sections): Likewise.  Size branch table
	by PLT reloc count.
	(write_glink_stub): Handle __tls_get_addr_opt stub.
	Pad out to size given by GLINK_ENTRY_SIZE.
	(ppc_elf_relocate_section): Adjust write_glink_stub call.
	(ppc_elf_finish_dynamic_symbol): Likewise.
	(ppc_elf_finish_dynamic_sections): Write PLTresolve without using
	insn array since so many need rewriting.
	* elf32-ppc.h (struct ppc_elf_params): Add plt_stub_align.
	* elf64-ppc.c (GLINK_PLTRESOLVE_SIZE): Rename from
	GLINK_CALL_STUB_SIZE.  Add htab param and evaluate to size without
	nops.  Adjust all uses.
	(ppc64_elf_get_synthetic_symtab): Don't use GLINK_CALL_STUB_SIZE
	in glink_vma calculation.
	(struct ppc_link_hash_table): Add global_entry section pointer.
	(create_linkage_sections): Create separate section for global
	entry stubs.
	(PPC_LO, PPC_HI, PPC_HA): Move earlier.
	(size_global_entry_stubs): Handle sizing for aligned stubs.
	(ppc64_elf_size_dynamic_sections): Handle global_entry alloc,
	and don't stash end of glink branch table in rawsize.
	(ppc_build_one_stub): Rewrite stub size calculations.
	(build_global_entry_stubs): Use new section.
	(ppc64_elf_build_stubs): Don't pad __glink_PLTresolve with nops.
	Build lazy link stubs out to end of section.  Build global entry
	stubs in new section.
gold/
	* options.h (plt_align): Support for PowerPC32 too.
	* powerpc.cc (Stub_table::stub_align): Heed --plt-align for 32-bit.
	(Stub_table::plt_call_size, branch_stub_size): Tidy.
	(Stub_table::plt_call_align): Implement using stub_align.
	(Output_data_glink::global_entry_align): New function.
	(Output_data_glink::global_entry_off): New function.
	(Output_data_glink::global_entry_address): Use global_entry_off.
	(Output_data_glink::pltresolve_size): New function, replacing
	pltresolve_size_ constant.  Update all uses.
	(Output_data_glink::add_global_entry): Align offset.
	(Output_data_glink::set_final_data_size): Use global_entry_align.
	(Stub_table::do_write): Don't pad __glink_PLTresolve with nops.
	Tidy stub output.  Use global_entry_off.
ld/
	* emultempl/ppc32elf.em (params): Init new field.
	(enum ppc32_opt): New enum to define OPTION_* values.  Add
	OPTION_PLT_ALIGN and OPTION_NO_PLT_ALIGN.
	(PARSE_AND_LIST_LONGOPTS): Handle new options.
	(PARSE_AND_LIST_ARGS_CASES): Likewise.
	(PARSE_AND_LIST_OPTIONS): Likewise.  Break up help output.
	* emultempl/ppc64elf.em (ppc_add_stub_section): Init alignment
	correctly for negative --plt-align.
	* testsuite/ld-powerpc/elfv2exe.d,
	* testsuite/ld-powerpc/elfv2so.d,
	* testsuite/ld-powerpc/relbrlt.d,
	* testsuite/ld-powerpc/relbrlt.s,
	* testsuite/ld-powerpc/tlsexe.d,
	* testsuite/ld-powerpc/tlsexe.r,
	* testsuite/ld-powerpc/tlsexe32.d,
	* testsuite/ld-powerpc/tlsexe32.g,
	* testsuite/ld-powerpc/tlsexe32.r,
	* testsuite/ld-powerpc/tlsexetoc.d,
	* testsuite/ld-powerpc/tlsexetoc.r,
	* testsuite/ld-powerpc/tlsopt5_32.d,
	* testsuite/ld-powerpc/tlsso.d,
	* testsuite/ld-powerpc/tlstocso.d: Update for changed stub order.

diff --git a/bfd/elf32-ppc.c b/bfd/elf32-ppc.c
index 3e40962..efdaf04 100644
--- a/bfd/elf32-ppc.c
+++ b/bfd/elf32-ppc.c
@@ -68,8 +68,13 @@ static bfd_reloc_status_type ppc_elf_unhandled_reloc
 
 /* For new-style .glink and .plt.  */
 #define GLINK_PLTRESOLVE 16*4
-#define GLINK_ENTRY_SIZE 4*4
-#define TLS_GET_ADDR_GLINK_SIZE 12*4
+#define GLINK_ENTRY_SIZE(htab, h)					\
+  ((4*4									\
+    + (h != NULL							\
+       && h == htab->tls_get_addr					\
+       && !htab->params->no_tls_get_addr_opt ? 8*4 : 0)			\
+    + (1u << htab->params->plt_stub_align) - 1)				\
+   & -(1u << htab->params->plt_stub_align))
 
 /* VxWorks uses its own plt layout, filled in by the static linker.  */
 
@@ -2873,9 +2878,9 @@ ppc_elf_final_write_processing (bfd *abfd, bfd_boolean linker ATTRIBUTE_UNUSED)
 static bfd_boolean
 is_nonpic_glink_stub (bfd *abfd, asection *glink, bfd_vma off)
 {
-  bfd_byte buf[GLINK_ENTRY_SIZE];
+  bfd_byte buf[4 * 4];
 
-  if (!bfd_get_section_contents (abfd, glink, buf, off, GLINK_ENTRY_SIZE))
+  if (!bfd_get_section_contents (abfd, glink, buf, off, sizeof buf))
     return FALSE;
 
   return ((bfd_get_32 (abfd, buf + 0) & 0xffff0000) == LIS_11
@@ -2902,10 +2907,10 @@ ppc_elf_get_synthetic_symtab (bfd *abfd, long symcount, asymbol **syms,
   asection *plt, *relplt, *dynamic, *glink;
   bfd_vma glink_vma = 0;
   bfd_vma resolv_vma = 0;
-  bfd_vma stub_vma;
+  bfd_vma stub_off;
   asymbol *s;
   arelent *p;
-  long count, i;
+  long count, i, stub_delta;
   size_t size;
   char *names;
   bfd_byte buf[4];
@@ -3016,9 +3021,14 @@ ppc_elf_get_synthetic_symtab (bfd *abfd, long symcount, asymbol **syms,
   /* If the stubs are those for -shared/-pie then we might have
      multiple stubs for each plt entry.  If that is the case then
      there is no way to associate stubs with their plt entries short
-     of figuring out the GOT pointer value used in the stub.  */
-  if (!is_nonpic_glink_stub (abfd, glink,
-			     glink_vma - GLINK_ENTRY_SIZE - glink->vma))
+     of figuring out the GOT pointer value used in the stub.
+     The offsets tested here need to cover all possible values of
+     GLINK_ENTRY_SIZE for other than __tls_get_addr_opt.  */
+  stub_off = glink_vma - glink->vma;
+  for (stub_delta = 16; stub_delta <= 32; stub_delta += 8)
+    if (is_nonpic_glink_stub (abfd, glink, stub_off - stub_delta))
+      break;
+  if (stub_delta > 32)
     return 0;
 
   slurp_relocs = get_elf_backend_data (abfd)->s->slurp_reloc_table;
@@ -3043,13 +3053,16 @@ ppc_elf_get_synthetic_symtab (bfd *abfd, long symcount, asymbol **syms,
   if (s == NULL)
     return -1;
 
-  stub_vma = glink_vma;
+  stub_off = glink_vma - glink->vma;
   names = (char *) (s + count + 1 + (resolv_vma != 0));
   p = relplt->relocation + count - 1;
   for (i = 0; i < count; i++)
     {
       size_t len;
 
+      stub_off -= stub_delta;
+      if (strcmp ((*p->sym_ptr_ptr)->name, "__tls_get_addr_opt") == 0)
+	stub_off -= 32;
       *s = **p->sym_ptr_ptr;
       /* Undefined syms won't have BSF_LOCAL or BSF_GLOBAL set.  Since
 	 we are defining a symbol, ensure one of them is set.  */
@@ -3057,10 +3070,7 @@ ppc_elf_get_synthetic_symtab (bfd *abfd, long symcount, asymbol **syms,
 	s->flags |= BSF_GLOBAL;
       s->flags |= BSF_SYNTHETIC;
       s->section = glink;
-      stub_vma -= 16;
-      if (strcmp ((*p->sym_ptr_ptr)->name, "__tls_get_addr_opt") == 0)
-	stub_vma -= 32;
-      s->value = stub_vma - glink->vma;
+      s->value = stub_off;
       s->name = names;
       s->udata.p = NULL;
       len = strlen ((*p->sym_ptr_ptr)->name);
@@ -3355,7 +3365,7 @@ ppc_elf_link_hash_table_create (bfd *abfd)
 {
   struct ppc_elf_link_hash_table *ret;
   static struct ppc_elf_params default_params
-    = { PLT_OLD, 0, 1, 0, 0, 12, 0, 0, 0 };
+    = { PLT_OLD, 0, 0, 1, 0, 0, 12, 0, 0, 0 };
 
   ret = bfd_zmalloc (sizeof (struct ppc_elf_link_hash_table));
   if (ret == NULL)
@@ -6015,10 +6025,7 @@ allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
 		if (!doneone || bfd_link_pic (info))
 		  {
 		    glink_offset = s->size;
-		    s->size += GLINK_ENTRY_SIZE;
-		    if (h == htab->tls_get_addr
-			&& !htab->params->no_tls_get_addr_opt)
-		      s->size += TLS_GET_ADDR_GLINK_SIZE - GLINK_ENTRY_SIZE;
+		    s->size += GLINK_ENTRY_SIZE (htab, h);
 		  }
 		if (!doneone
 		    && !bfd_link_pic (info)
@@ -6333,7 +6340,7 @@ ppc_elf_size_dynamic_sections (bfd *output_bfd,
 		if (!doneone || bfd_link_pic (info))
 		  {
 		    glink_offset = s->size;
-		    s->size += GLINK_ENTRY_SIZE;
+		    s->size += GLINK_ENTRY_SIZE (htab, NULL);
 		  }
 		ent->glink_offset = glink_offset;
 
@@ -6401,10 +6408,9 @@ ppc_elf_size_dynamic_sections (bfd *output_bfd,
       && htab->elf.dynamic_sections_created)
     {
       htab->glink_pltresolve = htab->glink->size;
-      /* Space for the branch table.  ??? We don't need entries for
-	 non-dynamic symbols in this table.  This case can arise with
-	 static ifuncs or forced local ifuncs.  */
-      htab->glink->size += htab->glink->size / (GLINK_ENTRY_SIZE / 4) - 4;
+      /* Space for the branch table.  */
+      htab->glink->size
+	+= htab->elf.srelplt->size / (sizeof (Elf32_External_Rela) / 4) - 4;
       /* Pad out to align the start of PLTresolve.  */
       htab->glink->size += -htab->glink->size & (htab->params->ppc476_workaround
 						 ? 63 : 15);
@@ -7443,12 +7449,36 @@ elf_finish_pointer_linker_section (bfd *input_bfd,
 #define PPC_HA(v) PPC_HI ((v) + 0x8000)
 
 static void
-write_glink_stub (struct plt_entry *ent, asection *plt_sec, unsigned char *p,
+write_glink_stub (struct elf_link_hash_entry *h, struct plt_entry *ent,
+		  asection *plt_sec, unsigned char *p,
 		  struct bfd_link_info *info)
 {
   struct ppc_elf_link_hash_table *htab = ppc_elf_hash_table (info);
   bfd *output_bfd = info->output_bfd;
   bfd_vma plt;
+  unsigned char *end = p + GLINK_ENTRY_SIZE (htab, h);
+
+  if (h != NULL
+      && h == htab->tls_get_addr
+      && !htab->params->no_tls_get_addr_opt)
+    {
+      bfd_put_32 (output_bfd, LWZ_11_3, p);
+      p += 4;
+      bfd_put_32 (output_bfd, LWZ_12_3 + 4, p);
+      p += 4;
+      bfd_put_32 (output_bfd, MR_0_3, p);
+      p += 4;
+      bfd_put_32 (output_bfd, CMPWI_11_0, p);
+      p += 4;
+      bfd_put_32 (output_bfd, ADD_3_12_2, p);
+      p += 4;
+      bfd_put_32 (output_bfd, BEQLR, p);
+      p += 4;
+      bfd_put_32 (output_bfd, MR_3_0, p);
+      p += 4;
+      bfd_put_32 (output_bfd, NOP, p);
+      p += 4;
+    }
 
   plt = ((ent->plt.offset & ~1)
 	 + plt_sec->output_section->vma
@@ -7468,26 +7498,12 @@ write_glink_stub (struct plt_entry *ent, asection *plt_sec, unsigned char *p,
       plt -= got;
 
       if (plt + 0x8000 < 0x10000)
-	{
-	  bfd_put_32 (output_bfd, LWZ_11_30 + PPC_LO (plt), p);
-	  p += 4;
-	  bfd_put_32 (output_bfd, MTCTR_11, p);
-	  p += 4;
-	  bfd_put_32 (output_bfd, BCTR, p);
-	  p += 4;
-	  bfd_put_32 (output_bfd, htab->params->ppc476_workaround ? BA : NOP, p);
-	  p += 4;
-	}
+	bfd_put_32 (output_bfd, LWZ_11_30 + PPC_LO (plt), p);
       else
 	{
 	  bfd_put_32 (output_bfd, ADDIS_11_30 + PPC_HA (plt), p);
 	  p += 4;
 	  bfd_put_32 (output_bfd, LWZ_11_11 + PPC_LO (plt), p);
-	  p += 4;
-	  bfd_put_32 (output_bfd, MTCTR_11, p);
-	  p += 4;
-	  bfd_put_32 (output_bfd, BCTR, p);
-	  p += 4;
 	}
     }
   else
@@ -7495,10 +7511,15 @@ write_glink_stub (struct plt_entry *ent, asection *plt_sec, unsigned char *p,
       bfd_put_32 (output_bfd, LIS_11 + PPC_HA (plt), p);
       p += 4;
       bfd_put_32 (output_bfd, LWZ_11_11 + PPC_LO (plt), p);
-      p += 4;
-      bfd_put_32 (output_bfd, MTCTR_11, p);
-      p += 4;
-      bfd_put_32 (output_bfd, BCTR, p);
+    }
+  p += 4;
+  bfd_put_32 (output_bfd, MTCTR_11, p);
+  p += 4;
+  bfd_put_32 (output_bfd, BCTR, p);
+  p += 4;
+  while (p < end)
+    {
+      bfd_put_32 (output_bfd, htab->params->ppc476_workaround ? BA : NOP, p);
       p += 4;
     }
 }
@@ -8253,7 +8274,8 @@ ppc_elf_relocate_section (bfd *output_bfd,
 		{
 		  unsigned char *p = ((unsigned char *) htab->glink->contents
 				      + ent->glink_offset);
-		  write_glink_stub (ent, htab->elf.iplt, p, info);
+
+		  write_glink_stub (NULL, ent, htab->elf.iplt, p, info);
 		  ent->glink_offset |= 1;
 		}
 
@@ -10191,33 +10213,13 @@ ppc_elf_finish_dynamic_symbol (bfd *output_bfd,
 	  {
 	    unsigned char *p;
 	    asection *splt = htab->elf.splt;
+
 	    if (!htab->elf.dynamic_sections_created
 		|| h->dynindx == -1)
 	      splt = htab->elf.iplt;
 
 	    p = (unsigned char *) htab->glink->contents + ent->glink_offset;
-
-	    if (h == htab->tls_get_addr && !htab->params->no_tls_get_addr_opt)
-	      {
-		bfd_put_32 (output_bfd, LWZ_11_3, p);
-		p += 4;
-		bfd_put_32 (output_bfd, LWZ_12_3 + 4, p);
-		p += 4;
-		bfd_put_32 (output_bfd, MR_0_3, p);
-		p += 4;
-		bfd_put_32 (output_bfd, CMPWI_11_0, p);
-		p += 4;
-		bfd_put_32 (output_bfd, ADD_3_12_2, p);
-		p += 4;
-		bfd_put_32 (output_bfd, BEQLR, p);
-		p += 4;
-		bfd_put_32 (output_bfd, MR_3_0, p);
-		p += 4;
-		bfd_put_32 (output_bfd, NOP, p);
-		p += 4;
-	      }
-
-	    write_glink_stub (ent, splt, p, info);
+	    write_glink_stub (h, ent, splt, p, info);
 
 	    if (!bfd_link_pic (info))
 	      /* We only need one non-PIC glink stub.  */
@@ -10502,7 +10504,6 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
       unsigned char *p;
       unsigned char *endp;
       bfd_vma res0;
-      unsigned int i;
 
       /*
        * PIC glink code is the following:
@@ -10539,28 +10540,7 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
        *    add 0,11,11
        *    add 11,0,11			# r11 = index * 12 = reloc offset.
        *    bctr
-       */
-      static const unsigned int pic_plt_resolve[] =
-	{
-	  ADDIS_11_11,
-	  MFLR_0,
-	  BCL_20_31,
-	  ADDI_11_11,
-	  MFLR_12,
-	  MTLR_0,
-	  SUB_11_11_12,
-	  ADDIS_12_12,
-	  LWZ_0_12,
-	  LWZ_12_12,
-	  MTCTR_0,
-	  ADD_0_11_11,
-	  ADD_11_0_11,
-	  BCTR,
-	  NOP,
-	  NOP
-	};
-
-      /*
+       *
        * Non-PIC glink code is a little simpler.
        *
        * # ith PLT code stub.
@@ -10582,30 +10562,6 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
        *    add 11,0,11			# r11 = index * 12 = reloc offset.
        *    bctr
        */
-      static const unsigned int plt_resolve[] =
-	{
-	  LIS_12,
-	  ADDIS_11_11,
-	  LWZ_0_12,
-	  ADDI_11_11,
-	  MTCTR_0,
-	  ADD_0_11_11,
-	  LWZ_12_12,
-	  ADD_11_0_11,
-	  BCTR,
-	  NOP,
-	  NOP,
-	  NOP,
-	  NOP,
-	  NOP,
-	  NOP,
-	  NOP
-	};
-
-      if (ARRAY_SIZE (pic_plt_resolve) != GLINK_PLTRESOLVE / 4)
-	abort ();
-      if (ARRAY_SIZE (plt_resolve) != GLINK_PLTRESOLVE / 4)
-	abort ();
 
       /* Build the branch table, one for each plt entry (less one),
 	 and perhaps some padding.  */
@@ -10662,80 +10618,83 @@ ppc_elf_finish_dynamic_sections (bfd *output_bfd,
 	}
 
       /* Last comes the PLTresolve stub.  */
+      endp = p + GLINK_PLTRESOLVE;
       if (bfd_link_pic (info))
 	{
 	  bfd_vma bcl;
 
-	  for (i = 0; i < ARRAY_SIZE (pic_plt_resolve); i++)
-	    {
-	      unsigned int insn = pic_plt_resolve[i];
-
-	      if (htab->params->ppc476_workaround && insn == NOP)
-		insn = BA + 0;
-	      bfd_put_32 (output_bfd, insn, p);
-	      p += 4;
-	    }
-	  p -= 4 * ARRAY_SIZE (pic_plt_resolve);
-
 	  bcl = (htab->glink->size - GLINK_PLTRESOLVE + 3*4
 		 + htab->glink->output_section->vma
 		 + htab->glink->output_offset);
 
-	  bfd_put_32 (output_bfd,
-		      ADDIS_11_11 + PPC_HA (bcl - res0), p + 0*4);
-	  bfd_put_32 (output_bfd,
-		      ADDI_11_11 + PPC_LO (bcl - res0), p + 3*4);
-	  bfd_put_32 (output_bfd,
-		      ADDIS_12_12 + PPC_HA (got + 4 - bcl), p + 7*4);
+	  bfd_put_32 (output_bfd, ADDIS_11_11 + PPC_HA (bcl - res0), p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, MFLR_0, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, BCL_20_31, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, ADDI_11_11 + PPC_LO (bcl - res0), p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, MFLR_12, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, MTLR_0, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, SUB_11_11_12, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, ADDIS_12_12 + PPC_HA (got + 4 - bcl), p);
+	  p += 4;
 	  if (PPC_HA (got + 4 - bcl) == PPC_HA (got + 8 - bcl))
 	    {
-	      bfd_put_32 (output_bfd,
-			  LWZ_0_12 + PPC_LO (got + 4 - bcl), p + 8*4);
-	      bfd_put_32 (output_bfd,
-			  LWZ_12_12 + PPC_LO (got + 8 - bcl), p + 9*4);
+	      bfd_put_32 (output_bfd, LWZ_0_12 + PPC_LO (got + 4 - bcl), p);
+	      p += 4;
+	      bfd_put_32 (output_bfd, LWZ_12_12 + PPC_LO (got + 8 - bcl), p);
+	      p += 4;
 	    }
 	  else
 	    {
-	      bfd_put_32 (output_bfd,
-			  LWZU_0_12 + PPC_LO (got + 4 - bcl), p + 8*4);
-	      bfd_put_32 (output_bfd,
-			  LWZ_12_12 + 4, p + 9*4);
+	      bfd_put_32 (output_bfd, LWZU_0_12 + PPC_LO (got + 4 - bcl), p);
+	      p += 4;
+	      bfd_put_32 (output_bfd, LWZ_12_12 + 4, p);
+	      p += 4;
 	    }
+	  bfd_put_32 (output_bfd, MTCTR_0, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, ADD_0_11_11, p);
 	}
       else
 	{
-	  for (i = 0; i < ARRAY_SIZE (plt_resolve); i++)
-	    {
-	      unsigned int insn = plt_resolve[i];
-
-	      if (htab->params->ppc476_workaround && insn == NOP)
-		insn = BA + 0;
-	      bfd_put_32 (output_bfd, insn, p);
-	      p += 4;
-	    }
-	  p -= 4 * ARRAY_SIZE (plt_resolve);
-
-	  bfd_put_32 (output_bfd,
-		      LIS_12 + PPC_HA (got + 4), p + 0*4);
-	  bfd_put_32 (output_bfd,
-		      ADDIS_11_11 + PPC_HA (-res0), p + 1*4);
-	  bfd_put_32 (output_bfd,
-		      ADDI_11_11 + PPC_LO (-res0), p + 3*4);
+	  bfd_put_32 (output_bfd, LIS_12 + PPC_HA (got + 4), p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, ADDIS_11_11 + PPC_HA (-res0), p);
+	  p += 4;
 	  if (PPC_HA (got + 4) == PPC_HA (got + 8))
-	    {
-	      bfd_put_32 (output_bfd,
-			  LWZ_0_12 + PPC_LO (got + 4), p + 2*4);
-	      bfd_put_32 (output_bfd,
-			  LWZ_12_12 + PPC_LO (got + 8), p + 6*4);
-	    }
+	    bfd_put_32 (output_bfd, LWZ_0_12 + PPC_LO (got + 4), p);
 	  else
-	    {
-	      bfd_put_32 (output_bfd,
-			  LWZU_0_12 + PPC_LO (got + 4), p + 2*4);
-	      bfd_put_32 (output_bfd,
-			  LWZ_12_12 + 4, p + 6*4);
-	    }
+	    bfd_put_32 (output_bfd, LWZU_0_12 + PPC_LO (got + 4), p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, ADDI_11_11 + PPC_LO (-res0), p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, MTCTR_0, p);
+	  p += 4;
+	  bfd_put_32 (output_bfd, ADD_0_11_11, p);
+	  p += 4;
+	  if (PPC_HA (got + 4) == PPC_HA (got + 8))
+	    bfd_put_32 (output_bfd, LWZ_12_12 + PPC_LO (got + 8), p);
+	  else
+	    bfd_put_32 (output_bfd, LWZ_12_12 + 4, p);
+	}
+      p += 4;
+      bfd_put_32 (output_bfd, ADD_11_0_11, p);
+      p += 4;
+      bfd_put_32 (output_bfd, BCTR, p);
+      p += 4;
+      while (p < endp)
+	{
+	  bfd_put_32 (output_bfd,
+		      htab->params->ppc476_workaround ? BA : NOP, p);
+	  p += 4;
 	}
+      BFD_ASSERT (p == endp);
     }
 
   if (htab->glink_eh_frame != NULL
diff --git a/bfd/elf32-ppc.h b/bfd/elf32-ppc.h
index 4548ca7..f56d027 100644
--- a/bfd/elf32-ppc.h
+++ b/bfd/elf32-ppc.h
@@ -32,6 +32,9 @@ struct ppc_elf_params
   /* Chooses the type of .plt.  */
   enum ppc_elf_plt_type plt_style;
 
+  /* Set if individual PLT call stubs should be aligned.  */
+  int plt_stub_align;
+
   /* Whether to emit symbols for stubs.  */
   int emit_stub_syms;
 
diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c
index d4016b9..10d3fe9 100644
--- a/bfd/elf64-ppc.c
+++ b/bfd/elf64-ppc.c
@@ -187,8 +187,9 @@ static bfd_vma opd_entry_value
 #define ADDIS_R12_R12	0x3d8c0000	/* addis %r12,%r12,xxx@ha */
 #define LD_R12_0R12	0xe98c0000	/* ld	 %r12,xxx@l(%r12) */
 
-/* glink call stub instructions.  We enter with the index in R0.  */
-#define GLINK_CALL_STUB_SIZE (16*4)
+/* __glink_PLTresolve stub instructions.  We enter with the index in R0.  */
+#define GLINK_PLTRESOLVE_SIZE(htab)			\
+  (8u + (htab->opd_abi ? 11 * 4 : 14 * 4))
 					/* 0:				*/
 					/*  .quad plt0-1f		*/
 					/* __glink:			*/
@@ -3515,9 +3516,9 @@ ppc64_elf_get_synthetic_symtab (bfd *abfd,
 
 	      if (dyn.d_tag == DT_PPC64_GLINK)
 		{
-		  /* The first glink stub starts at offset 32; see
-		     comment in ppc64_elf_finish_dynamic_sections. */
-		  glink_vma = dyn.d_un.d_val + GLINK_CALL_STUB_SIZE - 8 * 4;
+		  /* The first glink stub starts at DT_PPC64_GLINK plus 32.
+		     See comment in ppc64_elf_finish_dynamic_sections. */
+		  glink_vma = dyn.d_un.d_val + 8 * 4;
 		  /* The .glink section usually does not survive the final
 		     link; search for the section (usually .text) where the
 		     glink stubs now reside.  */
@@ -4092,6 +4093,7 @@ struct ppc_link_hash_table
 
   /* Shortcuts to get to dynamic linker sections.  */
   asection *glink;
+  asection *global_entry;
   asection *sfpr;
   asection *brlt;
   asection *relbrlt;
@@ -4431,6 +4433,14 @@ create_linkage_sections (bfd *dynobj, struct bfd_link_info *info)
       || ! bfd_set_section_alignment (dynobj, htab->glink, 3))
     return FALSE;
 
+  /* The part of .glink used by global entry stubs, separate so that
+     it can be aligned appropriately without affecting htab->glink.  */
+  htab->global_entry = bfd_make_section_anyway_with_flags (dynobj, ".glink",
+							   flags);
+  if (htab->global_entry == NULL
+      || ! bfd_set_section_alignment (dynobj, htab->global_entry, 2))
+    return FALSE;
+
   if (!info->no_ld_generated_unwind_info)
     {
       flags = (SEC_ALLOC | SEC_LOAD | SEC_READONLY | SEC_HAS_CONTENTS
@@ -9793,11 +9803,11 @@ allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
 		/* Make room for the .glink code.  */
 		s = htab->glink;
 		if (s->size == 0)
-		  s->size += GLINK_CALL_STUB_SIZE;
+		  s->size += GLINK_PLTRESOLVE_SIZE (htab);
 		if (htab->opd_abi)
 		  {
 		    /* We need bigger stubs past index 32767.  */
-		    if (s->size >= GLINK_CALL_STUB_SIZE + 32768*2*4)
+		    if (s->size >= GLINK_PLTRESOLVE_SIZE (htab) + 32768*2*4)
 		      s->size += 4;
 		    s->size += 2*4;
 		  }
@@ -9827,6 +9837,10 @@ allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
   return TRUE;
 }
 
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
 /* Called via elf_link_hash_traverse from ppc64_elf_size_dynamic_sections
    to set up space for global entry stubs.  These are put in glink,
    after the branch table.  */
@@ -9837,7 +9851,7 @@ size_global_entry_stubs (struct elf_link_hash_entry *h, void *inf)
   struct bfd_link_info *info;
   struct ppc_link_hash_table *htab;
   struct plt_entry *pent;
-  asection *s;
+  asection *s, *plt;
 
   if (h->root.type == bfd_link_hash_indirect)
     return TRUE;
@@ -9853,7 +9867,8 @@ size_global_entry_stubs (struct elf_link_hash_entry *h, void *inf)
   if (htab == NULL)
     return FALSE;
 
-  s = htab->glink;
+  s = htab->global_entry;
+  plt = htab->elf.splt;
   for (pent = h->plt.plist; pent != NULL; pent = pent->next)
     if (pent->plt.offset != (bfd_vma) -1
 	&& pent->addend == 0)
@@ -9862,11 +9877,39 @@ size_global_entry_stubs (struct elf_link_hash_entry *h, void *inf)
 	   and we are not generating a shared library or pie, then we
 	   need to define the symbol in the executable on a call stub.
 	   This is to avoid text relocations.  */
-	s->size = (s->size + 15) & -16;
+	bfd_vma off, stub_align, stub_off, stub_size;
+	unsigned int align_power;
+
+	stub_size = 16;
+	stub_off = s->size;
+	if (htab->params->plt_stub_align >= 0)
+	  align_power = htab->params->plt_stub_align;
+	else
+	  align_power = -htab->params->plt_stub_align;
+	/* Setting section alignment is delayed until we know it is
+	   non-empty.  Otherwise the .text output section will be
+	   aligned at least to plt_stub_align even when no global
+	   entry stubs are needed.  */
+	if (s->alignment_power < align_power)
+	  s->alignment_power = align_power;
+	stub_align = (bfd_vma) 1 << align_power;
+	if (htab->params->plt_stub_align >= 0
+	    || ((((stub_off + stub_size - 1) & -stub_align)
+		 - (stub_off & -stub_align))
+		> ((stub_size - 1) & -stub_align)))
+	  stub_off = (stub_off + stub_align - 1) & -stub_align;
+	off = pent->plt.offset + plt->output_offset + plt->output_section->vma;
+	off -= stub_off + s->output_offset + s->output_section->vma;
+	/* Note that for --plt-stub-align negative we have a possible
+	   dependency between stub offset and size.  Break that
+	   dependency by assuming the max stub size when calculating
+	   the stub offset.  */
+	if (PPC_HA (off) == 0)
+	  stub_size -= 4;
 	h->root.type = bfd_link_hash_defined;
 	h->root.u.def.section = s;
-	h->root.u.def.value = s->size;
-	s->size += 16;
+	h->root.u.def.value = stub_off;
+	s->size = stub_off + stub_size;
 	break;
       }
   return TRUE;
@@ -10051,9 +10094,6 @@ ppc64_elf_size_dynamic_sections (bfd *output_bfd,
   /* Allocate global sym .plt and .got entries, and space for global
      sym dynamic relocs.  */
   elf_link_hash_traverse (&htab->elf, allocate_dynrelocs, info);
-  /* Stash the end of glink branch table.  */
-  if (htab->glink != NULL)
-    htab->glink->rawsize = htab->glink->size;
 
   if (!htab->opd_abi && !bfd_link_pic (info))
     elf_link_hash_traverse (&htab->elf, size_global_entry_stubs, info);
@@ -10108,6 +10148,7 @@ ppc64_elf_size_dynamic_sections (bfd *output_bfd,
 	       || s == htab->elf.splt
 	       || s == htab->elf.iplt
 	       || s == htab->glink
+	       || s == htab->global_entry
 	       || s == htab->elf.sdynbss
 	       || s == htab->elf.sdynrelro)
 	{
@@ -10393,10 +10434,6 @@ ppc_type_of_stub (asection *input_sec,
 #define ALWAYS_USE_FAKE_DEP 0
 #define ALWAYS_EMIT_R2SAVE 0
 
-#define PPC_LO(v) ((v) & 0xffff)
-#define PPC_HI(v) (((v) >> 16) & 0xffff)
-#define PPC_HA(v) PPC_HI ((v) + 0x8000)
-
 static inline unsigned int
 plt_stub_size (struct ppc_link_hash_table *htab,
 	       struct ppc_stub_hash_entry *stub_entry,
@@ -10492,7 +10529,7 @@ build_plt_stub (struct ppc_link_hash_table *htab,
       bfd_vma pltoff = stub_entry->plt_ent->plt.offset & ~1;
       bfd_vma pltindex = ((pltoff - PLT_INITIAL_ENTRY_SIZE (htab))
 			  / PLT_ENTRY_SIZE (htab));
-      bfd_vma glinkoff = GLINK_CALL_STUB_SIZE + pltindex * 8;
+      bfd_vma glinkoff = GLINK_PLTRESOLVE_SIZE (htab) + pltindex * 8;
       bfd_vma to, from;
 
       if (pltindex > 32768)
@@ -10767,7 +10804,6 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
   bfd_byte *loc;
   bfd_byte *p;
   bfd_vma dest, off;
-  int size;
   Elf_Internal_Rela *r;
   asection *plt;
 
@@ -10800,7 +10836,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	      + stub_entry->group->stub_sec->output_offset
 	      + stub_entry->group->stub_sec->output_section->vma);
 
-      size = 4;
+      p = loc;
       if (stub_entry->stub_type == ppc_stub_long_branch_r2off)
 	{
 	  bfd_vma r2off = get_r2off (info, stub_entry);
@@ -10810,26 +10846,24 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	      htab->stub_error = TRUE;
 	      return FALSE;
 	    }
-	  bfd_put_32 (htab->params->stub_bfd, STD_R2_0R1 + STK_TOC (htab), loc);
-	  loc += 4;
-	  size = 8;
+	  bfd_put_32 (htab->params->stub_bfd, STD_R2_0R1 + STK_TOC (htab), p);
+	  p += 4;
 	  if (PPC_HA (r2off) != 0)
 	    {
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDIS_R2_R2 | PPC_HA (r2off), loc);
-	      loc += 4;
-	      size += 4;
+			  ADDIS_R2_R2 | PPC_HA (r2off), p);
+	      p += 4;
 	    }
 	  if (PPC_LO (r2off) != 0)
 	    {
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDI_R2_R2 | PPC_LO (r2off), loc);
-	      loc += 4;
-	      size += 4;
+			  ADDI_R2_R2 | PPC_LO (r2off), p);
+	      p += 4;
 	    }
-	  off -= size - 4;
+	  off -= p - loc;
 	}
-      bfd_put_32 (htab->params->stub_bfd, B_DOT | (off & 0x3fffffc), loc);
+      bfd_put_32 (htab->params->stub_bfd, B_DOT | (off & 0x3fffffc), p);
+      p += 4;
 
       if (off + (1 << 25) >= (bfd_vma) (1 << 26))
 	{
@@ -10845,7 +10879,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	  r = get_relocs (stub_entry->group->stub_sec, 1);
 	  if (r == NULL)
 	    return FALSE;
-	  r->r_offset = loc - stub_entry->group->stub_sec->contents;
+	  r->r_offset = p - 4 - stub_entry->group->stub_sec->contents;
 	  r->r_info = ELF64_R_INFO (0, R_PPC64_REL24);
 	  r->r_addend = dest;
 	  if (stub_entry->h != NULL)
@@ -10985,23 +11019,20 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	    }
 	}
 
+      p = loc;
       if (stub_entry->stub_type != ppc_stub_plt_branch_r2off)
 	{
 	  if (PPC_HA (off) != 0)
 	    {
-	      size = 16;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDIS_R12_R2 | PPC_HA (off), loc);
-	      loc += 4;
+			  ADDIS_R12_R2 | PPC_HA (off), p);
+	      p += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  LD_R12_0R12 | PPC_LO (off), loc);
+			  LD_R12_0R12 | PPC_LO (off), p);
 	    }
 	  else
-	    {
-	      size = 12;
-	      bfd_put_32 (htab->params->stub_bfd,
-			  LD_R12_0R2 | PPC_LO (off), loc);
-	    }
+	    bfd_put_32 (htab->params->stub_bfd,
+			LD_R12_0R2 | PPC_LO (off), p);
 	}
       else
 	{
@@ -11013,40 +11044,37 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	      return FALSE;
 	    }
 
-	  bfd_put_32 (htab->params->stub_bfd, STD_R2_0R1 + STK_TOC (htab), loc);
-	  loc += 4;
-	  size = 16;
+	  bfd_put_32 (htab->params->stub_bfd, STD_R2_0R1 + STK_TOC (htab), p);
+	  p += 4;
 	  if (PPC_HA (off) != 0)
 	    {
-	      size += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDIS_R12_R2 | PPC_HA (off), loc);
-	      loc += 4;
+			  ADDIS_R12_R2 | PPC_HA (off), p);
+	      p += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  LD_R12_0R12 | PPC_LO (off), loc);
+			  LD_R12_0R12 | PPC_LO (off), p);
 	    }
 	  else
-	    bfd_put_32 (htab->params->stub_bfd, LD_R12_0R2 | PPC_LO (off), loc);
+	    bfd_put_32 (htab->params->stub_bfd, LD_R12_0R2 | PPC_LO (off), p);
 
 	  if (PPC_HA (r2off) != 0)
 	    {
-	      size += 4;
-	      loc += 4;
+	      p += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDIS_R2_R2 | PPC_HA (r2off), loc);
+			  ADDIS_R2_R2 | PPC_HA (r2off), p);
 	    }
 	  if (PPC_LO (r2off) != 0)
 	    {
-	      size += 4;
-	      loc += 4;
+	      p += 4;
 	      bfd_put_32 (htab->params->stub_bfd,
-			  ADDI_R2_R2 | PPC_LO (r2off), loc);
+			  ADDI_R2_R2 | PPC_LO (r2off), p);
 	    }
 	}
-      loc += 4;
-      bfd_put_32 (htab->params->stub_bfd, MTCTR_R12, loc);
-      loc += 4;
-      bfd_put_32 (htab->params->stub_bfd, BCTR, loc);
+      p += 4;
+      bfd_put_32 (htab->params->stub_bfd, MTCTR_R12, p);
+      p += 4;
+      bfd_put_32 (htab->params->stub_bfd, BCTR, p);
+      p += 4;
       break;
 
     case ppc_stub_plt_call:
@@ -11150,7 +11178,6 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
 	p = build_tls_get_addr_stub (htab, stub_entry, loc, off, r);
       else
 	p = build_plt_stub (htab, stub_entry, loc, off, r);
-      size = p - loc;
       break;
 
     case ppc_stub_save_res:
@@ -11161,7 +11188,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
       return FALSE;
     }
 
-  stub_entry->group->stub_sec->size += size;
+  stub_entry->group->stub_sec->size += p - loc;
 
   if (htab->params->emit_stub_syms)
     {
@@ -12951,7 +12978,7 @@ build_global_entry_stubs (struct elf_link_hash_entry *h, void *inf)
   if (htab == NULL)
     return FALSE;
 
-  s = htab->glink;
+  s = htab->global_entry;
   for (pent = h->plt.plist; pent != NULL; pent = pent->next)
     if (pent->plt.offset != (bfd_vma) -1
 	&& pent->addend == 0)
@@ -13144,15 +13171,11 @@ ppc64_elf_build_stubs (struct bfd_link_info *info,
 	}
       bfd_put_32 (htab->glink->owner, BCTR, p);
       p += 4;
-      while (p - htab->glink->contents < GLINK_CALL_STUB_SIZE)
-	{
-	  bfd_put_32 (htab->glink->owner, NOP, p);
-	  p += 4;
-	}
+      BFD_ASSERT (p - htab->glink->contents == GLINK_PLTRESOLVE_SIZE (htab));
 
       /* Build the .glink lazy link call stubs.  */
       indx = 0;
-      while (p < htab->glink->contents + htab->glink->rawsize)
+      while (p < htab->glink->contents + htab->glink->size)
 	{
 	  if (htab->opd_abi)
 	    {
@@ -13175,12 +13198,12 @@ ppc64_elf_build_stubs (struct bfd_link_info *info,
 	  indx++;
 	  p += 4;
 	}
-
-      /* Build .glink global entry stubs.  */
-      if (htab->glink->size > htab->glink->rawsize)
-	elf_link_hash_traverse (&htab->elf, build_global_entry_stubs, info);
     }
 
+  /* Build .glink global entry stubs.  */
+  if (htab->global_entry != NULL && htab->global_entry->size != 0)
+    elf_link_hash_traverse (&htab->elf, build_global_entry_stubs, info);
+
   if (htab->brlt != NULL && htab->brlt->size != 0)
     {
       htab->brlt->contents = bfd_zalloc (htab->brlt->owner,
@@ -15557,7 +15580,7 @@ ppc64_elf_finish_dynamic_sections (bfd *output_bfd,
 		 of glink rather than the first entry point, which is
 		 what ld.so needs, and now have a bigger stub to
 		 support automatic multiple TOCs.  */
-	      dyn.d_un.d_ptr += GLINK_CALL_STUB_SIZE - 8 * 4;
+	      dyn.d_un.d_ptr += GLINK_PLTRESOLVE_SIZE (htab) - 8 * 4;
 	      break;
 
 	    case DT_PPC64_OPD:
diff --git a/gold/options.h b/gold/options.h
index feb60cc..b39d5ff 100644
--- a/gold/options.h
+++ b/gold/options.h
@@ -1101,7 +1101,7 @@ class General_options
 	      NULL, N_("(ARM only) Ignore for backward compatibility"));
 
   DEFINE_var(plt_align, options::TWO_DASHES, '\0', 0, "5",
-	     N_("(PowerPC64 only) Align PLT call stubs to fit cache lines"),
+	     N_("(PowerPC only) Align PLT call stubs to fit cache lines"),
 	     N_("[=P2ALIGN]"), true, int, int, options::parse_uint, false);
 
   DEFINE_bool(plt_localentry, options::TWO_DASHES, '\0', false,
diff --git a/gold/powerpc.cc b/gold/powerpc.cc
index 94efcdf..9ed5b21 100644
--- a/gold/powerpc.cc
+++ b/gold/powerpc.cc
@@ -3524,7 +3524,7 @@ Target_powerpc<size, big_endian>::do_relax(int pass,
 
       if (this->glink_ != NULL)
 	{
-	  int stub_size = this->glink_->pltresolve_size;
+	  int stub_size = this->glink_->pltresolve_size();
 	  Address value = -stub_size;
 	  if (size == 64)
 	    {
@@ -3580,7 +3580,7 @@ Target_powerpc<size, big_endian>::do_plt_fde_location(const Output_data* plt,
 	  // There are two FDEs for a position independent glink.
 	  // The first covers the branch table, the second
 	  // __glink_PLTresolve at the end of glink.
-	  off_t resolve_size = this->glink_->pltresolve_size;
+	  off_t resolve_size = this->glink_->pltresolve_size();
 	  if (oview[9] == elfcpp::DW_CFA_nop)
 	    len -= resolve_size;
 	  else
@@ -4391,9 +4391,9 @@ class Stub_table : public Output_relaxed_input_section
   unsigned int
   stub_align() const
   {
-    if (size == 32)
-      return 16;
-    unsigned int min_align = 32;
+    unsigned int min_align = 4;
+    if (!parameters->options().user_set_plt_align())
+      return size == 64 ? 32 : min_align;
     unsigned int user_align = 1 << parameters->options().plt_align();
     return std::max(user_align, min_align);
   }
@@ -4425,9 +4425,8 @@ class Stub_table : public Output_relaxed_input_section
     if (size == 32)
       {
 	const Symbol* gsym = p->first.sym_;
-	if (this->targ_->is_tls_get_addr_opt(gsym))
-	  return 12 * 4;
-	return 4 * 4;
+	return (4 * 4
+		+ (this->targ_->is_tls_get_addr_opt(gsym) ? 8 * 4 : 0));
       }
 
     bool is_iplt;
@@ -4460,10 +4459,8 @@ class Stub_table : public Output_relaxed_input_section
   unsigned int
   plt_call_align(unsigned int bytes) const
   {
-    unsigned int align = 1 << parameters->options().plt_align();
-    if (align > 1)
-      bytes = (bytes + align - 1) & -align;
-    return bytes;
+    unsigned int align = this->stub_align();
+    return (bytes + align - 1) & -align;
   }
 
   // Return long branch stub size.
@@ -4473,9 +4470,10 @@ class Stub_table : public Output_relaxed_input_section
     Address loc = this->stub_address() + this->last_plt_size_ + p->second;
     if (p->first.dest_ - loc + (1 << 25) < 2 << 25)
       return 4;
-    if (size == 64 || !parameters->options().output_is_position_independent())
-      return 16;
-    return 32;
+    unsigned int bytes = 16;
+    if (size == 32 && parameters->options().output_is_position_independent())
+      bytes += 16;
+    return bytes;
   }
 
   // Write out stubs.
@@ -4884,7 +4882,6 @@ class Output_data_glink : public Output_section_data
  public:
   typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
   static const Address invalid_address = static_cast<Address>(0) - 1;
-  static const int pltresolve_size = 16*4;
 
   Output_data_glink(Target_powerpc<size, big_endian>* targ)
     : Output_section_data(16), targ_(targ), global_entry_stubs_(),
@@ -4900,12 +4897,35 @@ class Output_data_glink : public Output_section_data
   Address
   find_global_entry(const Symbol*) const;
 
+  unsigned int
+  global_entry_align(unsigned int off) const
+  {
+    unsigned int align = 1 << parameters->options().plt_align();
+    if (!parameters->options().user_set_plt_align())
+      align = size == 64 ? 32 : 4;
+    return (off + align - 1) & -align;
+  }
+
+  unsigned int
+  global_entry_off() const
+  {
+    return this->global_entry_align(this->end_branch_table_);
+  }
+
   Address
   global_entry_address() const
   {
     gold_assert(this->is_data_size_valid());
-    unsigned int global_entry_off = (this->end_branch_table_ + 15) & -16;
-    return this->address() + global_entry_off;
+    return this->address() + this->global_entry_off();
+  }
+
+  int
+  pltresolve_size() const
+  {
+    if (size == 64)
+      return (8
+	      + (this->targ_->abiversion() < 2 ? 11 * 4 : 14 * 4));
+    return 16 * 4;
   }
 
  protected:
@@ -4977,10 +4997,11 @@ template<int size, bool big_endian>
 void
 Output_data_glink<size, big_endian>::add_global_entry(const Symbol* gsym)
 {
+  unsigned int off = this->global_entry_align(this->ge_size_);
   std::pair<typename Global_entry_stub_entries::iterator, bool> p
-    = this->global_entry_stubs_.insert(std::make_pair(gsym, this->ge_size_));
+    = this->global_entry_stubs_.insert(std::make_pair(gsym, off));
   if (p.second)
-    this->ge_size_ += 16;
+    this->ge_size_ = off + 16;
 }
 
 template<int size, bool big_endian>
@@ -5007,11 +5028,11 @@ Output_data_glink<size, big_endian>::set_final_data_size()
 	  total += 4 * (count - 1);
 
 	  total += -total & 15;
-	  total += this->pltresolve_size;
+	  total += this->pltresolve_size();
 	}
       else
 	{
-	  total += this->pltresolve_size;
+	  total += this->pltresolve_size();
 
 	  // space for branch table
 	  total += 4 * count;
@@ -5024,7 +5045,7 @@ Output_data_glink<size, big_endian>::set_final_data_size()
 	}
     }
   this->end_branch_table_ = total;
-  total = (total + 15) & -16;
+  total = this->global_entry_align(total);
   total += this->ge_size_;
 
   this->set_data_size(total);
@@ -5175,7 +5196,7 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
 		    = ((pltoff - this->targ_->first_plt_entry_offset())
 		       / this->targ_->plt_entry_size());
 		  Address glinkoff
-		    = (this->targ_->glink_section()->pltresolve_size
+		    = (this->targ_->glink_section()->pltresolve_size()
 		       + pltindex * 8);
 		  if (pltindex > 32768)
 		    glinkoff += (pltindex - 32768) * 4;
@@ -5441,26 +5462,24 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
 
 		  Address off = plt_addr - got_addr;
 		  if (ha(off) == 0)
-		    {
-		      write_insn<big_endian>(p +  0, lwz_11_30 + l(off));
-		      write_insn<big_endian>(p +  4, mtctr_11);
-		      write_insn<big_endian>(p +  8, bctr);
-		    }
+		    write_insn<big_endian>(p, lwz_11_30 + l(off));
 		  else
 		    {
-		      write_insn<big_endian>(p +  0, addis_11_30 + ha(off));
-		      write_insn<big_endian>(p +  4, lwz_11_11 + l(off));
-		      write_insn<big_endian>(p +  8, mtctr_11);
-		      write_insn<big_endian>(p + 12, bctr);
+		      write_insn<big_endian>(p, addis_11_30 + ha(off));
+		      p += 4;
+		      write_insn<big_endian>(p, lwz_11_11 + l(off));
 		    }
 		}
 	      else
 		{
-		  write_insn<big_endian>(p +  0, lis_11 + ha(plt_addr));
-		  write_insn<big_endian>(p +  4, lwz_11_11 + l(plt_addr));
-		  write_insn<big_endian>(p +  8, mtctr_11);
-		  write_insn<big_endian>(p + 12, bctr);
+		  write_insn<big_endian>(p, lis_11 + ha(plt_addr));
+		  p += 4;
+		  write_insn<big_endian>(p, lwz_11_11 + l(plt_addr));
 		}
+	      p += 4;
+	      write_insn<big_endian>(p, mtctr_11);
+	      p += 4;
+	      write_insn<big_endian>(p, bctr);
 	    }
 	}
 
@@ -5479,23 +5498,29 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
 	    write_insn<big_endian>(p, b | (delta & 0x3fffffc));
 	  else if (!parameters->options().output_is_position_independent())
 	    {
-	      write_insn<big_endian>(p +  0, lis_12 + ha(bs->first.dest_));
-	      write_insn<big_endian>(p +  4, addi_12_12 + l(bs->first.dest_));
-	      write_insn<big_endian>(p +  8, mtctr_12);
-	      write_insn<big_endian>(p + 12, bctr);
+	      write_insn<big_endian>(p, lis_12 + ha(bs->first.dest_));
+	      p += 4;
+	      write_insn<big_endian>(p, addi_12_12 + l(bs->first.dest_));
 	    }
 	  else
 	    {
 	      delta -= 8;
-	      write_insn<big_endian>(p +  0, mflr_0);
-	      write_insn<big_endian>(p +  4, bcl_20_31);
-	      write_insn<big_endian>(p +  8, mflr_12);
-	      write_insn<big_endian>(p + 12, addis_12_12 + ha(delta));
-	      write_insn<big_endian>(p + 16, addi_12_12 + l(delta));
-	      write_insn<big_endian>(p + 20, mtlr_0);
-	      write_insn<big_endian>(p + 24, mtctr_12);
-	      write_insn<big_endian>(p + 28, bctr);
+	      write_insn<big_endian>(p, mflr_0);
+	      p += 4;
+	      write_insn<big_endian>(p, bcl_20_31);
+	      p += 4;
+	      write_insn<big_endian>(p, mflr_12);
+	      p += 4;
+	      write_insn<big_endian>(p, addis_12_12 + ha(delta));
+	      p += 4;
+	      write_insn<big_endian>(p, addi_12_12 + l(delta));
+	      p += 4;
+	      write_insn<big_endian>(p, mtlr_0);
 	    }
+	  p += 4;
+	  write_insn<big_endian>(p, mtctr_12);
+	  p += 4;
+	  write_insn<big_endian>(p, bctr);
 	}
     }
   if (this->need_save_res_)
@@ -5563,8 +5588,7 @@ Output_data_glink<size, big_endian>::do_write(Output_file* of)
 	      write_insn<big_endian>(p, ld_11_11 + 8),		p += 4;
 	    }
 	  write_insn<big_endian>(p, bctr),			p += 4;
-	  while (p < oview + this->pltresolve_size)
-	    write_insn<big_endian>(p, nop), p += 4;
+	  gold_assert(p == oview + this->pltresolve_size());
 
 	  // Write lazy link call stubs.
 	  uint32_t indx = 0;
@@ -5590,7 +5614,7 @@ Output_data_glink<size, big_endian>::do_write(Output_file* of)
 
       Address plt_base = this->targ_->plt_section()->address();
       Address iplt_base = invalid_address;
-      unsigned int global_entry_off = (this->end_branch_table_ + 15) & -16;
+      unsigned int global_entry_off = this->global_entry_off();
       Address global_entry_base = this->address() + global_entry_off;
       typename Global_entry_stub_entries::const_iterator ge;
       for (ge = this->global_entry_stubs_.begin();
@@ -5631,7 +5655,7 @@ Output_data_glink<size, big_endian>::do_write(Output_file* of)
 
       // Write out pltresolve branch table.
       p = oview;
-      unsigned int the_end = oview_size - this->pltresolve_size;
+      unsigned int the_end = oview_size - this->pltresolve_size();
       unsigned char* end_p = oview + the_end;
       while (p < end_p - 8 * 4)
 	write_insn<big_endian>(p, b + end_p - p), p += 4;
@@ -5639,68 +5663,85 @@ Output_data_glink<size, big_endian>::do_write(Output_file* of)
 	write_insn<big_endian>(p, nop), p += 4;
 
       // Write out pltresolve call stub.
+      end_p = oview + oview_size;
       if (parameters->options().output_is_position_independent())
 	{
 	  Address res0_off = 0;
 	  Address after_bcl_off = the_end + 12;
 	  Address bcl_res0 = after_bcl_off - res0_off;
 
-	  write_insn<big_endian>(p +  0, addis_11_11 + ha(bcl_res0));
-	  write_insn<big_endian>(p +  4, mflr_0);
-	  write_insn<big_endian>(p +  8, bcl_20_31);
-	  write_insn<big_endian>(p + 12, addi_11_11 + l(bcl_res0));
-	  write_insn<big_endian>(p + 16, mflr_12);
-	  write_insn<big_endian>(p + 20, mtlr_0);
-	  write_insn<big_endian>(p + 24, sub_11_11_12);
+	  write_insn<big_endian>(p, addis_11_11 + ha(bcl_res0));
+	  p += 4;
+	  write_insn<big_endian>(p, mflr_0);
+	  p += 4;
+	  write_insn<big_endian>(p, bcl_20_31);
+	  p += 4;
+	  write_insn<big_endian>(p, addi_11_11 + l(bcl_res0));
+	  p += 4;
+	  write_insn<big_endian>(p, mflr_12);
+	  p += 4;
+	  write_insn<big_endian>(p, mtlr_0);
+	  p += 4;
+	  write_insn<big_endian>(p, sub_11_11_12);
+	  p += 4;
 
 	  Address got_bcl = g_o_t + 4 - (after_bcl_off + this->address());
 
-	  write_insn<big_endian>(p + 28, addis_12_12 + ha(got_bcl));
+	  write_insn<big_endian>(p, addis_12_12 + ha(got_bcl));
+	  p += 4;
 	  if (ha(got_bcl) == ha(got_bcl + 4))
 	    {
-	      write_insn<big_endian>(p + 32, lwz_0_12 + l(got_bcl));
-	      write_insn<big_endian>(p + 36, lwz_12_12 + l(got_bcl + 4));
+	      write_insn<big_endian>(p, lwz_0_12 + l(got_bcl));
+	      p += 4;
+	      write_insn<big_endian>(p, lwz_12_12 + l(got_bcl + 4));
 	    }
 	  else
 	    {
-	      write_insn<big_endian>(p + 32, lwzu_0_12 + l(got_bcl));
-	      write_insn<big_endian>(p + 36, lwz_12_12 + 4);
+	      write_insn<big_endian>(p, lwzu_0_12 + l(got_bcl));
+	      p += 4;
+	      write_insn<big_endian>(p, lwz_12_12 + 4);
 	    }
-	  write_insn<big_endian>(p + 40, mtctr_0);
-	  write_insn<big_endian>(p + 44, add_0_11_11);
-	  write_insn<big_endian>(p + 48, add_11_0_11);
-	  write_insn<big_endian>(p + 52, bctr);
-	  write_insn<big_endian>(p + 56, nop);
-	  write_insn<big_endian>(p + 60, nop);
+	  p += 4;
+	  write_insn<big_endian>(p, mtctr_0);
+	  p += 4;
+	  write_insn<big_endian>(p, add_0_11_11);
+	  p += 4;
+	  write_insn<big_endian>(p, add_11_0_11);
 	}
       else
 	{
 	  Address res0 = this->address();
 
-	  write_insn<big_endian>(p + 0, lis_12 + ha(g_o_t + 4));
-	  write_insn<big_endian>(p + 4, addis_11_11 + ha(-res0));
+	  write_insn<big_endian>(p, lis_12 + ha(g_o_t + 4));
+	  p += 4;
+	  write_insn<big_endian>(p, addis_11_11 + ha(-res0));
+	  p += 4;
 	  if (ha(g_o_t + 4) == ha(g_o_t + 8))
-	    write_insn<big_endian>(p + 8, lwz_0_12 + l(g_o_t + 4));
+	    write_insn<big_endian>(p, lwz_0_12 + l(g_o_t + 4));
 	  else
-	    write_insn<big_endian>(p + 8, lwzu_0_12 + l(g_o_t + 4));
-	  write_insn<big_endian>(p + 12, addi_11_11 + l(-res0));
-	  write_insn<big_endian>(p + 16, mtctr_0);
-	  write_insn<big_endian>(p + 20, add_0_11_11);
+	    write_insn<big_endian>(p, lwzu_0_12 + l(g_o_t + 4));
+	  p += 4;
+	  write_insn<big_endian>(p, addi_11_11 + l(-res0));
+	  p += 4;
+	  write_insn<big_endian>(p, mtctr_0);
+	  p += 4;
+	  write_insn<big_endian>(p, add_0_11_11);
+	  p += 4;
 	  if (ha(g_o_t + 4) == ha(g_o_t + 8))
-	    write_insn<big_endian>(p + 24, lwz_12_12 + l(g_o_t + 8));
+	    write_insn<big_endian>(p, lwz_12_12 + l(g_o_t + 8));
 	  else
-	    write_insn<big_endian>(p + 24, lwz_12_12 + 4);
-	  write_insn<big_endian>(p + 28, add_11_0_11);
-	  write_insn<big_endian>(p + 32, bctr);
-	  write_insn<big_endian>(p + 36, nop);
-	  write_insn<big_endian>(p + 40, nop);
-	  write_insn<big_endian>(p + 44, nop);
-	  write_insn<big_endian>(p + 48, nop);
-	  write_insn<big_endian>(p + 52, nop);
-	  write_insn<big_endian>(p + 56, nop);
-	  write_insn<big_endian>(p + 60, nop);
+	    write_insn<big_endian>(p, lwz_12_12 + 4);
+	  p += 4;
+	  write_insn<big_endian>(p, add_11_0_11);
+	}
+      p += 4;
+      write_insn<big_endian>(p, bctr);
+      p += 4;
+      while (p < end_p)
+	{
+	  write_insn<big_endian>(p, nop);
+	  p += 4;
 	}
-      p += 64;
     }
 
   of->write_output_view(off, oview_size, oview);
@@ -8161,7 +8202,7 @@ Target_powerpc<size, big_endian>::do_finalize_sections(
 	      this->glink_->finalize_data_size();
 	      odyn->add_section_plus_offset(elfcpp::DT_PPC64_GLINK,
 					    this->glink_,
-					    (this->glink_->pltresolve_size
+					    (this->glink_->pltresolve_size()
 					     - 32));
 	    }
 	  if (this->has_localentry0_ || this->has_tls_get_addr_opt_)
@@ -10187,8 +10228,6 @@ Target_selector_powerpc<64, false> target_selector_ppc64le;
 
 // Instantiate these constants for -O0
 template<int size, bool big_endian>
-const int Output_data_glink<size, big_endian>::pltresolve_size;
-template<int size, bool big_endian>
 const typename Output_data_glink<size, big_endian>::Address
   Output_data_glink<size, big_endian>::invalid_address;
 template<int size, bool big_endian>
diff --git a/ld/emultempl/ppc32elf.em b/ld/emultempl/ppc32elf.em
index 6852e9b..70dd5a0 100644
--- a/ld/emultempl/ppc32elf.em
+++ b/ld/emultempl/ppc32elf.em
@@ -38,7 +38,8 @@ static int notlsopt = 0;
 /* Choose the correct place for .got.  */
 static int old_got = 0;
 
-static struct ppc_elf_params params = { PLT_UNSET, -1, 0, 0, 0, 0, 0, 0, 0 };
+static struct ppc_elf_params params = { PLT_UNSET, 0, -1,
+					0, 0, 0, 0, 0, 0, 0 };
 
 static void
 ppc_after_open_output (void)
@@ -239,17 +240,22 @@ fi
 # parse_args and list_options functions.
 #
 PARSE_AND_LIST_PROLOGUE=${PARSE_AND_LIST_PROLOGUE}'
-#define OPTION_NO_TLS_OPT		321
-#define OPTION_NO_TLS_GET_ADDR_OPT	(OPTION_NO_TLS_OPT + 1)
-#define OPTION_NEW_PLT			(OPTION_NO_TLS_GET_ADDR_OPT + 1)
-#define OPTION_OLD_PLT			(OPTION_NEW_PLT + 1)
-#define OPTION_OLD_GOT			(OPTION_OLD_PLT + 1)
-#define OPTION_STUBSYMS			(OPTION_OLD_GOT + 1)
-#define OPTION_NO_STUBSYMS		(OPTION_STUBSYMS + 1)
-#define OPTION_PPC476_WORKAROUND	(OPTION_NO_STUBSYMS + 1)
-#define OPTION_NO_PPC476_WORKAROUND	(OPTION_PPC476_WORKAROUND + 1)
-#define OPTION_NO_PICFIXUP		(OPTION_NO_PPC476_WORKAROUND + 1)
-#define OPTION_VLE_RELOC_FIXUP		(OPTION_NO_PICFIXUP + 1)
+enum ppc32_opt
+{
+  OPTION_NO_TLS_OPT = 321,
+  OPTION_NO_TLS_GET_ADDR_OPT,
+  OPTION_NEW_PLT,
+  OPTION_OLD_PLT,
+  OPTION_PLT_ALIGN,
+  OPTION_NO_PLT_ALIGN,
+  OPTION_OLD_GOT,
+  OPTION_STUBSYMS,
+  OPTION_NO_STUBSYMS,
+  OPTION_PPC476_WORKAROUND,
+  OPTION_NO_PPC476_WORKAROUND,
+  OPTION_NO_PICFIXUP,
+  OPTION_VLE_RELOC_FIXUP
+};
 '
 
 PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
@@ -261,6 +267,8 @@ if test -z "$VXWORKS_BASE_EM_FILE" ; then
   PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
   { "secure-plt", no_argument, NULL, OPTION_NEW_PLT },
   { "bss-plt", no_argument, NULL, OPTION_OLD_PLT },
+  { "plt-align", no_argument, NULL, OPTION_PLT_ALIGN },
+  { "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN },
   { "sdata-got", no_argument, NULL, OPTION_OLD_GOT },'
 fi
 PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
@@ -272,21 +280,47 @@ PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
 
 PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}'
   fprintf (file, _("\
-  --emit-stub-syms            Label linker stubs with a symbol.\n\
-  --no-emit-stub-syms         Don'\''t label linker stubs with a symbol.\n\
-  --no-tls-optimize           Don'\''t try to optimize TLS accesses.\n\
-  --no-tls-get-addr-optimize  Don'\''t use a special __tls_get_addr call.\n'
+  --emit-stub-syms            Label linker stubs with a symbol.\n"
+		   ));
+  fprintf (file, _("\
+  --no-emit-stub-syms         Don'\''t label linker stubs with a symbol.\n"
+		   ));
+  fprintf (file, _("\
+  --no-tls-optimize           Don'\''t try to optimize TLS accesses.\n"
+		   ));
+  fprintf (file, _("\
+  --no-tls-get-addr-optimize  Don'\''t use a special __tls_get_addr call.\n"
+		   ));'
 if test -z "$VXWORKS_BASE_EM_FILE" ; then
   PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}'\
-  --secure-plt                Use new-style PLT if possible.\n\
-  --bss-plt                   Force old-style BSS PLT.\n\
-  --sdata-got                 Force GOT location just before .sdata.\n'
+  fprintf (file, _("\
+  --secure-plt                Use new-style PLT if possible.\n"
+		   ));
+  fprintf (file, _("\
+  --bss-plt                   Force old-style BSS PLT.\n"
+		   ));
+  fprintf (file, _("\
+  --plt-align                 Align PLT call stubs to fit cache lines.\n"
+		   ));
+  fprintf (file, _("\
+  --no-plt-align              Don'\''t align individual PLT call stubs.\n"
+		   ));
+  fprintf (file, _("\
+  --sdata-got                 Force GOT location just before .sdata.\n"
+		   ));'
 fi
 PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}'\
+  fprintf (file, _("\
   --ppc476-workaround [=pagesize]\n\
-                              Avoid a cache bug on ppc476.\n\
-  --no-ppc476-workaround      Disable workaround.\n\
-  --no-pic-fixup              Don'\''t edit non-pic to pic.\n\
+                              Avoid a cache bug on ppc476.\n"
+		   ));
+  fprintf (file, _("\
+  --no-ppc476-workaround      Disable workaround.\n"
+		   ));
+  fprintf (file, _("\
+  --no-pic-fixup              Don'\''t edit non-pic to pic.\n"
+		   ));
+  fprintf (file, _("\
   --vle-reloc-fixup           Correct old object file 16A/16D relocation.\n"
 		   ));
 '
@@ -316,6 +350,14 @@ PARSE_AND_LIST_ARGS_CASES=${PARSE_AND_LIST_ARGS_CASES}'
       params.plt_style = PLT_OLD;
       break;
 
+    case OPTION_PLT_ALIGN:
+      params.plt_stub_align = 5;
+      break;
+
+    case OPTION_NO_PLT_ALIGN:
+      params.plt_stub_align = 0;
+      break;
+
     case OPTION_OLD_GOT:
       old_got = 1;
       break;
diff --git a/ld/emultempl/ppc64elf.em b/ld/emultempl/ppc64elf.em
index 76eeb07..840bd44 100644
--- a/ld/emultempl/ppc64elf.em
+++ b/ld/emultempl/ppc64elf.em
@@ -412,6 +412,8 @@ ppc_add_stub_section (const char *stub_sec_name, asection *input_section)
       || !bfd_set_section_alignment (stub_file->the_bfd, stub_sec,
 				     (params.plt_stub_align > 5
 				      ? params.plt_stub_align
+				      : params.plt_stub_align < -5
+				      ? -params.plt_stub_align
 				      : 5)))
     goto err_ret;
 
diff --git a/ld/testsuite/ld-powerpc/elfv2exe.d b/ld/testsuite/ld-powerpc/elfv2exe.d
index 4c3c632..77bf6e2 100644
--- a/ld/testsuite/ld-powerpc/elfv2exe.d
+++ b/ld/testsuite/ld-powerpc/elfv2exe.d
@@ -7,13 +7,13 @@
 
 Disassembly of section \.text:
 
-0+100000c0 <.*\.plt_branch\.f2>:
+0+100000c0 <.*\.plt_branch\.f4>:
 .*:	(3d 82 ff ff|ff ff 82 3d) 	addis   r12,r2,-1
 .*:	(e9 8c 7f 28|28 7f 8c e9) 	ld      r12,32552\(r12\)
 .*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 
-0+100000d0 <.*\.plt_branch\.f4>:
+0+100000d0 <.*\.plt_branch\.f2>:
 .*:	(3d 82 ff ff|ff ff 82 3d) 	addis   r12,r2,-1
 .*:	(e9 8c 7f 30|30 7f 8c e9) 	ld      r12,32560\(r12\)
 .*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
@@ -27,12 +27,12 @@ Disassembly of section \.text:
 .*:	(f8 01 00 30|30 00 01 f8) 	std     r0,48\(r1\)
 .*:	(4b ff ff f5|f5 ff ff 4b) 	bl      .* <(f1|_start)\+0x8>
 .*:	(e8 62 80 08|08 80 62 e8) 	ld      r3,-32760\(r2\)
-.*:	(4b ff ff c5|c5 ff ff 4b) 	bl      .*\.plt_branch\.f2>
+.*:	(4b ff ff d5|d5 ff ff 4b) 	bl      .*\.plt_branch\.f2>
 .*:	(60 00 00 00|00 00 00 60) 	nop
 .*:	(e8 62 80 10|10 80 62 e8) 	ld      r3,-32752\(r2\)
 .*:	(48 00 87 81|81 87 00 48) 	bl      10008888 <f3>
 .*:	(60 00 00 00|00 00 00 60) 	nop
-.*:	(4b ff ff c1|c1 ff ff 4b) 	bl      .*\.plt_branch\.f4>
+.*:	(4b ff ff b1|b1 ff ff 4b) 	bl      .*\.plt_branch\.f4>
 .*:	(60 00 00 00|00 00 00 60) 	nop
 .*:	(e8 01 00 30|30 00 01 e8) 	ld      r0,48\(r1\)
 .*:	(38 21 00 20|20 00 21 38) 	addi    r1,r1,32
diff --git a/ld/testsuite/ld-powerpc/elfv2so.d b/ld/testsuite/ld-powerpc/elfv2so.d
index a577b2a..e7cd45c 100644
--- a/ld/testsuite/ld-powerpc/elfv2so.d
+++ b/ld/testsuite/ld-powerpc/elfv2so.d
@@ -14,23 +14,23 @@ Disassembly of section \.text:
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 	\.\.\.
 
-.* <.*\.plt_call\.f3>:
+.* <.*\.plt_call\.f1>:
 .*:	(f8 41 00 18|18 00 41 f8) 	std     r2,24\(r1\)
-.*:	(e9 82 80 28|28 80 82 e9) 	ld      r12,-32728\(r2\)
+.*:	(e9 82 80 40|40 80 82 e9) 	ld      r12,-32704\(r2\)
 .*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 	\.\.\.
 
-.* <.*\.plt_call\.f2>:
+.* <.*\.plt_call\.f3>:
 .*:	(f8 41 00 18|18 00 41 f8) 	std     r2,24\(r1\)
-.*:	(e9 82 80 30|30 80 82 e9) 	ld      r12,-32720\(r2\)
+.*:	(e9 82 80 28|28 80 82 e9) 	ld      r12,-32728\(r2\)
 .*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 	\.\.\.
 
-.* <.*\.plt_call\.f1>:
+.* <.*\.plt_call\.f2>:
 .*:	(f8 41 00 18|18 00 41 f8) 	std     r2,24\(r1\)
-.*:	(e9 82 80 40|40 80 82 e9) 	ld      r12,-32704\(r2\)
+.*:	(e9 82 80 30|30 80 82 e9) 	ld      r12,-32720\(r2\)
 .*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 	\.\.\.
@@ -41,12 +41,12 @@ Disassembly of section \.text:
 .*:	(7c 08 02 a6|a6 02 08 7c) 	mflr    r0
 .*:	(f8 21 ff e1|e1 ff 21 f8) 	stdu    r1,-32\(r1\)
 .*:	(f8 01 00 30|30 00 01 f8) 	std     r0,48\(r1\)
-.*:	(4b ff ff cd|cd ff ff 4b) 	bl      .*\.plt_call\.f1>
+.*:	(4b ff ff 8d|8d ff ff 4b) 	bl      .*\.plt_call\.f1>
 .*:	(e8 62 80 08|08 80 62 e8) 	ld      r3,-32760\(r2\)
-.*:	(4b ff ff a5|a5 ff ff 4b) 	bl      .*\.plt_call\.f2>
+.*:	(4b ff ff c5|c5 ff ff 4b) 	bl      .*\.plt_call\.f2>
 .*:	(e8 41 00 18|18 00 41 e8) 	ld      r2,24\(r1\)
 .*:	(e8 62 80 10|10 80 62 e8) 	ld      r3,-32752\(r2\)
-.*:	(4b ff ff 79|79 ff ff 4b) 	bl      .*\.plt_call\.f3>
+.*:	(4b ff ff 99|99 ff ff 4b) 	bl      .*\.plt_call\.f3>
 .*:	(e8 41 00 18|18 00 41 e8) 	ld      r2,24\(r1\)
 .*:	(4b ff ff 51|51 ff ff 4b) 	bl      .*\.plt_call\.f4>
 .*:	(e8 41 00 18|18 00 41 e8) 	ld      r2,24\(r1\)
diff --git a/ld/testsuite/ld-powerpc/relbrlt.d b/ld/testsuite/ld-powerpc/relbrlt.d
index a00b1ff..6f3db7d 100644
--- a/ld/testsuite/ld-powerpc/relbrlt.d
+++ b/ld/testsuite/ld-powerpc/relbrlt.d
@@ -8,32 +8,32 @@
 Disassembly of section \.text:
 
 0*100000c0 <_start>:
-[0-9a-f	 ]*:	(49 bf 00 2d|2d 00 bf 49) 	bl      .*
+[0-9a-f	 ]*:	(49 bf 00 21|21 00 bf 49) 	bl      .*
 [0-9a-f	 ]*: R_PPC64_REL24	\.text\+0x37e003c
 [0-9a-f	 ]*:	(60 00 00 00|00 00 00 60) 	nop
-[0-9a-f	 ]*:	(49 bf 00 19|19 00 bf 49) 	bl      .*
-[0-9a-f	 ]*: R_PPC64_REL24	\.text\+0x3bf0020
+[0-9a-f	 ]*:	(49 bf 00 1d|1d 00 bf 49) 	bl      .*
+[0-9a-f	 ]*: R_PPC64_REL24	\.text\+0x3bf002c
 [0-9a-f	 ]*:	(60 00 00 00|00 00 00 60) 	nop
 [0-9a-f	 ]*:	(49 bf 00 21|21 00 bf 49) 	bl      .*
-[0-9a-f	 ]*: R_PPC64_REL24	\.text\+0x57e0024
+[0-9a-f	 ]*: R_PPC64_REL24	\.text\+0x57e0030
 [0-9a-f	 ]*:	(60 00 00 00|00 00 00 60) 	nop
 [0-9a-f	 ]*:	00 00 00 00 	\.long 0x0
 [0-9a-f	 ]*:	(4b ff ff e4|e4 ff ff 4b) 	b       .* <_start>
 	\.\.\.
 
+[0-9a-f	 ]*<.*long_branch.*>:
+[0-9a-f	 ]*:	(49 bf 00 1c|1c 00 bf 49) 	b       .* <far>
+[0-9a-f	 ]*: R_PPC64_REL24	\*ABS\*\+0x137e00fc
+
 [0-9a-f	 ]*<.*plt_branch.*>:
-[0-9a-f	 ]*:	(e9 82 80 e8|e8 80 82 e9) 	ld      r12,-32536\(r2\)
-[0-9a-f	 ]*: R_PPC64_TOC16_DS	\*ABS\*\+0x157f00e8
+[0-9a-f	 ]*:	(e9 82 80 f8|f8 80 82 e9) 	ld      r12,-32520\(r2\)
+[0-9a-f	 ]*: R_PPC64_TOC16_DS	\*ABS\*\+0x157f00f8
 [0-9a-f	 ]*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 [0-9a-f	 ]*:	(4e 80 04 20|20 04 80 4e) 	bctr
 
-[0-9a-f	 ]*<.*long_branch.*>:
-[0-9a-f	 ]*:	(49 bf 00 10|10 00 bf 49) 	b       .* <far>
-[0-9a-f	 ]*: R_PPC64_REL24	\*ABS\*\+0x137e00fc
-
 [0-9a-f	 ]*<.*plt_branch.*>:
-[0-9a-f	 ]*:	(e9 82 80 f0|f0 80 82 e9) 	ld      r12,-32528\(r2\)
-[0-9a-f	 ]*: R_PPC64_TOC16_DS	\*ABS\*\+0x157f00f0
+[0-9a-f	 ]*:	(e9 82 81 00|00 81 82 e9) 	ld      r12,-32512\(r2\)
+[0-9a-f	 ]*: R_PPC64_TOC16_DS	\*ABS\*\+0x157f0100
 [0-9a-f	 ]*:	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 [0-9a-f	 ]*:	(4e 80 04 20|20 04 80 4e) 	bctr
 	\.\.\.
@@ -42,19 +42,19 @@ Disassembly of section \.text:
 [0-9a-f	 ]*:	(4e 80 00 20|20 00 80 4e) 	blr
 	\.\.\.
 
-0*13bf00e0 <far2far>:
+0*13bf00ec <far2far>:
 [0-9a-f	 ]*:	(4e 80 00 20|20 00 80 4e) 	blr
 	\.\.\.
 
-0*157e00e4 <huge>:
+0*157e00f0 <huge>:
 [0-9a-f	 ]*:	(4e 80 00 20|20 00 80 4e) 	blr
 
 Disassembly of section \.branch_lt:
 
-0*157f00e8 .*:
-[0-9a-f	 ]*:	(00 00 00 00|e0 00 bf 13) .*
-[0-9a-f	 ]*: R_PPC64_RELATIVE	\*ABS\*\+0x13bf00e0
-[0-9a-f	 ]*:	(13 bf 00 e0|00 00 00 00) .*
-[0-9a-f	 ]*:	(00 00 00 00|e4 00 7e 15) .*
-[0-9a-f	 ]*: R_PPC64_RELATIVE	\*ABS\*\+0x157e00e4
-[0-9a-f	 ]*:	(15 7e 00 e4|00 00 00 00) .*
+0*157f00f8 .*:
+[0-9a-f	 ]*:	(00 00 00 00|ec 00 bf 13) .*
+[0-9a-f	 ]*: R_PPC64_RELATIVE	\*ABS\*\+0x13bf00ec
+[0-9a-f	 ]*:	(13 bf 00 ec|00 00 00 00) .*
+[0-9a-f	 ]*:	(00 00 00 00|f0 00 7e 15) .*
+[0-9a-f	 ]*: R_PPC64_RELATIVE	\*ABS\*\+0x157e00f0
+[0-9a-f	 ]*:	(15 7e 00 f0|00 00 00 00) .*
diff --git a/ld/testsuite/ld-powerpc/relbrlt.s b/ld/testsuite/ld-powerpc/relbrlt.s
index cee0cdd..fade6a28 100644
--- a/ld/testsuite/ld-powerpc/relbrlt.s
+++ b/ld/testsuite/ld-powerpc/relbrlt.s
@@ -20,7 +20,7 @@ far:
  blr
 
  .section .text.pad2,"ax"
- .space 0x40ffe0
+ .space 0x40ffec
 
  .section .text.far2far,"ax"
 far2far:
diff --git a/ld/testsuite/ld-powerpc/tlsexe.d b/ld/testsuite/ld-powerpc/tlsexe.d
index 542b435..396f455 100644
--- a/ld/testsuite/ld-powerpc/tlsexe.d
+++ b/ld/testsuite/ld-powerpc/tlsexe.d
@@ -81,9 +81,6 @@ Disassembly of section \.text:
 .*	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*	(e9 6b 00 10|10 00 6b e9) 	ld      r11,16\(r11\)
 .*	(4e 80 04 20|20 04 80 4e) 	bctr
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
 .* <__tls_get_addr_opt@plt>:
 .*	(38 00 00 00|00 00 00 38) 	li      r0,0
-.*	(4b ff ff c4|c4 ff ff 4b) 	b       .*
+.*	(4b ff ff d0|d0 ff ff 4b) 	b       .*
diff --git a/ld/testsuite/ld-powerpc/tlsexe.r b/ld/testsuite/ld-powerpc/tlsexe.r
index f28ae9c..8c8b331 100644
--- a/ld/testsuite/ld-powerpc/tlsexe.r
+++ b/ld/testsuite/ld-powerpc/tlsexe.r
@@ -16,7 +16,7 @@ Section Headers:
  +\[[ 0-9]+\] \.dynstr +.*
  +\[[ 0-9]+\] \.rela\.dyn +.*
  +\[[ 0-9]+\] \.rela\.plt +.*
- +\[[ 0-9]+\] \.text +PROGBITS .* 0+128 0+ +AX +0 +0 +32
+ +\[[ 0-9]+\] \.text +PROGBITS .* 0+11c 0+ +AX +0 +0 +32
  +\[[ 0-9]+\] \.tdata +PROGBITS .* 0+38 0+ WAT +0 +0 +8
  +\[[ 0-9]+\] \.tbss +NOBITS .* 0+38 0+ WAT +0 +0 +8
  +\[[ 0-9]+\] \.dynamic +DYNAMIC .* 0+160 10 +WA +4 +0 +8
diff --git a/ld/testsuite/ld-powerpc/tlsexe32.d b/ld/testsuite/ld-powerpc/tlsexe32.d
index e7bc0d6..7902946 100644
--- a/ld/testsuite/ld-powerpc/tlsexe32.d
+++ b/ld/testsuite/ld-powerpc/tlsexe32.d
@@ -12,7 +12,7 @@ Disassembly of section \.text:
 .*:	(42 9f 00 05|05 00 9f 42) 	bcl     20,4\*cr7\+so,.* <_start\+0x4>
 .*:	(7f c8 02 a6|a6 02 c8 7f) 	mflr    r30
 .*:	(3f de 00 02|02 00 de 3f) 	addis   r30,r30,2
-.*:	(3b de 81 18|18 81 de 3b) 	addi    r30,r30,-32488
+.*:	(3b de 81 08|08 81 de 3b) 	addi    r30,r30,-32504
 .*:	(80 7f ff f4|f4 ff 7f 80) 	lwz     r3,-12\(r31\)
 .*:	(7c 63 12 14|14 12 63 7c) 	add     r3,r3,r2
 .*:	(38 7f ff f8|f8 ff 7f 38) 	addi    r3,r31,-8
@@ -52,24 +52,18 @@ Disassembly of section \.text:
 .*:	(7c 03 03 78|78 03 03 7c) 	mr      r3,r0
 .*:	(60 00 00 00|00 00 00 60) 	nop
 .*:	(3d 60 01 81|81 01 60 3d) 	lis     r11,385
-.*:	(81 6b 04 14|14 04 6b 81) 	lwz     r11,1044\(r11\)
+.*:	(81 6b 04 04|04 04 6b 81) 	lwz     r11,1028\(r11\)
 .*:	(7d 69 03 a6|a6 03 69 7d) 	mtctr   r11
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 
-.* <__glink>:
-.*:	(60 00 00 00|00 00 00 60) 	nop
-.*:	(60 00 00 00|00 00 00 60) 	nop
-.*:	(60 00 00 00|00 00 00 60) 	nop
-.*:	(60 00 00 00|00 00 00 60) 	nop
-
-.* <__glink_PLTresolve>:
+.* <__glink(_PLTresolve)?>:
 .*:	(3d 80 01 81|81 01 80 3d) 	lis     r12,385
 .*:	(3d 6b fe 80|80 fe 6b 3d) 	addis   r11,r11,-384
-.*:	(80 0c 04 0c|0c 04 0c 80) 	lwz     r0,1036\(r12\)
+.*:	(80 0c 03 fc|fc 03 0c 80) 	lwz     r0,1020\(r12\)
 .*:	(39 6b fd 20|20 fd 6b 39) 	addi    r11,r11,-736
 .*:	(7c 09 03 a6|a6 03 09 7c) 	mtctr   r0
 .*:	(7c 0b 5a 14|14 5a 0b 7c) 	add     r0,r11,r11
-.*:	(81 8c 04 10|10 04 8c 81) 	lwz     r12,1040\(r12\)
+.*:	(81 8c 04 00|00 04 8c 81) 	lwz     r12,1024\(r12\)
 .*:	(7d 60 5a 14|14 5a 60 7d) 	add     r11,r0,r11
 .*:	(4e 80 04 20|20 04 80 4e) 	bctr
 .*:	(60 00 00 00|00 00 00 60) 	nop
diff --git a/ld/testsuite/ld-powerpc/tlsexe32.g b/ld/testsuite/ld-powerpc/tlsexe32.g
index e402f6b..b6afc9c 100644
--- a/ld/testsuite/ld-powerpc/tlsexe32.g
+++ b/ld/testsuite/ld-powerpc/tlsexe32.g
@@ -7,5 +7,5 @@
 .*
 
 Contents of section \.got:
-.* 00000000 00000000 00000000 (0181034c|4c038101)  .*
+.* 00000000 00000000 00000000 (0181033c|3c038101)  .*
 .* 00000000 00000000  .*
diff --git a/ld/testsuite/ld-powerpc/tlsexe32.r b/ld/testsuite/ld-powerpc/tlsexe32.r
index 6061835..a7387e4 100644
--- a/ld/testsuite/ld-powerpc/tlsexe32.r
+++ b/ld/testsuite/ld-powerpc/tlsexe32.r
@@ -16,7 +16,7 @@ Section Headers:
  +\[[ 0-9]+\] \.dynstr +.*
  +\[[ 0-9]+\] \.rela\.dyn +.*
  +\[[ 0-9]+\] \.rela\.plt +.*
- +\[[ 0-9]+\] \.text +PROGBITS +[0-9a-f]+ [0-9a-f]+ 000100 00 +AX +0 +0 +16
+ +\[[ 0-9]+\] \.text +PROGBITS +[0-9a-f]+ [0-9a-f]+ 0000f0 00 +AX +0 +0 +16
  +\[[ 0-9]+\] \.tdata +PROGBITS +[0-9a-f]+ [0-9a-f]+ 00001c 00 WAT +0 +0 +4
  +\[[ 0-9]+\] \.tbss +NOBITS +[0-9a-f]+ [0-9a-f]+ 00001c 00 WAT +0 +0 +4
  +\[[ 0-9]+\] \.dynamic +DYNAMIC +[0-9a-f]+ [0-9a-f]+ [0-9a-f]+ 08 +WA +4 +0 +4
diff --git a/ld/testsuite/ld-powerpc/tlsexetoc.d b/ld/testsuite/ld-powerpc/tlsexetoc.d
index d216a2a..4bab567 100644
--- a/ld/testsuite/ld-powerpc/tlsexetoc.d
+++ b/ld/testsuite/ld-powerpc/tlsexetoc.d
@@ -65,9 +65,6 @@ Disassembly of section \.text:
 .*	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*	(e9 6b 00 10|10 00 6b e9) 	ld      r11,16\(r11\)
 .*	(4e 80 04 20|20 04 80 4e) 	bctr
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
 .* <__tls_get_addr_opt@plt>:
 .*	(38 00 00 00|00 00 00 38) 	li      r0,0
-.*	(4b ff ff c4|c4 ff ff 4b) 	b       .*
+.*	(4b ff ff d0|d0 ff ff 4b) 	b       .*
diff --git a/ld/testsuite/ld-powerpc/tlsexetoc.r b/ld/testsuite/ld-powerpc/tlsexetoc.r
index ce746d9..14096ac 100644
--- a/ld/testsuite/ld-powerpc/tlsexetoc.r
+++ b/ld/testsuite/ld-powerpc/tlsexetoc.r
@@ -16,7 +16,7 @@ Section Headers:
  +\[[ 0-9]+\] \.dynstr +.*
  +\[[ 0-9]+\] \.rela\.dyn +.*
  +\[[ 0-9]+\] \.rela\.plt +.*
- +\[[ 0-9]+\] \.text +PROGBITS .* 0+e8 0+ +AX +0 +0 +32
+ +\[[ 0-9]+\] \.text +PROGBITS .* 0+dc 0+ +AX +0 +0 +32
  +\[[ 0-9]+\] \.tdata +PROGBITS .* 0+38 0+ WAT +0 +0 +8
  +\[[ 0-9]+\] \.tbss +NOBITS .* 0+38 0+ WAT +0 +0 +8
  +\[[ 0-9]+\] \.dynamic +DYNAMIC .* 0+160 10 +WA +4 +0 +8
diff --git a/ld/testsuite/ld-powerpc/tlsopt5_32.d b/ld/testsuite/ld-powerpc/tlsopt5_32.d
index 4f1bf06..822c93c 100644
--- a/ld/testsuite/ld-powerpc/tlsopt5_32.d
+++ b/ld/testsuite/ld-powerpc/tlsopt5_32.d
@@ -40,17 +40,11 @@ Disassembly of section \.text:
 .*:	(20 04 80 4e|4e 80 04 20) 	bctr
 .*:	(00 00 00 60|60 00 00 00) 	nop
 
-.* <__glink>:
-.*:	(00 00 00 60|60 00 00 00) 	nop
-.*:	(00 00 00 60|60 00 00 00) 	nop
-.*:	(00 00 00 60|60 00 00 00) 	nop
-.*:	(00 00 00 60|60 00 00 00) 	nop
-
-.* <__glink_PLTresolve>:
+.* <__glink(_PLTresolve)?>:
 .*:	(00 00 6b 3d|3d 6b 00 00) 	addis   r11,r11,0
 .*:	(a6 02 08 7c|7c 08 02 a6) 	mflr    r0
 .*:	(05 00 9f 42|42 9f 00 05) 	bcl     .*
-.*:	(1c 00 6b 39|39 6b 00 1c) 	addi    r11,r11,28
+.*:	(0c 00 6b 39|39 6b 00 0c) 	addi    r11,r11,12
 .*:	(a6 02 88 7d|7d 88 02 a6) 	mflr    r12
 .*:	(a6 03 08 7c|7c 08 03 a6) 	mtlr    r0
 .*:	(50 58 6c 7d|7d 6c 58 50) 	subf    r11,r12,r11
diff --git a/ld/testsuite/ld-powerpc/tlsso.d b/ld/testsuite/ld-powerpc/tlsso.d
index 8ac25e6..58c9dee 100644
--- a/ld/testsuite/ld-powerpc/tlsso.d
+++ b/ld/testsuite/ld-powerpc/tlsso.d
@@ -71,9 +71,6 @@ Disassembly of section \.text:
 .*	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*	(e9 6b 00 10|10 00 6b e9) 	ld      r11,16\(r11\)
 .*	(4e 80 04 20|20 04 80 4e) 	bctr
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
 .* <__tls_get_addr@plt>:
 .*	(38 00 00 00|00 00 00 38) 	li      r0,0
-.*	(4b ff ff c4|c4 ff ff 4b) 	b       .*
+.*	(4b ff ff d0|d0 ff ff 4b) 	b       .*
diff --git a/ld/testsuite/ld-powerpc/tlstocso.d b/ld/testsuite/ld-powerpc/tlstocso.d
index ad9481a..8d55fee 100644
--- a/ld/testsuite/ld-powerpc/tlstocso.d
+++ b/ld/testsuite/ld-powerpc/tlstocso.d
@@ -55,9 +55,6 @@ Disassembly of section \.text:
 .*	(7d 89 03 a6|a6 03 89 7d) 	mtctr   r12
 .*	(e9 6b 00 10|10 00 6b e9) 	ld      r11,16\(r11\)
 .*	(4e 80 04 20|20 04 80 4e) 	bctr
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
-.*	(60 00 00 00|00 00 00 60) 	nop
 .* <__tls_get_addr@plt>:
 .*	(38 00 00 00|00 00 00 38) 	li      r0,0
-.*	(4b ff ff c4|c4 ff ff 4b) 	b       .*
+.*	(4b ff ff d0|d0 ff ff 4b) 	b       .*

-- 
Alan Modra
Australia Development Lab, IBM



More information about the Binutils mailing list