RFC: Optimize out i386/x86-64 JUMP_SLOT relocation

H.J. Lu hongjiu.lu@intel.com
Sun Nov 23 15:07:00 GMT 2014


Hi,

This optimization leads to one glibc test failure:

FAIL: elf/check-localplt

due to

Missing required PLT reference: ld.so: free
Missing required PLT reference: libc.so: malloc
Missing required PLT reference: libc.so: free

since they are optimized out:

[hjl@gnu-6 build-x86_64-linux]$ readelf -r elf/ld.so | grep free
000000220fe8  000600000006 R_X86_64_GLOB_DAT 0000000000016020 free@@GLIBC_2.2.5 + 0
[hjl@gnu-6 build-x86_64-linux]$ readelf -r libc.so | grep free
000000394ee8  00d400000006 R_X86_64_GLOB_DAT 00000000003977a8 __free_hook@@GLIBC_2.2.5 + 0
000000394f98  08a400000006 R_X86_64_GLOB_DAT 000000000007aba0 free@@GLIBC_2.2.5 + 0
[hjl@gnu-6 build-x86_64-linux]$ readelf -r libc.so | grep malloc
000000394db0  06ec00000006 R_X86_64_GLOB_DAT 00000000003977b0 __malloc_initialize_ho@@GLIBC_2.2.5 + 0
000000394e38  049600000006 R_X86_64_GLOB_DAT 000000000007a4c0 malloc@@GLIBC_2.2.5 + 0
000000394ee0  043800000006 R_X86_64_GLOB_DAT 0000000000395630 __malloc_hook@@GLIBC_2.2.5 + 0
[hjl@gnu-6 build-x86_64-linux]$ 

Any comments?

Thanks.

H.J.
---
When there are both PLT and GOT references to the same function symbol,
linker will create a GOTPLT slot for PLT entry and a GOT slot for GOT
reference.  A run-time JUMP_SLOT relocation is created to update the
GOTPLT slot and a run-time GLOB_DAT relocation is created to update the
GOT slot.  Both JUMP_SLOT and GLOB_DAT relocations will apply the same
symbol value to GOTPLT and GOT slots, respectively, at run-time.

This optimization combines GOTPLT and GOT slots into a single GOT slot
and removes the run-time JUMP_SLOT relocation.  It replaces the regular
PLT entry:

	indirect jump	[GOTPLT slot]
	push		relocation index
	jump		PLT0

with an GOT PLT entry with an indirect jump via the GOT slot:

	indirect jump	[GOT slot]
	nop

and resolves PLT reference to the GOT PLT entry.

We must avoid this optimization if pointer equality is needed since
we don't clear symbol value in this case and the dynamic linker won't
update the GOT slot.  Otherwise, the resulting binary will get into an
infinite loop at run-time.

bfd/

	* elf32-i386.c (elf_i386_got_plt_entry): New.
	(elf_i386_pic_got_plt_entry): Likewise.
	(elf_i386_link_hash_entry): Add plt_got.
	(elf_i386_link_hash_table): Likewise.
	(elf_i386_link_hash_newfunc): Initialize plt_got.offset to -1.
	(elf_i386_get_local_sym_hash): Likewise.
	(elf_i386_check_relocs): Create the GOT PLT if there are both
	PLT and GOT references when the regular PLT is used.
	(elf_i386_allocate_dynrelocs): Use the GOT PLT if there are
	both PLT and GOT references unless pointer equality is needed.
	(elf_i386_relocate_section): Also check the GOT PLT when
	resolving R_386_PLT32.
	(elf_i386_finish_dynamic_symbol): Use the GOT PLT if it is
	available.

	* elf64-x86-64.c (elf_x86_64_link_hash_entry): Add plt_got.
	(elf_x86_64_link_hash_table): Likewise.
	(elf_x86_64_link_hash_newfunc): Initialize plt_got.offset to -1.
	(elf_x86_64_get_local_sym_hash): Likewise.
	(elf_x86_64_check_relocs): Create the GOT PLT if there are both
	PLT and GOT references when the regular PLT is used.
	(elf_x86_64_allocate_dynrelocs): Use the GOT PLT if there are
	both PLT and GOT references unless pointer equality is needed.
	(elf_x86_64_relocate_section): Also check the GOT PLT when
	resolving R_X86_64_PLT32.
	(elf_x86_64_finish_dynamic_symbol): Use the GOT PLT if it is

ld/

	* emulparams/elf_i386.sh (TINY_READONLY_SECTION): New.
	* emulparams/elf_x86_64.sh (TINY_READONLY_SECTION): Add .plt.got.

ld/testsuite/

	* ld-i386/i386.exp: Add run-time relocation tests for plt-main.
	* ld-i386/plt-main.rd: New file.
	* ld-x86-64/plt-main-bnd.dd: Likewise.
	* ld-x86-64/plt-main.rd: Likewise.
	* ld-x86-64/x86-64.exp: Add run-time relocation tests for
	plt-main.
---
 bfd/elf32-i386.c                       | 222 ++++++++++++++++++++++++++----
 bfd/elf64-x86-64.c                     | 239 +++++++++++++++++++++++++++------
 ld/emulparams/elf_i386.sh              |   4 +
 ld/emulparams/elf_x86_64.sh            |   5 +-
 ld/testsuite/ld-i386/i386.exp          |  18 +++
 ld/testsuite/ld-i386/plt-main.rd       |   4 +
 ld/testsuite/ld-x86-64/plt-main-bnd.dd |   7 +
 ld/testsuite/ld-x86-64/plt-main.rd     |   4 +
 ld/testsuite/ld-x86-64/x86-64.exp      |  65 +++++++++
 9 files changed, 499 insertions(+), 69 deletions(-)
 create mode 100644 ld/testsuite/ld-i386/plt-main.rd
 create mode 100644 ld/testsuite/ld-x86-64/plt-main-bnd.dd
 create mode 100644 ld/testsuite/ld-x86-64/plt-main.rd

diff --git a/bfd/elf32-i386.c b/bfd/elf32-i386.c
index 64cdece..5c35d65 100644
--- a/bfd/elf32-i386.c
+++ b/bfd/elf32-i386.c
@@ -580,6 +580,24 @@ static const bfd_byte elf_i386_pic_plt_entry[PLT_ENTRY_SIZE] =
   0, 0, 0, 0	/* replaced with offset to start of .plt.  */
 };
 
+/* Entries in the GOT procedure linkage table look like this.  */
+
+static const bfd_byte elf_i386_got_plt_entry[8] =
+{
+  0xff, 0x25,	/* jmp indirect */
+  0, 0, 0, 0,	/* replaced with offset of this symbol in .got.  */
+  0x66, 0x90	/* xchg %ax,%ax  */
+};
+
+/* Entries in the PIC GOT procedure linkage table look like this.  */
+
+static const bfd_byte elf_i386_pic_got_plt_entry[8] =
+{
+  0xff, 0xa3,	/* jmp *offset(%ebx)  */
+  0, 0, 0, 0,	/* replaced with offset of this symbol in .got.  */
+  0x66, 0x90	/* xchg %ax,%ax  */
+};
+
 /* .eh_frame covering the .plt section.  */
 
 static const bfd_byte elf_i386_eh_frame_plt[] =
@@ -736,6 +754,10 @@ struct elf_i386_link_hash_entry
   (GOT_TLS_GD_P (type) || GOT_TLS_GDESC_P (type))
   unsigned char tls_type;
 
+  /* Information about the GOT PLT entry. Filled when there are both
+     GOT and PLT relocations against the same function.  */
+  union gotplt_union plt_got;
+
   /* Offset of the GOTPLT entry reserved for the TLS descriptor,
      starting at the end of the jump table.  */
   bfd_vma tlsdesc_got;
@@ -785,6 +807,7 @@ struct elf_i386_link_hash_table
   asection *sdynbss;
   asection *srelbss;
   asection *plt_eh_frame;
+  asection *plt_got;
 
   union
   {
@@ -854,6 +877,7 @@ elf_i386_link_hash_newfunc (struct bfd_hash_entry *entry,
       eh = (struct elf_i386_link_hash_entry *) entry;
       eh->dyn_relocs = NULL;
       eh->tls_type = GOT_UNKNOWN;
+      eh->plt_got.offset = (bfd_vma) -1;
       eh->tlsdesc_got = (bfd_vma) -1;
     }
 
@@ -922,6 +946,7 @@ elf_i386_get_local_sym_hash (struct elf_i386_link_hash_table *htab,
       ret->elf.indx = sec->id;
       ret->elf.dynstr_index = ELF32_R_SYM (rel->r_info);
       ret->elf.dynindx = -1;
+      ret->plt_got.offset = (bfd_vma) -1;
       *slot = ret;
     }
   return &ret->elf;
@@ -1418,6 +1443,7 @@ elf_i386_check_relocs (bfd *abfd,
   const Elf_Internal_Rela *rel;
   const Elf_Internal_Rela *rel_end;
   asection *sreloc;
+  bfd_boolean use_plt_got;
 
   if (info->relocatable)
     return TRUE;
@@ -1428,6 +1454,10 @@ elf_i386_check_relocs (bfd *abfd,
   if (htab == NULL)
     return FALSE;
 
+  use_plt_got = (!get_elf_i386_backend_data (abfd)->is_vxworks
+		 && (get_elf_i386_backend_data (abfd)
+		     == &elf_i386_arch_bed));
+
   symtab_hdr = &elf_symtab_hdr (abfd);
   sym_hashes = elf_sym_hashes (abfd);
 
@@ -1819,6 +1849,39 @@ do_size:
 	default:
 	  break;
 	}
+
+      if (use_plt_got
+	  && h != NULL
+	  && h->plt.refcount > 0
+	  && h->got.refcount > 0
+	  && htab->plt_got == NULL)
+	{
+	  /* Create the GOT procedure linkage table.  */
+	  unsigned int plt_got_align;
+	  const struct elf_backend_data *bed;
+
+	  bed = get_elf_backend_data (info->output_bfd);
+	  BFD_ASSERT (sizeof (elf_i386_got_plt_entry) == 8
+		      && (sizeof (elf_i386_got_plt_entry)
+			  == sizeof (elf_i386_pic_got_plt_entry)));
+	  plt_got_align = 3;
+
+	  if (htab->elf.dynobj == NULL)
+	    htab->elf.dynobj = abfd;
+	  htab->plt_got
+	    = bfd_make_section_anyway_with_flags (htab->elf.dynobj,
+						  ".plt.got",
+						  (bed->dynamic_sec_flags
+						   | SEC_ALLOC
+						   | SEC_CODE
+						   | SEC_LOAD
+						   | SEC_READONLY));
+	  if (htab->plt_got == NULL
+	      || !bfd_set_section_alignment (htab->elf.dynobj,
+					     htab->plt_got,
+					     plt_got_align))
+	    return FALSE;
+	}
     }
 
   return TRUE;
@@ -2191,6 +2254,24 @@ elf_i386_allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
 
   plt_entry_size = GET_PLT_ENTRY_SIZE (info->output_bfd);
 
+  /* We can't use the GOT PLT if pointer equality is needed since
+     finish_dynamic_symbol won't clear symbol value and the dynamic
+     linker won't update the GOT slot.  We will get into an infinite
+     loop at run-time.  */
+  if (htab->plt_got != NULL
+      && h->type != STT_GNU_IFUNC
+      && !h->pointer_equality_needed
+      && h->plt.refcount > 0
+      && h->got.refcount > 0)
+    {
+      /* Don't use the regular PLT if there are both GOT and GOTPLT
+         reloctions.  */
+      h->plt.offset = (bfd_vma) -1;
+
+      /* Use the GOT PLT.  */
+      eh->plt_got.refcount = 1;
+    }
+
   /* Since STT_GNU_IFUNC symbol must go through PLT, we handle it
      here if it is defined and referenced in a non-shared object.  */
   if (h->type == STT_GNU_IFUNC
@@ -2199,8 +2280,10 @@ elf_i386_allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
                                                plt_entry_size,
 					       plt_entry_size, 4);
   else if (htab->elf.dynamic_sections_created
-	   && h->plt.refcount > 0)
+	   && (h->plt.refcount > 0 || eh->plt_got.refcount > 0))
     {
+      bfd_boolean use_plt_got = eh->plt_got.refcount > 0;
+
       /* Make sure this symbol is output as a dynamic symbol.
 	 Undefined weak syms won't yet be marked as dynamic.  */
       if (h->dynindx == -1
@@ -2214,13 +2297,17 @@ elf_i386_allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
 	  || WILL_CALL_FINISH_DYNAMIC_SYMBOL (1, 0, h))
 	{
 	  asection *s = htab->elf.splt;
+	  asection *got_s = htab->plt_got;
 
 	  /* If this is the first .plt entry, make room for the special
 	     first entry.  */
 	  if (s->size == 0)
 	    s->size = plt_entry_size;
 
-	  h->plt.offset = s->size;
+	  if (use_plt_got)
+	    eh->plt_got.offset = got_s->size;
+	  else
+	    h->plt.offset = s->size;
 
 	  /* If this symbol is not defined in a regular file, and we are
 	     not generating a shared library, then set the symbol to this
@@ -2230,20 +2317,36 @@ elf_i386_allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf)
 	  if (! info->shared
 	      && !h->def_regular)
 	    {
-	      h->root.u.def.section = s;
-	      h->root.u.def.value = h->plt.offset;
+	      if (use_plt_got)
+		{
+		  /* We need to make a call to the entry of the GOT PLT
+		     instead of regular PLT entry.  */
+		  h->root.u.def.section = got_s;
+		  h->root.u.def.value = eh->plt_got.offset;
+		}
+	      else
+		{
+		  h->root.u.def.section = s;
+		  h->root.u.def.value = h->plt.offset;
+		}
 	    }
 
 	  /* Make room for this entry.  */
-	  s->size += plt_entry_size;
+	  if (use_plt_got)
+	    got_s->size += sizeof (elf_i386_got_plt_entry);
+	  else
+	    {
+	      s->size += plt_entry_size;
 
-	  /* We also need to make an entry in the .got.plt section, which
-	     will be placed in the .got section by the linker script.  */
-	  htab->elf.sgotplt->size += 4;
+	      /* We also need to make an entry in the .got.plt section,
+		 which will be placed in the .got section by the linker
+		 script.  */
+	      htab->elf.sgotplt->size += 4;
 
-	  /* We also need to make an entry in the .rel.plt section.  */
-	  htab->elf.srelplt->size += sizeof (Elf32_External_Rel);
-	  htab->elf.srelplt->reloc_count++;
+	      /* We also need to make an entry in the .rel.plt section.  */
+	      htab->elf.srelplt->size += sizeof (Elf32_External_Rel);
+	      htab->elf.srelplt->reloc_count++;
+	    }
 
 	  if (get_elf_i386_backend_data (info->output_bfd)->is_vxworks
               && !info->shared)
@@ -2886,6 +2989,7 @@ elf_i386_size_dynamic_sections (bfd *output_bfd, struct bfd_link_info *info)
       else if (s == htab->elf.sgotplt
 	       || s == htab->elf.iplt
 	       || s == htab->elf.igotplt
+	       || s == htab->plt_got
 	       || s == htab->plt_eh_frame
 	       || s == htab->sdynbss)
 	{
@@ -3183,15 +3287,17 @@ elf_i386_relocate_section (bfd *output_bfd,
       reloc_howto_type *howto;
       unsigned long r_symndx;
       struct elf_link_hash_entry *h;
+      struct elf_i386_link_hash_entry *eh;
       Elf_Internal_Sym *sym;
       asection *sec;
-      bfd_vma off, offplt;
+      bfd_vma off, offplt, plt_offset;
       bfd_vma relocation;
       bfd_boolean unresolved_reloc;
       bfd_reloc_status_type r;
       unsigned int indx;
       int tls_type;
       bfd_vma st_size;
+      asection *resolved_plt;
 
       r_type = ELF32_R_TYPE (rel->r_info);
       if (r_type == R_386_GNU_VTINHERIT
@@ -3665,7 +3771,9 @@ elf_i386_relocate_section (bfd *output_bfd,
 	  if (h == NULL)
 	    break;
 
-	  if (h->plt.offset == (bfd_vma) -1
+	  eh = (struct elf_i386_link_hash_entry *) h;
+	  if ((h->plt.offset == (bfd_vma) -1
+	       && eh->plt_got.offset == (bfd_vma) -1)
 	      || htab->elf.splt == NULL)
 	    {
 	      /* We didn't make a PLT entry for this symbol.  This
@@ -3674,9 +3782,20 @@ elf_i386_relocate_section (bfd *output_bfd,
 	      break;
 	    }
 
-	  relocation = (htab->elf.splt->output_section->vma
-			+ htab->elf.splt->output_offset
-			+ h->plt.offset);
+	  if (h->plt.offset != (bfd_vma) -1)
+	    {
+	      resolved_plt = htab->elf.splt;
+	      plt_offset = h->plt.offset;
+	    }
+	  else
+	    {
+	      resolved_plt = htab->plt_got;
+	      plt_offset = eh->plt_got.offset;
+	    }
+
+	  relocation = (resolved_plt->output_section->vma
+			+ resolved_plt->output_offset
+			+ plt_offset);
 	  unresolved_reloc = FALSE;
 	  break;
 
@@ -4427,6 +4546,7 @@ elf_i386_finish_dynamic_symbol (bfd *output_bfd,
   struct elf_i386_link_hash_table *htab;
   unsigned plt_entry_size;
   const struct elf_i386_backend_data *abed;
+  struct elf_i386_link_hash_entry *eh;
 
   htab = elf_i386_hash_table (info);
   if (htab == NULL)
@@ -4435,6 +4555,8 @@ elf_i386_finish_dynamic_symbol (bfd *output_bfd,
   abed = get_elf_i386_backend_data (output_bfd);
   plt_entry_size = GET_PLT_ENTRY_SIZE (output_bfd);
 
+  eh = (struct elf_i386_link_hash_entry *) h;
+
   if (h->plt.offset != (bfd_vma) -1)
     {
       bfd_vma plt_index;
@@ -4599,21 +4721,65 @@ elf_i386_finish_dynamic_symbol (bfd *output_bfd,
 		      plt->contents + h->plt.offset
                       + abed->plt->plt_plt_offset);
 	}
+    }
+  else if (eh->plt_got.offset != (bfd_vma) -1)
+    {
+      bfd_vma got_offset, plt_offset;
+      asection *plt, *got, *gotplt;
+      const bfd_byte *got_plt_entry;
+
+      /* Offset of displacement of the indirect jump.  */
+      bfd_vma plt_got_offset = 2;
 
-      if (!h->def_regular)
+      /* Set the entry in the GOT procedure linkage table.  */
+      plt = htab->plt_got;
+      got = htab->elf.sgot;
+      gotplt = htab->elf.sgotplt;
+      got_offset = h->got.offset;
+
+      if (got_offset == (bfd_vma) -1
+	  || plt == NULL
+	  || got == NULL
+	  || gotplt == NULL)
+	abort ();
+
+      /* Fill in the entry in the GOT procedure linkage table.  */
+      if (! info->shared)
 	{
-	  /* Mark the symbol as undefined, rather than as defined in
-	     the .plt section.  Leave the value if there were any
-	     relocations where pointer equality matters (this is a clue
-	     for the dynamic linker, to make function pointer
-	     comparisons work between an application and shared
-	     library), otherwise set it to zero.  If a function is only
-	     called from a binary, there is no need to slow down
-	     shared libraries because of that.  */
-	  sym->st_shndx = SHN_UNDEF;
-	  if (!h->pointer_equality_needed)
-	    sym->st_value = 0;
+	  got_plt_entry = elf_i386_got_plt_entry;
+	  got_offset += got->output_section->vma + got->output_offset;
 	}
+      else
+	{
+	  got_plt_entry = elf_i386_pic_got_plt_entry;
+	  got_offset += (got->output_section->vma
+			 + got->output_offset
+			 - gotplt->output_section->vma
+			 - gotplt->output_offset);
+	}
+
+      plt_offset = eh->plt_got.offset;
+      memcpy (plt->contents + plt_offset, got_plt_entry,
+	      sizeof (elf_i386_got_plt_entry));
+      bfd_put_32 (output_bfd, got_offset,
+		  plt->contents + plt_offset + plt_got_offset);
+    }
+
+  if (!h->def_regular
+      && (h->plt.offset != (bfd_vma) -1
+	  || eh->plt_got.offset != (bfd_vma) -1))
+    {
+      /* Mark the symbol as undefined, rather than as defined in
+	 the .plt section.  Leave the value if there were any
+	 relocations where pointer equality matters (this is a clue
+	 for the dynamic linker, to make function pointer
+	 comparisons work between an application and shared
+	 library), otherwise set it to zero.  If a function is only
+	 called from a binary, there is no need to slow down
+	 shared libraries because of that.  */
+      sym->st_shndx = SHN_UNDEF;
+      if (!h->pointer_equality_needed)
+	sym->st_value = 0;
     }
 
   if (h->got.offset != (bfd_vma) -1
diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 66c24e8..b10c0c1 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -760,6 +760,10 @@ struct elf_x86_64_link_hash_entry
   /* TRUE if symbol has at least one BND relocation.  */
   bfd_boolean has_bnd_reloc;
 
+  /* Information about the GOT PLT entry. Filled when there are both
+     GOT and PLT relocations against the same function.  */
+  union gotplt_union plt_got;
+
   /* Information about the second PLT entry. Filled when has_bnd_reloc is
      set.  */
   union gotplt_union plt_bnd;
@@ -815,6 +819,7 @@ struct elf_x86_64_link_hash_table
   asection *srelbss;
   asection *plt_eh_frame;
   asection *plt_bnd;
+  asection *plt_got;
 
   union
   {
@@ -894,6 +899,7 @@ elf_x86_64_link_hash_newfunc (struct bfd_hash_entry *entry,
       eh->tls_type = GOT_UNKNOWN;
       eh->has_bnd_reloc = FALSE;
       eh->plt_bnd.offset = (bfd_vma) -1;
+      eh->plt_got.offset = (bfd_vma) -1;
       eh->tlsdesc_got = (bfd_vma) -1;
     }
 
@@ -962,6 +968,7 @@ elf_x86_64_get_local_sym_hash (struct elf_x86_64_link_hash_table *htab,
       ret->elf.indx = sec->id;
       ret->elf.dynstr_index = htab->r_sym (rel->r_info);
       ret->elf.dynindx = -1;
+      ret->plt_got.offset = (bfd_vma) -1;
       *slot = ret;
     }
   return &ret->elf;
@@ -1514,6 +1521,7 @@ elf_x86_64_check_relocs (bfd *abfd, struct bfd_link_info *info,
   const Elf_Internal_Rela *rel;
   const Elf_Internal_Rela *rel_end;
   asection *sreloc;
+  bfd_boolean use_plt_got;
 
   if (info->relocatable)
     return TRUE;
@@ -1524,6 +1532,8 @@ elf_x86_64_check_relocs (bfd *abfd, struct bfd_link_info *info,
   if (htab == NULL)
     return FALSE;
 
+  use_plt_got = get_elf_x86_64_backend_data (abfd) == &elf_x86_64_arch_bed;
+
   symtab_hdr = &elf_symtab_hdr (abfd);
   sym_hashes = elf_sym_hashes (abfd);
 
@@ -2039,6 +2049,39 @@ do_size:
 	default:
 	  break;
 	}
+
+      if (use_plt_got
+	  && h != NULL
+	  && h->plt.refcount > 0
+	  && h->got.refcount > 0
+	  && htab->plt_got == NULL)
+	{
+	  /* Create the GOT procedure linkage table.  */
+	  unsigned int plt_got_align;
+	  const struct elf_backend_data *bed;
+
+	  bed = get_elf_backend_data (info->output_bfd);
+	  BFD_ASSERT (sizeof (elf_x86_64_legacy_plt2_entry) == 8
+		      && (sizeof (elf_x86_64_bnd_plt2_entry)
+			  == sizeof (elf_x86_64_legacy_plt2_entry)));
+	  plt_got_align = 3;
+
+	  if (htab->elf.dynobj == NULL)
+	    htab->elf.dynobj = abfd;
+	  htab->plt_got
+	    = bfd_make_section_anyway_with_flags (htab->elf.dynobj,
+						  ".plt.got",
+						  (bed->dynamic_sec_flags
+						   | SEC_ALLOC
+						   | SEC_CODE
+						   | SEC_LOAD
+						   | SEC_READONLY));
+	  if (htab->plt_got == NULL
+	      || !bfd_set_section_alignment (htab->elf.dynobj,
+					     htab->plt_got,
+					     plt_got_align))
+	    return FALSE;
+	}
     }
 
   return TRUE;
@@ -2415,6 +2458,24 @@ elf_x86_64_allocate_dynrelocs (struct elf_link_hash_entry *h, void * inf)
   bed = get_elf_backend_data (info->output_bfd);
   plt_entry_size = GET_PLT_ENTRY_SIZE (info->output_bfd);
 
+  /* We can't use the GOT PLT if pointer equality is needed since
+     finish_dynamic_symbol won't clear symbol value and the dynamic
+     linker won't update the GOT slot.  We will get into an infinite
+     loop at run-time.  */
+  if (htab->plt_got != NULL
+      && h->type != STT_GNU_IFUNC
+      && !h->pointer_equality_needed
+      && h->plt.refcount > 0
+      && h->got.refcount > 0)
+    {
+      /* Don't use the regular PLT if there are both GOT and GOTPLT
+         reloctions.  */
+      h->plt.offset = (bfd_vma) -1;
+
+      /* Use the GOT PLT.  */
+      eh->plt_got.refcount = 1;
+    }
+
   /* Since STT_GNU_IFUNC symbol must go through PLT, we handle it
      here if it is defined and referenced in a non-shared object.  */
   if (h->type == STT_GNU_IFUNC
@@ -2442,8 +2503,10 @@ elf_x86_64_allocate_dynrelocs (struct elf_link_hash_entry *h, void * inf)
 	return FALSE;
     }
   else if (htab->elf.dynamic_sections_created
-	   && h->plt.refcount > 0)
+	   && (h->plt.refcount > 0 || eh->plt_got.refcount > 0))
     {
+      bfd_boolean use_plt_got = eh->plt_got.refcount > 0;
+
       /* Make sure this symbol is output as a dynamic symbol.
 	 Undefined weak syms won't yet be marked as dynamic.  */
       if (h->dynindx == -1
@@ -2458,15 +2521,21 @@ elf_x86_64_allocate_dynrelocs (struct elf_link_hash_entry *h, void * inf)
 	{
 	  asection *s = htab->elf.splt;
 	  asection *bnd_s = htab->plt_bnd;
+	  asection *got_s = htab->plt_got;
 
 	  /* If this is the first .plt entry, make room for the special
 	     first entry.  */
 	  if (s->size == 0)
 	    s->size = plt_entry_size;
 
-	  h->plt.offset = s->size;
-	  if (bnd_s)
-	    eh->plt_bnd.offset = bnd_s->size;
+	  if (use_plt_got)
+	    eh->plt_got.offset = got_s->size;
+	  else
+	    {
+	      h->plt.offset = s->size;
+	      if (bnd_s)
+		eh->plt_bnd.offset = bnd_s->size;
+	    }
 
 	  /* If this symbol is not defined in a regular file, and we are
 	     not generating a shared library, then set the symbol to this
@@ -2476,32 +2545,49 @@ elf_x86_64_allocate_dynrelocs (struct elf_link_hash_entry *h, void * inf)
 	  if (! info->shared
 	      && !h->def_regular)
 	    {
-	      if (bnd_s)
+	      if (use_plt_got)
 		{
-		  /* We need to make a call to the entry of the second
-		     PLT instead of regular PLT entry.  */
-		  h->root.u.def.section = bnd_s;
-		  h->root.u.def.value = eh->plt_bnd.offset;
+		  /* We need to make a call to the entry of the GOT PLT
+		     instead of regular PLT entry.  */
+		  h->root.u.def.section = got_s;
+		  h->root.u.def.value = eh->plt_got.offset;
 		}
 	      else
 		{
-		  h->root.u.def.section = s;
-		  h->root.u.def.value = h->plt.offset;
+		  if (bnd_s)
+		    {
+		      /* We need to make a call to the entry of the second
+			 PLT instead of regular PLT entry.  */
+		      h->root.u.def.section = bnd_s;
+		      h->root.u.def.value = eh->plt_bnd.offset;
+		    }
+		  else
+		    {
+		      h->root.u.def.section = s;
+		      h->root.u.def.value = h->plt.offset;
+		    }
 		}
 	    }
 
 	  /* Make room for this entry.  */
-	  s->size += plt_entry_size;
-	  if (bnd_s)
-	    bnd_s->size += sizeof (elf_x86_64_legacy_plt2_entry);
+	  if (use_plt_got)
+	    got_s->size += sizeof (elf_x86_64_legacy_plt2_entry);
+	  else
+	    {
+	      s->size += plt_entry_size;
+	      if (bnd_s)
+		bnd_s->size += sizeof (elf_x86_64_legacy_plt2_entry);
 
-	  /* We also need to make an entry in the .got.plt section, which
-	     will be placed in the .got section by the linker script.  */
-	  htab->elf.sgotplt->size += GOT_ENTRY_SIZE;
+	      /* We also need to make an entry in the .got.plt section,
+		 which will be placed in the .got section by the linker
+		 script.  */
+	      htab->elf.sgotplt->size += GOT_ENTRY_SIZE;
 
-	  /* We also need to make an entry in the .rela.plt section.  */
-	  htab->elf.srelplt->size += bed->s->sizeof_rela;
-	  htab->elf.srelplt->reloc_count++;
+	      /* We also need to make an entry in the .rela.plt
+		 section.  */
+	      htab->elf.srelplt->size += bed->s->sizeof_rela;
+	      htab->elf.srelplt->reloc_count++;
+	    }
 	}
       else
 	{
@@ -3120,6 +3206,7 @@ elf_x86_64_size_dynamic_sections (bfd *output_bfd,
 	  || s == htab->elf.iplt
 	  || s == htab->elf.igotplt
 	  || s == htab->plt_bnd
+	  || s == htab->plt_got
 	  || s == htab->plt_eh_frame
 	  || s == htab->sdynbss)
 	{
@@ -3885,7 +3972,8 @@ elf_x86_64_relocate_section (bfd *output_bfd,
 	  if (h == NULL)
 	    break;
 
-	  if (h->plt.offset == (bfd_vma) -1
+	  if ((h->plt.offset == (bfd_vma) -1
+	       && eh->plt_got.offset == (bfd_vma) -1)
 	      || htab->elf.splt == NULL)
 	    {
 	      /* We didn't make a PLT entry for this symbol.  This
@@ -3894,15 +3982,24 @@ elf_x86_64_relocate_section (bfd *output_bfd,
 	      break;
 	    }
 
-	  if (htab->plt_bnd != NULL)
+	  if (h->plt.offset != (bfd_vma) -1)
 	    {
-	      resolved_plt = htab->plt_bnd;
-	      plt_offset = eh->plt_bnd.offset;
+	      if (htab->plt_bnd != NULL)
+		{
+		  resolved_plt = htab->plt_bnd;
+		  plt_offset = eh->plt_bnd.offset;
+		}
+	      else
+		{
+		  resolved_plt = htab->elf.splt;
+		  plt_offset = h->plt.offset;
+		}
 	    }
 	  else
 	    {
-	      resolved_plt = htab->elf.splt;
-	      plt_offset = h->plt.offset;
+	      /* Use the GOT PLT.  */
+	      resolved_plt = htab->plt_got;
+	      plt_offset = eh->plt_got.offset;
 	    }
 
 	  relocation = (resolved_plt->output_section->vma
@@ -4717,6 +4814,7 @@ elf_x86_64_finish_dynamic_symbol (bfd *output_bfd,
   struct elf_x86_64_link_hash_table *htab;
   const struct elf_x86_64_backend_data *abed;
   bfd_boolean use_plt_bnd;
+  struct elf_x86_64_link_hash_entry *eh;
 
   htab = elf_x86_64_hash_table (info);
   if (htab == NULL)
@@ -4729,6 +4827,8 @@ elf_x86_64_finish_dynamic_symbol (bfd *output_bfd,
 	  ? &elf_x86_64_bnd_arch_bed
 	  : get_elf_x86_64_backend_data (output_bfd));
 
+  eh = (struct elf_x86_64_link_hash_entry *) h;
+
   if (h->plt.offset != (bfd_vma) -1)
     {
       bfd_vma plt_index;
@@ -4796,8 +4896,6 @@ elf_x86_64_finish_dynamic_symbol (bfd *output_bfd,
 	{
 	  /* Use the second PLT with BND relocations.  */
 	  const bfd_byte *plt_entry, *plt2_entry;
-	  struct elf_x86_64_link_hash_entry *eh
-	    = (struct elf_x86_64_link_hash_entry *) h;
 
 	  if (eh->has_bnd_reloc)
 	    {
@@ -4914,21 +5012,82 @@ elf_x86_64_finish_dynamic_symbol (bfd *output_bfd,
       bed = get_elf_backend_data (output_bfd);
       loc = relplt->contents + plt_index * bed->s->sizeof_rela;
       bed->s->swap_reloca_out (output_bfd, &rela, loc);
+    }
+  else if (eh->plt_got.offset != (bfd_vma) -1)
+    {
+      bfd_vma got_offset, plt_offset, plt_got_offset, plt_got_insn_size;
+      asection *plt, *got;
+      bfd_boolean got_after_plt;
+      int32_t got_pcrel_offset;
+      const bfd_byte *got_plt_entry;
+
+      /* Set the entry in the GOT procedure linkage table.  */
+      plt = htab->plt_got;
+      got = htab->elf.sgot;
+      got_offset = h->got.offset;
+
+      if (got_offset == (bfd_vma) -1
+	  || h->type == STT_GNU_IFUNC
+	  || plt == NULL
+	  || got == NULL)
+	abort ();
 
-      if (!h->def_regular)
+      /* Use the second PLT entry template for the GOT PLT since they
+	 are the identical.  */
+      plt_got_insn_size = elf_x86_64_bnd_arch_bed.plt_got_insn_size;
+      plt_got_offset = elf_x86_64_bnd_arch_bed.plt_got_offset;
+      if (eh->has_bnd_reloc)
+	got_plt_entry = elf_x86_64_bnd_plt2_entry;
+      else
 	{
-	  /* Mark the symbol as undefined, rather than as defined in
-	     the .plt section.  Leave the value if there were any
-	     relocations where pointer equality matters (this is a clue
-	     for the dynamic linker, to make function pointer
-	     comparisons work between an application and shared
-	     library), otherwise set it to zero.  If a function is only
-	     called from a binary, there is no need to slow down
-	     shared libraries because of that.  */
-	  sym->st_shndx = SHN_UNDEF;
-	  if (!h->pointer_equality_needed)
-	    sym->st_value = 0;
+	  got_plt_entry = elf_x86_64_legacy_plt2_entry;
+
+	  /* Subtract 1 since there is no BND prefix.  */
+	  plt_got_insn_size -= 1;
+	  plt_got_offset -= 1;
 	}
+
+      /* Fill in the entry in the GOT procedure linkage table.  */
+      plt_offset = eh->plt_got.offset;
+      memcpy (plt->contents + plt_offset,
+	      got_plt_entry, sizeof (elf_x86_64_legacy_plt2_entry));
+
+      /* Put offset the PC-relative instruction referring to the GOT
+	 entry, subtracting the size of that instruction.  */
+      got_pcrel_offset = (got->output_section->vma
+			  + got->output_offset
+			  + got_offset
+			  - plt->output_section->vma
+			  - plt->output_offset
+			  - plt_offset
+			  - plt_got_insn_size);
+
+      /* Check PC-relative offset overflow in GOT PLT entry.  */
+      got_after_plt = got->output_section->vma > plt->output_section->vma;
+      if ((got_after_plt && got_pcrel_offset < 0)
+	  || (!got_after_plt && got_pcrel_offset > 0))
+	info->callbacks->einfo (_("%F%B: PC-relative offset overflow in GOT PLT entry for `%s'\n"),
+				output_bfd, h->root.root.string);
+
+      bfd_put_32 (output_bfd, got_pcrel_offset,
+		  plt->contents + plt_offset + plt_got_offset);
+    }
+
+  if (!h->def_regular
+      && (h->plt.offset != (bfd_vma) -1
+	  || eh->plt_got.offset != (bfd_vma) -1))
+    {
+      /* Mark the symbol as undefined, rather than as defined in
+	 the .plt section.  Leave the value if there were any
+	 relocations where pointer equality matters (this is a clue
+	 for the dynamic linker, to make function pointer
+	 comparisons work between an application and shared
+	 library), otherwise set it to zero.  If a function is only
+	 called from a binary, there is no need to slow down
+	 shared libraries because of that.  */
+      sym->st_shndx = SHN_UNDEF;
+      if (!h->pointer_equality_needed)
+	sym->st_value = 0;
     }
 
   if (h->got.offset != (bfd_vma) -1
diff --git a/ld/emulparams/elf_i386.sh b/ld/emulparams/elf_i386.sh
index 93f1992..2ebfaac 100644
--- a/ld/emulparams/elf_i386.sh
+++ b/ld/emulparams/elf_i386.sh
@@ -13,6 +13,10 @@ GENERATE_PIE_SCRIPT=yes
 NO_SMALL_DATA=yes
 SEPARATE_GOTPLT="SIZEOF (.got.plt) >= 12 ? 12 : 0"
 IREL_IN_PLT=
+# Reuse TINY_READONLY_SECTION which is placed right after .plt section.
+TINY_READONLY_SECTION="
+.plt.got      ${RELOCATING-0} : { *(.plt.got) }
+"
 
 # Linux modify the default library search path to first include
 # a 32-bit specific directory.
diff --git a/ld/emulparams/elf_x86_64.sh b/ld/emulparams/elf_x86_64.sh
index 1e83a74..aa26a1b 100644
--- a/ld/emulparams/elf_x86_64.sh
+++ b/ld/emulparams/elf_x86_64.sh
@@ -17,7 +17,10 @@ LARGE_BSS_AFTER_BSS=
 SEPARATE_GOTPLT="SIZEOF (.got.plt) >= 24 ? 24 : 0"
 IREL_IN_PLT=
 # Reuse TINY_READONLY_SECTION which is placed right after .plt section.
-TINY_READONLY_SECTION=".plt.bnd      ${RELOCATING-0} : { *(.plt.bnd) }"
+TINY_READONLY_SECTION="
+.plt.got      ${RELOCATING-0} : { *(.plt.got) }
+.plt.bnd      ${RELOCATING-0} : { *(.plt.bnd) }
+"
 
 if [ "x${host}" = "x${target}" ]; then
   case " $EMULATION_LIBPATH " in
diff --git a/ld/testsuite/ld-i386/i386.exp b/ld/testsuite/ld-i386/i386.exp
index 4c870b8..304713d 100644
--- a/ld/testsuite/ld-i386/i386.exp
+++ b/ld/testsuite/ld-i386/i386.exp
@@ -309,6 +309,24 @@ if { [isnative]
 	    {{readelf {-Wr} plt-main4.rd}} \
 	    "libplt-main4.a" \
 	] \
+	[list \
+	    "Build plt-main" \
+	    "tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+	     tmpdir/plt-main4.o tmpdir/libplt-lib.so" \
+	    "" \
+	    { plt-main5.c } \
+	    {{readelf {-Wr} plt-main.rd}} \
+	    "plt-main" \
+	] \
+	[list \
+	    "Build plt-main with PIE" \
+	    "tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+	     tmpdir/plt-main4.o tmpdir/libplt-lib.so -pie" \
+	    "-fPIC" \
+	    { plt-main5.c } \
+	    {{readelf {-Wr} plt-main.rd}} \
+	    "plt-main" \
+	] \
     ]
 
     run_ld_link_exec_tests [] [list \
diff --git a/ld/testsuite/ld-i386/plt-main.rd b/ld/testsuite/ld-i386/plt-main.rd
new file mode 100644
index 0000000..6d01ff9
--- /dev/null
+++ b/ld/testsuite/ld-i386/plt-main.rd
@@ -0,0 +1,4 @@
+#failif
+#...
+[0-9a-f ]+R_386_JUMP_SLOT +0+ +bar
+#...
diff --git a/ld/testsuite/ld-x86-64/plt-main-bnd.dd b/ld/testsuite/ld-x86-64/plt-main-bnd.dd
new file mode 100644
index 0000000..8598e30
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/plt-main-bnd.dd
@@ -0,0 +1,7 @@
+#...
+Disassembly of section .plt.got:
+
+[a-f0-9]+ <.plt.got>:
+[ 	]*[a-f0-9]+:	f2 ff 25 .. .. 20 00 	bnd jmpq \*0x20....\(%rip\)        # ...... <_DYNAMIC\+0x...>
+[ 	]*[a-f0-9]+:	90                   	nop
+#pass
diff --git a/ld/testsuite/ld-x86-64/plt-main.rd b/ld/testsuite/ld-x86-64/plt-main.rd
new file mode 100644
index 0000000..1effcd4
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/plt-main.rd
@@ -0,0 +1,4 @@
+#failif
+#...
+[0-9a-f ]+R_X86_64_JUMP_SLOT +0+ +bar \+ 0
+#...
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index ab1bfd1..886469b 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -374,6 +374,24 @@ if { [isnative] && [which $CC] != 0 } {
 	    {{readelf {-Wr} plt-main4.rd}} \
 	    "libplt-main4.a" \
 	] \
+	[list \
+	    "Build plt-main" \
+	    "tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+	     tmpdir/plt-main4.o tmpdir/libplt-lib.so" \
+	    "" \
+	    { plt-main5.c } \
+	    {{readelf {-Wr} plt-main.rd}} \
+	    "plt-main" \
+	] \
+	[list \
+	    "Build plt-main with PIE" \
+	    "tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+	     tmpdir/plt-main4.o tmpdir/libplt-lib.so -pie" \
+	    "-fPIC" \
+	    { plt-main5.c } \
+	    {{readelf {-Wr} plt-main.rd}} \
+	    "plt-main" \
+	] \
     ]
 
     run_ld_link_exec_tests [] [list \
@@ -397,6 +415,53 @@ if { [isnative] && [which $CC] != 0 } {
 	    "-fPIC" \
 	] \
     ]
+
+    if { [istarget "x86_64-*-linux*"] \
+	 && ![istarget "x86_64-*-linux*-gnux32"]} {
+
+	run_cc_link_tests [list \
+	    [list \
+		"Build plt-main with -z bndplt" \
+		"tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+		 tmpdir/plt-main4.o tmpdir/libplt-lib.so -z bndplt" \
+		"" \
+		{ plt-main5.c } \
+		{{objdump {-drw} plt-main-bnd.dd}} \
+		"plt-main-bnd" \
+	    ] \
+	    [list \
+		"Build plt-main with PIE and -z bndplt" \
+		"tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+		 tmpdir/plt-main4.o tmpdir/libplt-lib.so -z bndplt -pie" \
+		"-fPIC" \
+		{ plt-main5.c } \
+		{{objdump {-drw} plt-main-bnd.dd}} \
+		"plt-main-pie-bnd" \
+	    ] \
+	]
+
+	run_ld_link_exec_tests [] [list \
+	    [list \
+		"Run plt-main with -z bndplt" \
+		"tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+		 tmpdir/plt-main4.o tmpdir/libplt-lib.so -z bndplt" \
+		"" \
+		{ plt-main5.c } \
+		"plt-main-bnd" \
+		"plt-main.out" \
+	    ] \
+	    [list \
+		"Run plt-main with PIE and -z bndplt" \
+		"tmpdir/plt-main1.o tmpdir/plt-main2.o tmpdir/plt-main3.o \
+		 tmpdir/plt-main4.o tmpdir/libplt-lib.so -z bndplt -pie" \
+		"" \
+		{ plt-main5.c } \
+		"plt-main-pie-bnd" \
+		"plt-main.out" \
+		"-fPIC" \
+	    ] \
+	]
+    }
 }
 
 if { ![istarget "x86_64-*-linux*"]} {
-- 
1.9.3



More information about the Binutils mailing list