[PATCH] -mcmodel=large -fpic TLS GD and LD support gcc + binutils (PR target/58067)

Jakub Jelinek jakub@redhat.com
Tue Aug 13 19:42:00 GMT 2013


Hi!

We currently ICE with -mcmodel=large -fpic on x86_64 on TLS GD and LD
sequences, because we can't call __tls_get_addr@plt there from code that is
potentially more than 2GB away from the PLT slot.

The attached patches add support for that in gcc and also teach the linker
about those sequences, because otherwise the linker will fail if you try to
link such -mcmodel=large -fpic code into binaries or PIEs.

To make transitions possible, we always emit
leaq foo@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %rbx, %rax
call *%rax
resp.
leaq foo@tlsld(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %rbx, %rax
call *%rax
sequences (22 bytes, 6 bytes longer than what we do for TLSGD for normal
libraries).

Bootstrapped/regtested on x86_64-linux and i686-linux; also attached are the
sources I've used to test all 3 different transitions.

Ok for trunk and 4.8 branch (and binutils trunk)?

	Jakub
-------------- next part --------------
2013-08-13  Jakub Jelinek  <jakub@redhat.com>

	PR target/58067
	* config/i386/i386.md (*tls_global_dynamic_64_largepic): New insn.
	(*tls_local_dynamic_base_64_largepic): Likewise.
	(tls_global_dynamic_64_<mode>, tls_local_dynamic_base_64_<mode>):
	Remove predicate from call operand.
	* config/i386/i386.c (ix86_tls_get_addr): For -mcmodel=large -fpic
	return sum of pic_offset_table_rtx and UNSPEC_PLTOFF of the symbol.

--- gcc/config/i386/i386.md.jj	2013-08-13 12:20:20.000000000 +0200
+++ gcc/config/i386/i386.md	2013-08-13 15:03:55.632194607 +0200
@@ -12303,11 +12303,33 @@ (define_insn "*tls_global_dynamic_64_<mo
    (set (attr "length")
 	(symbol_ref "TARGET_X32 ? 15 : 16"))])
 
+(define_insn "*tls_global_dynamic_64_largepic"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(call:DI
+	 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
+			  (match_operand:DI 3 "immediate_operand" "i")))
+	 (match_operand 4)))
+   (unspec:DI [(match_operand 1 "tls_symbolic_operand")]
+	     UNSPEC_TLS_GD)]
+  "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
+   && GET_CODE (operands[3]) == CONST
+   && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
+   && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
+{
+  output_asm_insn
+    ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+  output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
+  output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
+  return "call\t{*%%rax|rax}";
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "22")])
+
 (define_expand "tls_global_dynamic_64_<mode>"
   [(parallel
     [(set (match_operand:P 0 "register_operand")
 	  (call:P
-	   (mem:QI (match_operand 2 "constant_call_address_operand"))
+	   (mem:QI (match_operand 2))
 	   (const_int 0)))
      (unspec:P [(match_operand 1 "tls_symbolic_operand")]
 	       UNSPEC_TLS_GD)])]
@@ -12365,11 +12387,32 @@ (define_insn "*tls_local_dynamic_base_64
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
+(define_insn "*tls_local_dynamic_base_64_largepic"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(call:DI
+	 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
+			  (match_operand:DI 2 "immediate_operand" "i")))
+	 (match_operand 3)))
+   (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+  "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
+   && GET_CODE (operands[2]) == CONST
+   && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
+   && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
+{
+  output_asm_insn
+    ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
+  output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
+  output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
+  return "call\t{*%%rax|rax}";
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "22")])
+
 (define_expand "tls_local_dynamic_base_64_<mode>"
   [(parallel
      [(set (match_operand:P 0 "register_operand")
 	   (call:P
-	    (mem:QI (match_operand 1 "constant_call_address_operand"))
+	    (mem:QI (match_operand 1))
 	    (const_int 0)))
       (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
   "TARGET_64BIT")
--- gcc/config/i386/i386.c.jj	2013-08-13 12:20:20.000000000 +0200
+++ gcc/config/i386/i386.c	2013-08-13 14:42:32.449334139 +0200
@@ -13220,6 +13220,14 @@ ix86_tls_get_addr (void)
       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
     }
 
+  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
+    {
+      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
+				   UNSPEC_PLTOFF);
+      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+			   gen_rtx_CONST (Pmode, unspec));
+    }
+
   return ix86_tls_symbol;
 }
 
-------------- next part --------------
2013-08-13  Jakub Jelinek  <jakub@redhat.com>

	* elf64-x86-64.c (elf_x86_64_check_tls_transition): Allow
	64-bit -mcmodel=large -fpic TLS GD and LD sequences.
	(elf_x86_64_relocate_section): Handle -mcmodel=large -fpic
	TLS GD and LD sequences in GD->LE, GD->IE and LD->LE transitions.

--- bfd/elf64-x86-64.c.jj	2013-08-13 13:41:41.000000000 +0200
+++ bfd/elf64-x86-64.c	2013-08-13 16:11:28.902439602 +0200
@@ -1087,6 +1087,7 @@ elf_x86_64_check_tls_transition (bfd *ab
 {
   unsigned int val;
   unsigned long r_symndx;
+  bfd_boolean largepic = FALSE;
   struct elf_link_hash_entry *h;
   bfd_vma offset;
   struct elf_x86_64_link_hash_table *htab;
@@ -1124,16 +1125,32 @@ elf_x86_64_check_tls_transition (bfd *ab
 	     can transit to different access model.  For 32bit, only
 		leaq foo@tlsgd(%rip), %rdi
 		.word 0x6666; rex64; call __tls_get_addr
-	     can transit to different access model.  */
+	     can transit to different access model.  For largepic
+	     we also support:
+		leaq foo@tlsgd(%rip), %rdi
+		movabsq $__tls_get_addr@pltoff, %rax
+		addq %rbx, %rax
+		call *%rax.  */
 
 	  static const unsigned char call[] = { 0x66, 0x66, 0x48, 0xe8 };
 	  static const unsigned char leaq[] = { 0x66, 0x48, 0x8d, 0x3d };
 
-	  if ((offset + 12) > sec->size
-	      || memcmp (contents + offset + 4, call, 4) != 0)
+	  if ((offset + 12) > sec->size)
 	    return FALSE;
 
-	  if (ABI_64_P (abfd))
+	  if (memcmp (contents + offset + 4, call, 4) != 0)
+	    {
+	      if (!ABI_64_P (abfd)
+		  || (offset + 19) > sec->size
+		  || offset < 3
+		  || memcmp (contents + offset - 3, leaq + 1, 3) != 0
+		  || memcmp (contents + offset + 4, "\x48\xb8", 2) != 0
+		  || memcmp (contents + offset + 14, "\x48\x01\xd8\xff\xd0", 5)
+		     != 0)
+		return FALSE;
+	      largepic = TRUE;
+	    }
+	  else if (ABI_64_P (abfd))
 	    {
 	      if (offset < 4
 		  || memcmp (contents + offset - 4, leaq, 4) != 0)
@@ -1151,16 +1168,31 @@ elf_x86_64_check_tls_transition (bfd *ab
 	  /* Check transition from LD access model.  Only
 		leaq foo@tlsld(%rip), %rdi;
 		call __tls_get_addr
-	     can transit to different access model.  */
+	     can transit to different access model.  For largepic
+	     we also support:
+		leaq foo@tlsld(%rip), %rdi
+		movabsq $__tls_get_addr@pltoff, %rax
+		addq %rbx, %rax
+		call *%rax.  */
 
 	  static const unsigned char lea[] = { 0x48, 0x8d, 0x3d };
 
 	  if (offset < 3 || (offset + 9) > sec->size)
 	    return FALSE;
 
-	  if (memcmp (contents + offset - 3, lea, 3) != 0
-	      || 0xe8 != *(contents + offset + 4))
+	  if (memcmp (contents + offset - 3, lea, 3) != 0)
 	    return FALSE;
+
+	  if (0xe8 != *(contents + offset + 4))
+	    {
+	      if (!ABI_64_P (abfd)
+		  || (offset + 19) > sec->size
+		  || memcmp (contents + offset + 4, "\x48\xb8", 2) != 0
+		  || memcmp (contents + offset + 14, "\x48\x01\xd8\xff\xd0", 5)
+		     != 0)
+		return FALSE;
+	      largepic = TRUE;
+	    }
 	}
 
       r_symndx = htab->r_sym (rel[1].r_info);
@@ -1172,8 +1204,10 @@ elf_x86_64_check_tls_transition (bfd *ab
 	 may be versioned.  */
       return (h != NULL
 	      && h->root.root.string != NULL
-	      && (ELF32_R_TYPE (rel[1].r_info) == R_X86_64_PC32
-		  || ELF32_R_TYPE (rel[1].r_info) == R_X86_64_PLT32)
+	      && (largepic
+		  ? ELF32_R_TYPE (rel[1].r_info) == R_X86_64_PLTOFF64
+		  : (ELF32_R_TYPE (rel[1].r_info) == R_X86_64_PC32
+		     || ELF32_R_TYPE (rel[1].r_info) == R_X86_64_PLT32))
 	      && (strncmp (h->root.root.string,
 			   "__tls_get_addr", 14) == 0));
 
@@ -3947,8 +3981,26 @@ direct:
 		     .word 0x6666; rex64; call __tls_get_addr
 		     into:
 		     movl %fs:0, %eax
-		     leaq foo@tpoff(%rax), %rax */
-		  if (ABI_64_P (output_bfd))
+		     leaq foo@tpoff(%rax), %rax
+		     For largepic, change:
+		     leaq foo@tlsgd(%rip), %rdi
+		     movabsq $__tls_get_addr@pltoff, %rax
+		     addq %rbx, %rax
+		     call *%rax
+		     into:
+		     movq %fs:0, %rax
+		     leaq foo@tpoff(%rax), %rax
+		     nopw 0x0(%rax,%rax,1) */
+		  int largepic = 0;
+		  if (ABI_64_P (output_bfd)
+		      && contents[roff + 5] == (bfd_byte) '\xb8')
+		    {
+		      memcpy (contents + roff - 3,
+			      "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x8d\x80"
+			      "\0\0\0\0\x66\x0f\x1f\x44\0", 22);
+		      largepic = 1;
+		    }
+		  else if (ABI_64_P (output_bfd))
 		    memcpy (contents + roff - 4,
 			    "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x8d\x80\0\0\0",
 			    16);
@@ -3958,8 +4010,8 @@ direct:
 			    15);
 		  bfd_put_32 (output_bfd,
 			      elf_x86_64_tpoff (info, relocation),
-			      contents + roff + 8);
-		  /* Skip R_X86_64_PC32/R_X86_64_PLT32.  */
+			      contents + roff + 8 + largepic);
+		  /* Skip R_X86_64_PC32/R_X86_64_PLT32/R_X86_64_PLTOFF64.  */
 		  rel++;
 		  continue;
 		}
@@ -4194,8 +4246,26 @@ direct:
 		     .word 0x6666; rex64; call __tls_get_addr@plt
 		     into:
 		     movl %fs:0, %eax
-		     addq foo@gottpoff(%rip), %rax */
-		  if (ABI_64_P (output_bfd))
+		     addq foo@gottpoff(%rip), %rax
+		     For largepic, change:
+		     leaq foo@tlsgd(%rip), %rdi
+		     movabsq $__tls_get_addr@pltoff, %rax
+		     addq %rbx, %rax
+		     call *%rax
+		     into:
+		     movq %fs:0, %rax
+		     addq foo@gottpoff(%rip), %rax
+		     nopw 0x0(%rax,%rax,1) */
+		  int largepic = 0;
+		  if (ABI_64_P (output_bfd)
+		      && contents[roff + 5] == (bfd_byte) '\xb8')
+		    {
+		      memcpy (contents + roff - 3,
+			      "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x03\x05"
+			      "\0\0\0\0\x66\x0f\x1f\x44\0", 22);
+		      largepic = 1;
+		    }
+		  else if (ABI_64_P (output_bfd))
 		    memcpy (contents + roff - 4,
 			    "\x64\x48\x8b\x04\x25\0\0\0\0\x48\x03\x05\0\0\0",
 			    16);
@@ -4207,12 +4277,13 @@ direct:
 		  relocation = (htab->elf.sgot->output_section->vma
 				+ htab->elf.sgot->output_offset + off
 				- roff
+				- largepic
 				- input_section->output_section->vma
 				- input_section->output_offset
 				- 12);
 		  bfd_put_32 (output_bfd, relocation,
-			      contents + roff + 8);
-		  /* Skip R_X86_64_PLT32.  */
+			      contents + roff + 8 + largepic);
+		  /* Skip R_X86_64_PLT32/R_X86_64_PLTOFF64.  */
 		  rel++;
 		  continue;
 		}
@@ -4274,16 +4345,29 @@ direct:
 		 For 64bit, we change it into:
 		 .word 0x6666; .byte 0x66; movq %fs:0, %rax.
 		 For 32bit, we change it into:
-		 nopl 0x0(%rax); movl %fs:0, %eax.  */
+		 nopl 0x0(%rax); movl %fs:0, %eax.
+		 For largepic, change:
+		 leaq foo@tlsld(%rip), %rdi
+		 movabsq $__tls_get_addr@pltoff, %rax
+		 addq %rbx, %rax
+		 call *%rax
+		 into:
+		 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)
+		 movq %fs:0, %rax */
 
 	      BFD_ASSERT (r_type == R_X86_64_TPOFF32);
-	      if (ABI_64_P (output_bfd))
+	      if (ABI_64_P (output_bfd)
+		  && contents[rel->r_offset + 5] == (bfd_byte) '\xb8')
+		memcpy (contents + rel->r_offset - 3,
+			"\x66\x66\x66\x66\x2e\x0f\x1f\x84\0\0\0\0\0"
+			"\x64\x48\x8b\x04\x25\0\0\0", 22);
+	      else if (ABI_64_P (output_bfd))
 		memcpy (contents + rel->r_offset - 3,
 			"\x66\x66\x66\x64\x48\x8b\x04\x25\0\0\0", 12);
 	      else
 		memcpy (contents + rel->r_offset - 3,
 			"\x0f\x1f\x40\x00\x64\x8b\x04\x25\0\0\0", 12);
-	      /* Skip R_X86_64_PC32/R_X86_64_PLT32.  */
+	      /* Skip R_X86_64_PC32/R_X86_64_PLT32/R_X86_64_PLTOFF64.  */
 	      rel++;
 	      continue;
 	    }
-------------- next part --------------
/* Thread-local variables accessed by foo and main.  a is defined here
   with external linkage, b is local to this file, and c is only
   declared here, so its definition must come from another translation
   unit.
   NOTE(review): presumably the three storage classes are chosen so each
   variable exercises a different TLS access sequence -- confirm.  */
__thread int a;
static __thread int b;
extern __thread int c;

/* Return the sum of the thread-local variables a, b and c as they were
   on entry, and post-increment each of them by one.  */
int
foo ()
{
  return a++ + b++ + c++;
}

/* Set a, b and c to 4, 5 and 6, call foo twice and return the sum of
   the two results minus 33.  One call yields 4 + 5 + 6 = 15 and the
   other 5 + 6 + 7 = 18, so the exit status is 0 exactly when every
   read and increment of the three variables worked as expected.  */
int
main ()
{
  a = 4;
  b = 5;
  c = 6;
  /* 15 + 18 - 33 == 0 regardless of which call is evaluated first.  */
  return foo () + foo () - 33;
}
-------------- next part --------------
/* Definition of the thread-local variable c.
   NOTE(review): presumably this is the separate source file that
   provides the c declared `extern __thread' by the main test source --
   confirm.  */
__thread int c;


More information about the Binutils mailing list