This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
powerpc tls
- From: Alan Modra <amodra at bigpond dot net dot au>
- To: binutils at sourceware dot org
- Date: Tue, 6 Nov 2007 14:30:04 +1030
- Subject: powerpc tls
Someone recently sent me an object file generated by gcc-4.1.1 that
contains the following horrible powerpc code. It breaks the linker's tls
optimisation assumption that GOT_TLSGD relocs on instructions setting
up __tls_get_addr parameters are immediately followed by the PLTREL24
reloc for the call itself. I'm not inclined to teach the linker how to
analyse code to stitch together arguments with calls, so for now we'll
just refuse to optimise this mess.
288: 7f c8 02 a6 mflr r30
28c: 38 7e 00 00 addi r3,r30,0
28e: R_PPC_GOT_TLSGD16 blas_cbstate
290: 41 9e 00 2c beq- cr7,2bc <get_allocated_cb+0x54>
294: 38 7e 00 00 addi r3,r30,0
296: R_PPC_GOT_TLSGD16 blas_cbstate
298: 2f 80 00 03 cmpwi cr7,r0,3
29c: 41 9a 00 3c beq- cr6,2d8 <get_allocated_cb+0x70>
2a0: 38 7e 00 00 addi r3,r30,0
2a2: R_PPC_GOT_TLSGD16 blas_cbstate
2a4: 41 9e 00 50 beq- cr7,2f4 <get_allocated_cb+0x8c>
2a8: 80 01 00 24 lwz r0,36(r1)
2ac: 83 c1 00 18 lwz r30,24(r1)
2b0: 38 21 00 20 addi r1,r1,32
2b4: 7c 08 03 a6 mtlr r0
2b8: 4e 80 00 20 blr
2bc: 48 00 00 01 bl 2bc <get_allocated_cb+0x54>
2bc: R_PPC_PLTREL24 __tls_get_addr
2c0: 80 01 00 24 lwz r0,36(r1)
2c4: 80 63 00 00 lwz r3,0(r3)
2c8: 83 c1 00 18 lwz r30,24(r1)
2cc: 7c 08 03 a6 mtlr r0
2d0: 38 21 00 20 addi r1,r1,32
2d4: 4e 80 00 20 blr
2d8: 48 00 00 01 bl 2d8 <get_allocated_cb+0x70>
2d8: R_PPC_PLTREL24 __tls_get_addr
2dc: 80 01 00 24 lwz r0,36(r1)
2e0: 80 63 00 04 lwz r3,4(r3)
2e4: 83 c1 00 18 lwz r30,24(r1)
2e8: 7c 08 03 a6 mtlr r0
2ec: 38 21 00 20 addi r1,r1,32
2f0: 4e 80 00 20 blr
2f4: 48 00 00 01 bl 2f4 <get_allocated_cb+0x8c>
2f4: R_PPC_PLTREL24 __tls_get_addr
2f8: 80 01 00 24 lwz r0,36(r1)
2fc: 80 63 00 08 lwz r3,8(r3)
300: 83 c1 00 18 lwz r30,24(r1)
304: 7c 08 03 a6 mtlr r0
308: 38 21 00 20 addi r1,r1,32
30c: 4e 80 00 20 blr
bfd/
* elf32-ppc.c (ppc_elf_check_relocs): Don't refcount tlsld_got here..
(ppc_elf_gc_sweep_hook): ..or here..
(ppc_elf_tls_optimize): ..or here. Make two passes through the
relocs, ensuring that tls_get_addr calls follow gd and ld relocs.
(allocate_dynrelocs): Refcount tlsld_got here.
(ppc_elf_size_dynamic_sections): Call allocate_dynrelocs before
allocating tlsld_got.
(ppc_elf_relocate_section): Remove check that a tls_get_addr
call follows gd and ld relocs.
ld/testsuite/
* ld-powerpc/tlsso32.d: Update for changed got alloc order.
Note: the elf32-ppc.c diff below was generated with "diff -w" to exclude indentation changes.
Index: bfd/elf32-ppc.c
===================================================================
RCS file: /cvs/src/src/bfd/elf32-ppc.c,v
retrieving revision 1.222
diff -u -p -w -r1.222 elf32-ppc.c
--- bfd/elf32-ppc.c 25 Oct 2007 15:20:23 -0000 1.222
+++ bfd/elf32-ppc.c 6 Nov 2007 03:00:10 -0000
@@ -3109,7 +3109,6 @@ ppc_elf_check_relocs (bfd *abfd,
case R_PPC_GOT_TLSLD16_LO:
case R_PPC_GOT_TLSLD16_HI:
case R_PPC_GOT_TLSLD16_HA:
- htab->tlsld_got.refcount += 1;
tls_type = TLS_TLS | TLS_LD;
goto dogottls;
@@ -3957,9 +3956,6 @@ ppc_elf_gc_sweep_hook (bfd *abfd,
case R_PPC_GOT_TLSLD16_LO:
case R_PPC_GOT_TLSLD16_HI:
case R_PPC_GOT_TLSLD16_HA:
- htab->tlsld_got.refcount -= 1;
- /* Fall thru */
-
case R_PPC_GOT_TLSGD16:
case R_PPC_GOT_TLSGD16_LO:
case R_PPC_GOT_TLSGD16_HI:
@@ -4064,11 +4060,17 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
bfd *ibfd;
asection *sec;
struct ppc_elf_link_hash_table *htab;
+ int pass;
if (info->relocatable || info->shared)
return TRUE;
htab = ppc_elf_hash_table (info);
+ /* Make two passes through the relocs. First time check that tls
+ relocs involved in setting up a tls_get_addr call are indeed
+ followed by such a call. If they are not, exclude them from
+ the optimizations done on the second pass. */
+ for (pass = 0; pass < 2; ++pass)
for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
{
Elf_Internal_Sym *locsyms = NULL;
@@ -4078,7 +4080,6 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
if (sec->has_tls_reloc && !bfd_is_abs_section (sec->output_section))
{
Elf_Internal_Rela *relstart, *rel, *relend;
- int expecting_tls_get_addr;
/* Read the relocations. */
relstart = _bfd_elf_link_read_relocs (ibfd, sec, NULL, NULL,
@@ -4086,7 +4087,6 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
if (relstart == NULL)
return FALSE;
- expecting_tls_get_addr = 0;
relend = relstart + sec->reloc_count;
for (rel = relstart; rel < relend; rel++)
{
@@ -4096,6 +4096,8 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
char *tls_mask;
char tls_set, tls_clear;
bfd_boolean is_local;
+ int expecting_tls_get_addr;
+ bfd_signed_vma *got_count;
r_symndx = ELF32_R_SYM (rel->r_info);
if (r_symndx >= symtab_hdr->sh_info)
@@ -4109,6 +4111,7 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
h = (struct elf_link_hash_entry *) h->root.u.i.link;
}
+ expecting_tls_get_addr = 0;
is_local = FALSE;
if (h == NULL
|| !h->def_dynamic)
@@ -4119,24 +4122,27 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
{
case R_PPC_GOT_TLSLD16:
case R_PPC_GOT_TLSLD16_LO:
+ expecting_tls_get_addr = 1;
+ /* Fall thru */
+
case R_PPC_GOT_TLSLD16_HI:
case R_PPC_GOT_TLSLD16_HA:
/* These relocs should never be against a symbol
defined in a shared lib. Leave them alone if
that turns out to be the case. */
- expecting_tls_get_addr = 0;
- htab->tlsld_got.refcount -= 1;
if (!is_local)
continue;
/* LD -> LE */
tls_set = 0;
tls_clear = TLS_LD;
- expecting_tls_get_addr = 1;
break;
case R_PPC_GOT_TLSGD16:
case R_PPC_GOT_TLSGD16_LO:
+ expecting_tls_get_addr = 1;
+ /* Fall thru */
+
case R_PPC_GOT_TLSGD16_HI:
case R_PPC_GOT_TLSGD16_HA:
if (is_local)
@@ -4146,14 +4152,12 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
/* GD -> IE */
tls_set = TLS_TLS | TLS_TPRELGD;
tls_clear = TLS_GD;
- expecting_tls_get_addr = 1;
break;
case R_PPC_GOT_TPREL16:
case R_PPC_GOT_TPREL16_LO:
case R_PPC_GOT_TPREL16_HI:
case R_PPC_GOT_TPREL16_HA:
- expecting_tls_get_addr = 0;
if (is_local)
{
/* IE -> LE */
@@ -4164,35 +4168,57 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
else
continue;
- case R_PPC_REL14:
- case R_PPC_REL14_BRTAKEN:
- case R_PPC_REL14_BRNTAKEN:
- case R_PPC_REL24:
- if (expecting_tls_get_addr
- && h != NULL
- && h == htab->tls_get_addr)
- {
- struct plt_entry *ent = find_plt_ent (h, NULL, 0);
- if (ent != NULL && ent->plt.refcount > 0)
- ent->plt.refcount -= 1;
+ default:
+ continue;
}
- expecting_tls_get_addr = 0;
+
+ if (pass == 0)
+ {
+ if (!expecting_tls_get_addr)
continue;
- default:
- expecting_tls_get_addr = 0;
+ if (rel + 1 < relend)
+ {
+ enum elf_ppc_reloc_type r_type2;
+ unsigned long r_symndx2;
+ struct elf_link_hash_entry *h2;
+
+ /* The next instruction should be a call to
+ __tls_get_addr. Peek at the reloc to be sure. */
+ r_type2 = ELF32_R_TYPE (rel[1].r_info);
+ r_symndx2 = ELF32_R_SYM (rel[1].r_info);
+ if (r_symndx2 >= symtab_hdr->sh_info
+ && (r_type2 == R_PPC_REL14
+ || r_type2 == R_PPC_REL14_BRTAKEN
+ || r_type2 == R_PPC_REL14_BRNTAKEN
+ || r_type2 == R_PPC_REL24
+ || r_type2 == R_PPC_PLTREL24))
+ {
+ struct elf_link_hash_entry **sym_hashes;
+
+ sym_hashes = elf_sym_hashes (ibfd);
+ h2 = sym_hashes[r_symndx2 - symtab_hdr->sh_info];
+ while (h2->root.type == bfd_link_hash_indirect
+ || h2->root.type == bfd_link_hash_warning)
+ h2 = ((struct elf_link_hash_entry *)
+ h2->root.u.i.link);
+ if (h2 == htab->tls_get_addr)
continue;
}
+ }
+
+ /* Uh oh, we didn't find the expected call. We
+ could just mark this symbol to exclude it
+ from tls optimization but it's safer to skip
+ the entire section. */
+ sec->has_tls_reloc = 0;
+ break;
+ }
if (h != NULL)
{
- if (tls_set == 0)
- {
- /* We managed to get rid of a got entry. */
- if (h->got.refcount > 0)
- h->got.refcount -= 1;
- }
tls_mask = &ppc_elf_hash_entry (h)->tls_mask;
+ got_count = &h->got.refcount;
}
else
{
@@ -4218,14 +4244,25 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUT
lgot_refs = elf_local_got_refcounts (ibfd);
if (lgot_refs == NULL)
abort ();
+ lgot_masks = (char *) (lgot_refs + symtab_hdr->sh_info);
+ tls_mask = &lgot_masks[r_symndx];
+ got_count = &lgot_refs[r_symndx];
+ }
+
if (tls_set == 0)
{
/* We managed to get rid of a got entry. */
- if (lgot_refs[r_symndx] > 0)
- lgot_refs[r_symndx] -= 1;
+ if (*got_count > 0)
+ *got_count -= 1;
}
- lgot_masks = (char *) (lgot_refs + symtab_hdr->sh_info);
- tls_mask = &lgot_masks[r_symndx];
+
+ if (expecting_tls_get_addr)
+ {
+ struct plt_entry *ent;
+
+ ent = find_plt_ent (htab->tls_get_addr, NULL, 0);
+ if (ent != NULL && ent->plt.refcount > 0)
+ ent->plt.refcount -= 1;
}
*tls_mask |= tls_set;
@@ -4673,8 +4710,11 @@ allocate_dynrelocs (struct elf_link_hash
if (eh->tls_mask == (TLS_TLS | TLS_LD)
&& !eh->elf.def_dynamic)
+ {
/* If just an LD reloc, we'll just use htab->tlsld_got.offset. */
+ htab->tlsld_got.refcount += 1;
eh->elf.got.offset = (bfd_vma) -1;
+ }
else
{
bfd_boolean dyn;
@@ -4949,6 +4989,9 @@ ppc_elf_size_dynamic_sections (bfd *outp
*local_got = (bfd_vma) -1;
}
+ /* Allocate space for global sym dynamic relocs. */
+ elf_link_hash_traverse (elf_hash_table (info), allocate_dynrelocs, info);
+
if (htab->tlsld_got.refcount > 0)
{
htab->tlsld_got.offset = allocate_got (htab, 8);
@@ -4958,9 +5001,6 @@ ppc_elf_size_dynamic_sections (bfd *outp
else
htab->tlsld_got.offset = (bfd_vma) -1;
- /* Allocate space for global sym dynamic relocs. */
- elf_link_hash_traverse (elf_hash_table (info), allocate_dynrelocs, info);
-
if (htab->got != NULL && htab->plt_type != PLT_VXWORKS)
{
unsigned int g_o_t = 32768;
@@ -5928,42 +5968,17 @@ ppc_elf_relocate_section (bfd *output_bf
case R_PPC_GOT_TLSGD16_LO:
tls_gd = TLS_TPRELGD;
if (tls_mask != 0 && (tls_mask & TLS_GD) == 0)
- goto tls_get_addr_check;
+ goto tls_ldgd_opt;
break;
case R_PPC_GOT_TLSLD16:
case R_PPC_GOT_TLSLD16_LO:
if (tls_mask != 0 && (tls_mask & TLS_LD) == 0)
{
- tls_get_addr_check:
- if (rel + 1 < relend)
- {
- enum elf_ppc_reloc_type r_type2;
- unsigned long r_symndx2;
- struct elf_link_hash_entry *h2;
bfd_vma insn1, insn2;
bfd_vma offset;
- /* The next instruction should be a call to
- __tls_get_addr. Peek at the reloc to be sure. */
- r_type2 = ELF32_R_TYPE (rel[1].r_info);
- r_symndx2 = ELF32_R_SYM (rel[1].r_info);
- if (r_symndx2 < symtab_hdr->sh_info
- || (r_type2 != R_PPC_REL14
- && r_type2 != R_PPC_REL14_BRTAKEN
- && r_type2 != R_PPC_REL14_BRNTAKEN
- && r_type2 != R_PPC_REL24
- && r_type2 != R_PPC_PLTREL24))
- break;
-
- h2 = sym_hashes[r_symndx2 - symtab_hdr->sh_info];
- while (h2->root.type == bfd_link_hash_indirect
- || h2->root.type == bfd_link_hash_warning)
- h2 = (struct elf_link_hash_entry *) h2->root.u.i.link;
- if (h2 == NULL || h2 != htab->tls_get_addr)
- break;
-
- /* OK, it checks out. Replace the call. */
+ tls_ldgd_opt:
offset = rel[1].r_offset;
insn1 = bfd_get_32 (output_bfd,
contents + rel->r_offset - d_offset);
@@ -5973,7 +5988,8 @@ ppc_elf_relocate_section (bfd *output_bf
insn1 &= (1 << 26) - 1;
insn1 |= 32 << 26; /* lwz */
insn2 = 0x7c631214; /* add 3,3,2 */
- rel[1].r_info = ELF32_R_INFO (r_symndx2, R_PPC_NONE);
+ rel[1].r_info
+ = ELF32_R_INFO (ELF32_R_SYM (rel[1].r_info), R_PPC_NONE);
rel[1].r_addend = 0;
r_type = (((r_type - (R_PPC_GOT_TLSGD16 & 3)) & 3)
+ R_PPC_GOT_TPREL16);
@@ -5997,7 +6013,8 @@ ppc_elf_relocate_section (bfd *output_bf
rel[1].r_offset += d_offset;
rel[1].r_addend = rel->r_addend;
}
- bfd_put_32 (output_bfd, insn1, contents + rel->r_offset - d_offset);
+ bfd_put_32 (output_bfd, insn1,
+ contents + rel->r_offset - d_offset);
bfd_put_32 (output_bfd, insn2, contents + offset);
if (tls_gd == 0)
{
@@ -6007,7 +6024,6 @@ ppc_elf_relocate_section (bfd *output_bf
continue;
}
}
- }
break;
}
Index: ld/testsuite/ld-powerpc/tlsso32.d
===================================================================
RCS file: /cvs/src/src/ld/testsuite/ld-powerpc/tlsso32.d,v
retrieving revision 1.14
diff -u -p -r1.14 tlsso32.d
--- ld/testsuite/ld-powerpc/tlsso32.d 17 Oct 2006 13:41:48 -0000 1.14
+++ ld/testsuite/ld-powerpc/tlsso32.d 5 Nov 2007 01:33:52 -0000
@@ -9,30 +9,30 @@
Disassembly of section \.text:
.* <_start>:
-.*: 38 7f ff e8 addi r3,r31,-24
-.*: 48 00 00 01 bl .*
.*: 38 7f ff e0 addi r3,r31,-32
.*: 48 00 00 01 bl .*
-.*: 38 7f ff f0 addi r3,r31,-16
+.*: 38 7f ff f4 addi r3,r31,-12
+.*: 48 00 00 01 bl .*
+.*: 38 7f ff e8 addi r3,r31,-24
.*: 48 01 01 95 bl .*<__tls_get_addr@plt>
-.*: 38 7f ff e0 addi r3,r31,-32
+.*: 38 7f ff f4 addi r3,r31,-12
.*: 48 01 01 8d bl .*<__tls_get_addr@plt>
.*: 39 23 80 20 addi r9,r3,-32736
.*: 3d 23 00 00 addis r9,r3,0
.*: 81 49 80 24 lwz r10,-32732\(r9\)
-.*: 81 3f ff f8 lwz r9,-8\(r31\)
+.*: 81 3f ff f0 lwz r9,-16\(r31\)
.*: 7d 49 12 2e lhzx r10,r9,r2
.*: 89 42 00 00 lbz r10,0\(r2\)
.*: 3d 22 00 00 addis r9,r2,0
.*: 99 49 00 00 stb r10,0\(r9\)
.*: 38 7e ff d8 addi r3,r30,-40
.*: 48 00 00 01 bl .*
-.*: 38 7e ff e0 addi r3,r30,-32
+.*: 38 7e ff f4 addi r3,r30,-12
.*: 48 00 00 01 bl .*
.*: 91 43 80 04 stw r10,-32764\(r3\)
.*: 3d 23 00 00 addis r9,r3,0
.*: 91 49 80 08 stw r10,-32760\(r9\)
-.*: 81 3e ff f8 lwz r9,-8\(r30\)
+.*: 81 3e ff f0 lwz r9,-16\(r30\)
.*: 7d 49 13 2e sthx r10,r9,r2
.*: a1 42 00 00 lhz r10,0\(r2\)
.*: 3d 22 00 00 addis r9,r2,0
--
Alan Modra
Australia Development Lab, IBM