[PATCH] Decreasing the size of .opd on ppc64 (take 2)
Jakub Jelinek
jakub@redhat.com
Mon Aug 16 18:08:00 GMT 2004
Hi!
Attached are both binutils and gcc patches which implement what I talked
about back in June, particularly if the compiler knows it will never look
at the r11 (static chain reg) passed to a function, it doesn't need 0
to be passed there (well, in reality it was sometimes 0 and sometimes
shared library's load address), but can instead be passed the fd_func value of
the next .opd entry, so adjacent .opd entries effectively overlap.
+--------------+-------------+--------------+
| ent0 fd_func | ent0 fd_toc | ent0 fd_aux  |
+--------------+-------------+--------------+-------------+-------------+
                             | ent1 fd_func | ent1 fd_toc | ent1 fd_aux |
                             +--------------+-------------+-------------+
For e.g. nested C functions, where the r11 register is live on entry, an
explicit fd_aux field not overlapped with the next .opd entry is still used.
The linker is able to cope with any mix of overlapped and non-overlapped
.opd entries.
The linker now has a --non-overlapping-opd switch which could be useful
if some post-link processing tool can't cope with this .opd overlapping
(the dynamic linker certainly can cope with it).
When ld is given --non-overlapping-opd, it would canonicalize the above to:
+--------------+-------------+---+--------------+-------------+-------------+
| ent0 fd_func | ent0 fd_toc | 0 | ent1 fd_func | ent1 fd_toc | ent1 fd_aux |
+--------------+-------------+---+--------------+-------------+-------------+
The patch is on top of Alan's no-dot-syms patches
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00557.html
(the binutils side is already in CVS).
Tested with binutils make check, gcc make check and glibc make check
on ppc64-redhat-linux, no regressions.
For glibc, this saves around 18KB of per-process memory, for e.g. libgcj.so
almost 150KB.
Jakub
-------------- next part --------------
2004-08-16 Jakub Jelinek <jakub@redhat.com>
* elf64-ppc.c (ppc64_elf_edit_opd): Support 16 byte long .opd
entries (where fd_aux overlaps next entry's fd_func).
Add non_overlapping argument, use it.
(ppc64_elf_check_relocs, ppc64_elf_gc_mark_hook, adjust_opd_syms,
ppc64_elf_size_stubs, ppc64_elf_relocate_section,
ppc64_elf_output_symbol_hook): Use address / 8 instead of address / 24
as indexes into opd_sym_map/opd_adjust array.
* elf64-ppc.h (ppc64_elf_edit_opd): Adjust prototype.
* emultempl/ppc64elf.em (non_overlapping_opd): New variable.
(ppc_before_allocation): Pass it to ppc64_elf_edit_opd.
(OPTION_NON_OVERLAPPING_OPD): Define.
(PARSE_AND_LIST_OPTIONS, PARSE_AND_LIST_ARGS_CASES): Add
--non-overlapping-opd option.
--- ld/emultempl/ppc64elf.em.jj 2004-08-12 16:21:50.000000000 +0200
+++ ld/emultempl/ppc64elf.em 2004-08-13 09:42:55.933733473 +0200
@@ -51,6 +51,9 @@ static int emit_stub_syms = 0;
static asection *toc_section = 0;
+/* Whether to canonicalize .opd so that there are no overlapping
+ .opd entries. */
+static int non_overlapping_opd = 0;
/* This is called before the input files are opened. We create a new
fake input file to hold the stub sections. */
@@ -89,7 +92,7 @@ ppc_before_allocation (void)
{
if (stub_file != NULL)
{
- if (!ppc64_elf_edit_opd (output_bfd, &link_info))
+ if (!ppc64_elf_edit_opd (output_bfd, &link_info, non_overlapping_opd))
{
einfo ("%X%P: can not edit opd %E\n");
return;
@@ -455,6 +458,7 @@ PARSE_AND_LIST_PROLOGUE='
#define OPTION_DOTSYMS (OPTION_STUBSYMS + 1)
#define OPTION_NO_DOTSYMS (OPTION_DOTSYMS + 1)
#define OPTION_NO_TLS_OPT (OPTION_NO_DOTSYMS + 1)
+#define OPTION_NON_OVERLAPPING_OPD (OPTION_NO_TLS_OPT + 1)
'
PARSE_AND_LIST_LONGOPTS='
@@ -463,6 +467,7 @@ PARSE_AND_LIST_LONGOPTS='
{ "dotsyms", no_argument, NULL, OPTION_DOTSYMS },
{ "no-dotsyms", no_argument, NULL, OPTION_NO_DOTSYMS },
{ "no-tls-optimize", no_argument, NULL, OPTION_NO_TLS_OPT },
+ { "non-overlapping-opd", no_argument, NULL, OPTION_NON_OVERLAPPING_OPD },
'
PARSE_AND_LIST_OPTIONS='
@@ -490,6 +495,10 @@ PARSE_AND_LIST_OPTIONS='
fprintf (file, _("\
--no-tls-optimize Don'\''t try to optimize TLS accesses.\n"
));
+ fprintf (file, _("\
+ --non-overlapping-opd Canonicalize .opd, so that there are no overlapping\n\
+ .opd entries.\n"
+ ));
'
PARSE_AND_LIST_ARGS_CASES='
@@ -517,6 +526,10 @@ PARSE_AND_LIST_ARGS_CASES='
case OPTION_NO_TLS_OPT:
notlsopt = 1;
break;
+
+ case OPTION_NON_OVERLAPPING_OPD:
+ non_overlapping_opd = 1;
+ break;
'
# Put these extra ppc64elf routines in ld_${EMULATION_NAME}_emulation
--- bfd/elf64-ppc.c.jj 2004-08-11 14:26:47.000000000 +0200
+++ bfd/elf64-ppc.c 2004-08-12 17:20:50.000000000 +0200
@@ -4157,7 +4157,7 @@ ppc64_elf_check_relocs (bfd *abfd, struc
};
bfd_size_type amt;
- amt = sec->size * sizeof (union opd_info) / 24;
+ amt = sec->size * sizeof (union opd_info) / 8;
opd_sym_map = bfd_zalloc (abfd, amt);
if (opd_sym_map == NULL)
return FALSE;
@@ -4464,7 +4464,7 @@ ppc64_elf_check_relocs (bfd *abfd, struc
if (s == NULL)
return FALSE;
else if (s != sec)
- opd_sym_map[rel->r_offset / 24] = s;
+ opd_sym_map[rel->r_offset / 8] = s;
}
}
/* Fall through. */
@@ -4843,7 +4843,7 @@ ppc64_elf_gc_mark_hook (asection *sec,
if (!rsec->gc_mark)
_bfd_elf_gc_mark (info, rsec, ppc64_elf_gc_mark_hook);
- rsec = opd_sym_section[sym->st_value / 24];
+ rsec = opd_sym_section[sym->st_value / 8];
}
}
@@ -5607,7 +5607,7 @@ adjust_opd_syms (struct elf_link_hash_en
opd_adjust = get_opd_info (sym_sec);
if (opd_adjust != NULL)
{
- long adjust = opd_adjust[eh->elf.root.u.def.value / 24];
+ long adjust = opd_adjust[eh->elf.root.u.def.value / 8];
if (adjust == -1)
{
/* This entry has been deleted. */
@@ -5628,10 +5628,12 @@ adjust_opd_syms (struct elf_link_hash_en
applications. */
bfd_boolean
-ppc64_elf_edit_opd (bfd *obfd, struct bfd_link_info *info)
+ppc64_elf_edit_opd (bfd *obfd, struct bfd_link_info *info,
+ bfd_boolean non_overlapping)
{
bfd *ibfd;
bfd_boolean some_edited = FALSE;
+ asection *need_pad = NULL;
for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
{
@@ -5643,13 +5645,14 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
bfd_vma offset;
bfd_size_type amt;
long *opd_adjust;
- bfd_boolean need_edit;
+ bfd_boolean need_edit, add_aux_fields;
+ bfd_size_type cnt_16b = 0;
sec = bfd_get_section_by_name (ibfd, ".opd");
if (sec == NULL)
continue;
- amt = sec->size * sizeof (long) / 24;
+ amt = sec->size * sizeof (long) / 8;
opd_adjust = get_opd_info (sec);
if (opd_adjust == NULL)
{
@@ -5680,6 +5683,7 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
/* First run through the relocs to check they are sane, and to
determine whether we need to edit this opd section. */
need_edit = FALSE;
+ need_pad = sec;
offset = 0;
relend = relstart + sec->reloc_count;
for (rel = relstart; rel < relend; )
@@ -5690,7 +5694,7 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
struct elf_link_hash_entry *h;
Elf_Internal_Sym *sym;
- /* .opd contains a regular array of 24 byte entries. We're
+ /* .opd contains a regular array of 16 or 24 byte entries. We're
only interested in the reloc pointing to a function entry
point. */
if (rel->r_offset != offset
@@ -5702,6 +5706,7 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
Also, there's nothing to prevent someone putting
something silly in .opd with the assembler. No .opd
optimization for them! */
+ broken_opd:
(*_bfd_error_handler)
(_("%B: .opd is not a regular array of opd entries"), ibfd);
need_edit = FALSE;
@@ -5749,19 +5754,54 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
|| sym_sec->output_section == bfd_abs_section_ptr)
need_edit = TRUE;
- offset += 24;
rel += 2;
- /* Allow for the possibility of a reloc on the third word. */
- if (rel < relend
- && rel->r_offset == offset - 8)
- rel += 1;
+ if (rel == relend
+ || (rel + 1 == relend && rel->r_offset == offset + 16))
+ {
+ if (sec->size == offset + 24)
+ {
+ need_pad = NULL;
+ break;
+ }
+ if (rel == relend && sec->size == offset + 16)
+ {
+ cnt_16b++;
+ break;
+ }
+ goto broken_opd;
+ }
+
+ if (rel->r_offset == offset + 24)
+ offset += 24;
+ else if (rel->r_offset != offset + 16)
+ goto broken_opd;
+ else if (rel + 1 < relend
+ && ELF64_R_TYPE (rel[0].r_info) == R_PPC64_ADDR64
+ && ELF64_R_TYPE (rel[1].r_info) == R_PPC64_TOC)
+ {
+ offset += 16;
+ cnt_16b++;
+ }
+ else if (rel + 2 < relend
+ && ELF64_R_TYPE (rel[1].r_info) == R_PPC64_ADDR64
+ && ELF64_R_TYPE (rel[2].r_info) == R_PPC64_TOC)
+ {
+ offset += 24;
+ rel += 1;
+ }
+ else
+ goto broken_opd;
}
- if (need_edit)
+ add_aux_fields = non_overlapping && cnt_16b > 0;
+
+ if (need_edit || add_aux_fields)
{
Elf_Internal_Rela *write_rel;
bfd_byte *rptr, *wptr;
+ bfd_byte *new_contents = NULL;
bfd_boolean skip;
+ long opd_ent_size;
/* This seems a waste of time as input .opd sections are all
zeros as generated by gcc, but I suppose there's no reason
@@ -5790,9 +5830,21 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
wptr = sec->contents;
rptr = sec->contents;
+ new_contents = sec->contents;
+
+ if (add_aux_fields)
+ {
+ new_contents = bfd_malloc (sec->size + cnt_16b * 8);
+ if (new_contents == NULL)
+ return FALSE;
+ need_pad = FALSE;
+ wptr = new_contents;
+ }
+
write_rel = relstart;
skip = FALSE;
offset = 0;
+ opd_ent_size = 0;
for (rel = relstart; rel < relend; rel++)
{
unsigned long r_symndx;
@@ -5808,6 +5860,19 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
if (rel->r_offset == offset)
{
struct ppc_link_hash_entry *fdh = NULL;
+
+ /* See if the .opd entry is full 24 byte or
+ 16 byte (with fd_aux entry overlapped with next
+ fd_func). */
+ opd_ent_size = 24;
+ if ((rel + 2 == relend && sec->size == offset + 16)
+ || (rel + 3 < relend
+ && rel[2].r_offset == offset + 16
+ && rel[3].r_offset == offset + 24
+ && ELF64_R_TYPE (rel[2].r_info) == R_PPC64_ADDR64
+ && ELF64_R_TYPE (rel[3].r_info) == R_PPC64_TOC))
+ opd_ent_size = 16;
+
if (h != NULL
&& h->root.root.string[0] == '.')
fdh = get_fdh ((struct ppc_link_hash_entry *) h,
@@ -5824,7 +5889,7 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
fdh->elf.root.u.def.value = 0;
fdh->elf.root.u.def.section = sym_sec;
}
- opd_adjust[rel->r_offset / 24] = -1;
+ opd_adjust[rel->r_offset / 8] = -1;
}
else
{
@@ -5839,7 +5904,7 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
for local symbols, because various places
in the generic ELF code use the value
stored in u.def.value. */
- fdh->elf.root.u.def.value = wptr - sec->contents;
+ fdh->elf.root.u.def.value = wptr - new_contents;
fdh->adjust_done = 1;
}
@@ -5849,14 +5914,20 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
for the function descriptor sym which we
don't have at the moment. So keep an
array of adjustments. */
- opd_adjust[rel->r_offset / 24] = wptr - rptr;
+ opd_adjust[rel->r_offset / 8]
+ = (wptr - new_contents) - (rptr - sec->contents);
if (wptr != rptr)
- memcpy (wptr, rptr, 24);
- wptr += 24;
+ memcpy (wptr, rptr, opd_ent_size);
+ wptr += opd_ent_size;
+ if (add_aux_fields && opd_ent_size == 16)
+ {
+ memset (wptr, '\0', 8);
+ wptr += 8;
+ }
}
- rptr += 24;
- offset += 24;
+ rptr += opd_ent_size;
+ offset += opd_ent_size;
}
if (skip)
@@ -5894,15 +5965,21 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
/* We need to adjust any reloc offsets to point to the
new opd entries. While we're at it, we may as well
remove redundant relocs. */
- rel->r_offset += wptr - rptr;
+ rel->r_offset += opd_adjust[(offset - opd_ent_size) / 8];
if (write_rel != rel)
memcpy (write_rel, rel, sizeof (*rel));
++write_rel;
}
}
- sec->size = wptr - sec->contents;
+ sec->size = wptr - new_contents;
sec->reloc_count = write_rel - relstart;
+ if (add_aux_fields)
+ {
+ free (sec->contents);
+ sec->contents = new_contents;
+ }
+
/* Fudge the size too, as this is used later in
elf_bfd_final_link if we are emitting relocs. */
elf_section_data (sec)->rel_hdr.sh_size
@@ -5926,6 +6003,40 @@ ppc64_elf_edit_opd (bfd *obfd, struct bf
if (some_edited)
elf_link_hash_traverse (elf_hash_table (info), adjust_opd_syms, NULL);
+ /* If we are doing a final link and the last .opd entry is just 16 byte
+ long, add a 8 byte padding after it. */
+ if (need_pad != NULL && !info->relocatable)
+ {
+ bfd_byte *p;
+
+ if ((need_pad->flags & SEC_IN_MEMORY) == 0)
+ {
+ BFD_ASSERT (need_pad->size > 0);
+
+ p = bfd_malloc (need_pad->size + 8);
+ if (p == NULL)
+ return FALSE;
+
+ if (! bfd_get_section_contents (need_pad->owner, need_pad,
+ p, 0, need_pad->size))
+ return FALSE;
+
+ need_pad->contents = p;
+ need_pad->flags |= (SEC_IN_MEMORY | SEC_HAS_CONTENTS);
+ }
+ else
+ {
+ p = bfd_realloc (need_pad->contents, need_pad->size + 8);
+ if (p == NULL)
+ return FALSE;
+
+ need_pad->contents = p;
+ }
+
+ memset (need_pad->contents + need_pad->size, 0, 8);
+ need_pad->size += 8;
+ }
+
return TRUE;
}
@@ -7693,7 +7804,7 @@ ppc64_elf_size_stubs (bfd *output_bfd,
if (hash == NULL)
{
- long adjust = opd_adjust[sym_value / 24];
+ long adjust = opd_adjust[sym_value / 8];
if (adjust == -1)
continue;
sym_value += adjust;
@@ -8231,7 +8342,7 @@ ppc64_elf_relocate_section (bfd *output_
opd_adjust = get_opd_info (sec);
if (opd_adjust != NULL)
{
- long adjust = opd_adjust[(sym->st_value + rel->r_addend) / 24];
+ long adjust = opd_adjust[(sym->st_value + rel->r_addend) / 8];
if (adjust == -1)
relocation = 0;
else
@@ -9527,7 +9638,7 @@ ppc64_elf_output_symbol_hook (struct bfd
if (!info->relocatable)
value -= input_sec->output_section->vma;
- adjust = opd_adjust[value / 24];
+ adjust = opd_adjust[value / 8];
if (adjust == -1)
elfsym->st_value = 0;
else
--- bfd/elf64-ppc.h.jj 2004-08-11 14:26:30.000000000 +0200
+++ bfd/elf64-ppc.h 2004-08-12 16:56:33.000000000 +0200
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place - Suit
void ppc64_elf_init_stub_bfd
(bfd *, struct bfd_link_info *);
bfd_boolean ppc64_elf_edit_opd
- (bfd *, struct bfd_link_info *);
+ (bfd *, struct bfd_link_info *, bfd_boolean);
asection *ppc64_elf_tls_setup
(bfd *, struct bfd_link_info *);
bfd_boolean ppc64_elf_tls_optimize
-------------- next part --------------
2004-08-16 Jakub Jelinek <jakub@redhat.com>
* configure.ac (HAVE_LD_OVERLAPPING_OPD): New test.
* configure: Rebuilt.
* config.in: Rebuilt.
* config/rs6000/rs6000.c (OVERLAPPING_OPD): Define.
(rs6000_elf_declare_function_name): Use it.
--- gcc/configure.ac.jj 2004-08-13 04:49:44.000000000 -0400
+++ gcc/configure.ac 2004-08-16 05:12:36.000000000 -0400
@@ -2689,6 +2689,23 @@ EOF
AC_DEFINE(HAVE_LD_NO_DOT_SYMS, 1,
[Define if your PowerPC64 linker only needs function descriptor syms.])
fi
+ AC_CACHE_CHECK(linker support for overlapping .opd entries,
+ gcc_cv_ld_overlapping_opd,
+ [gcc_cv_ld_overlapping_opd=no
+ if test $in_tree_ld = yes ; then
+ if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 16 -o "$gcc_cv_gld_major_version" -gt 2; then
+ gcc_cv_ld_overlapping_opd=yes
+ fi
+ elif test x$gcc_cv_ld != x ; then
+ if $gcc_cv_ld -melf64ppc --help 2>/dev/null | grep non-overlapping-opd > /dev/null; then
+ gcc_cv_ld_overlapping_opd=yes
+ fi
+ fi
+ ])
+ if test x"$gcc_cv_ld_overlapping_opd" = xyes; then
+ AC_DEFINE(HAVE_LD_OVERLAPPING_OPD, 1,
+ [Define if your PowerPC64 linker handles overlapping .opd entries.])
+ fi
;;
esac
--- gcc/configure.jj 2004-08-13 04:49:44.000000000 -0400
+++ gcc/configure 2004-08-16 05:21:44.000000000 -0400
@@ -11394,6 +11394,32 @@ cat >>confdefs.h <<\_ACEOF
_ACEOF
fi
+ echo "$as_me:$LINENO: checking linker support for overlapping .opd entries" >&5
+echo $ECHO_N "checking linker support for overlapping .opd entries... $ECHO_C" >&6
+if test "${gcc_cv_ld_overlapping_opd+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ gcc_cv_ld_overlapping_opd=no
+ if test $in_tree_ld = yes ; then
+ if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 16 -o "$gcc_cv_gld_major_version" -gt 2; then
+ gcc_cv_ld_overlapping_opd=yes
+ fi
+ elif test x$gcc_cv_ld != x ; then
+ if $gcc_cv_ld -melf64ppc --help 2>/dev/null | grep non-overlapping-opd > /dev/null; then
+ gcc_cv_ld_overlapping_opd=yes
+ fi
+ fi
+
+fi
+echo "$as_me:$LINENO: result: $gcc_cv_ld_overlapping_opd" >&5
+echo "${ECHO_T}$gcc_cv_ld_overlapping_opd" >&6
+ if test x"$gcc_cv_ld_overlapping_opd" = xyes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LD_OVERLAPPING_OPD 1
+_ACEOF
+
+ fi
;;
esac
--- gcc/config.in.jj 2004-08-13 04:49:45.000000000 -0400
+++ gcc/config.in 2004-08-16 05:17:55.000000000 -0400
@@ -315,6 +315,9 @@
/* Define if your PowerPC64 linker only needs function descriptor syms. */
#undef HAVE_LD_NO_DOT_SYMS
+/* Define if your PowerPC64 linker handles overlapping .opd entries. */
+#undef HAVE_LD_OVERLAPPING_OPD
+
/* Define if your linker supports -pie option. */
#undef HAVE_LD_PIE
--- gcc/config/rs6000/rs6000.c.jj 2004-08-13 04:49:44.000000000 -0400
+++ gcc/config/rs6000/rs6000.c 2004-08-16 05:37:48.000000000 -0400
@@ -15487,6 +15487,15 @@ rs6000_elf_asm_out_destructor (rtx symbo
assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
+#ifdef HAVE_LD_OVERLAPPING_OPD
+/* If the linker supports overlapping .opd entries and we know this function
+ doesn't ever use r11 passed to it, we can overlap the fd_aux function
+ descriptor field with next function descriptor's fd_func field. */
+# define OVERLAPPING_OPD (cfun->static_chain_decl == NULL)
+#else
+# define OVERLAPPING_OPD 0
+#endif
+
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
@@ -15496,7 +15505,8 @@ rs6000_elf_declare_function_name (FILE *
ASM_OUTPUT_LABEL (file, name);
fputs (DOUBLE_INT_ASM_OP, file);
rs6000_output_function_entry (file, name);
- fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
+ fprintf (file, ",.TOC.@tocbase%s\n\t.previous\n",
+ OVERLAPPING_OPD ? "" : ",0");
if (DOT_SYMBOLS)
{
fputs ("\t.size\t", file);
More information about the Binutils
mailing list