automatic overlays for SPU

Alan Modra amodra@bigpond.net.au
Tue Apr 8 04:01:00 GMT 2008


Automatic overlay script generation for SPU.  The linker uses a call
graph to choose sections to pack into overlays, so the result should
be at least a somewhat intelligent partitioning.  The algorithm takes
a call graph like the following:

                    A
                   /|\
                  / | \
                 B  C  D
                /|    / \
               E F   G   H
              /|\       /|\
             I J K     L M N
                            \
                             O

It flattens the graph to a list, picking off the deepest call, then
the parent of that call, then the other children of that parent, and
so on, giving:

O,N,H,L,M,D,G,A,I,E,J,K,B,F,C
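
In case the ordering isn't obvious, here's a throwaway sketch of that
traversal in plain C.  It uses a simplified node type rather than the
patch's function_info/call_info structures, and assumes each node's
call list is already sorted deepest-first, as sort_calls arranges.
Run on the D subtree above it prints O,N,H,L,M,D,G:

#include <stdio.h>

struct node
{
  const char *name;
  struct node **calls;   /* callees, deepest subtree first */
  int num_calls;
  int visited;
};

/* Emit NODE's deepest call first, then NODE itself, then the rest of
   NODE's callees, depth first.  This mirrors collect_overlays minus
   the rodata and pasted-section handling.  */

static void
flatten (struct node *node, const char **out, int *n)
{
  int i;

  if (node->visited)
    return;
  node->visited = 1;

  if (node->num_calls != 0)
    flatten (node->calls[0], out, n);
  out[(*n)++] = node->name;
  for (i = 0; i < node->num_calls; i++)
    flatten (node->calls[i], out, n);
}

int
main (void)
{
  /* The D subtree from the example graph above.  */
  struct node o = { "O", 0, 0, 0 };
  struct node *n_calls[] = { &o };
  struct node n = { "N", n_calls, 1, 0 };
  struct node l = { "L", 0, 0, 0 }, m = { "M", 0, 0, 0 };
  struct node *h_calls[] = { &n, &l, &m };
  struct node h = { "H", h_calls, 3, 0 };
  struct node g = { "G", 0, 0, 0 };
  struct node *d_calls[] = { &h, &g };
  struct node d = { "D", d_calls, 2, 0 };
  const char *out[8];
  int count = 0, i;

  flatten (&d, out, &count);
  for (i = 0; i < count; i++)
    printf ("%s%c", out[i], i + 1 < count ? ',' : '\n');
  return 0;
}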

The linker then greedily packs these sections into overlays, starting
a new overlay when a section won't fit.  This could be improved by
being less greedy in some cases.  For example, it would be better to
choose (O,N,H,L,M,D,G,A), (I,E,J,K,B,F,C) rather than
(O,N,H,L,M,D,G,A,I), (E,J,K,B,F,C), since keeping I with its only
caller E avoids a call between the two overlays.
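
Stripped of the rodata pairing, pasted-function grouping and call
stub accounting that the real spu_elf_auto_overlay does, the packing
loop is essentially this (the sizes and 6-unit buffer in main are
made up for illustration):

#include <stdio.h>

/* Greedily partition sizes[0..count-1] into overlays of at most
   overlay_size bytes each, starting a new overlay when the next
   section won't fit.  Returns the number of overlays, or 0 if a
   single section is too big for the overlay buffer.  */

static unsigned int
pack_overlays (const unsigned int *sizes, unsigned int count,
               unsigned int overlay_size)
{
  unsigned int base = 0, ovlynum = 0;

  while (base < count)
    {
      unsigned int i, size = 0;

      for (i = base; i < count; i++)
        {
          if (size + sizes[i] > overlay_size)
            break;
          size += sizes[i];
        }
      if (i == base)
        return 0;
      /* Sections base..i-1 become .ovlyN in the generated script.  */
      ovlynum += 1;
      base = i;
    }
  return ovlynum;
}

int
main (void)
{
  /* Arbitrary section sizes; with a 6-unit buffer these pack into
     three overlays: (2,1,1,1,1), (3,2), (2).  */
  unsigned int sizes[] = { 2, 1, 1, 1, 1, 3, 2, 2 };

  printf ("%u overlays\n",
          pack_overlays (sizes, sizeof (sizes) / sizeof (sizes[0]), 6));
  return 0;
}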

I may make this improvement at some stage, but for now it's time to
stop tinkering and get something committed.
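
For reference, the script the patch writes ends up looking something
like the following.  The file and section names (foo.o, bar.o, fn1
and friends) are made up; the [f]/[b] spelling is just how each file
name is emitted, with the leading character written as a one-letter
glob class that matches itself.

SECTIONS
{
 OVERLAY :
 {
  .ovly1 {
   [f]oo.o (.text.fn1)
   [f]oo.o (.rodata.fn1)
  }
  .ovly2 {
   [b]ar.o (.text.fn2)
   [b]ar.o (.text.fn3)
  }
 }
}
INSERT AFTER .text;

With --auto-relink the link is then rerun automatically with
--no-auto-overlay and -T pointing at this file.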

bfd/
	* elf32-spu.c: Include libiberty.h.
	(struct spu_link_hash_table): Add local_store, overlay_fixed, reserved,
	non_ovly_stub, spu_elf_load_ovl_mgr, spu_elf_open_overlay_script,
	spu_elf_relink, auto_overlay fields.
	(AUTO_OVERLAY, AUTO_RELINK, OVERLAY_RODATA): Define.
	(needs_ovl_stub): Flip test so that call to non-function warning
	is emitted during relocate_section rather than earlier.
	(spu_elf_check_vma): Stash --auto-overlay parameters, and clear
	auto_overlay if no section exceeds local store.
	(struct call_info): Add count, max_depth, is_pasted fields.
	(struct function_info): Add rodata, last_caller, call_count,
	depth, new visit flags.
	(insert_callee): Increment call count.
	(copy_callee): New function.
	(mark_functions_via_relocs): Investigate all reloc types to count
	possible function pointer stubs for --auto-overlay.  Track
	last_caller and increment function call_count.
	(pasted_function): Insert a "call" into call info for pasted section.
	(remove_cycles): Track max depth of calls.  Don't emit call graph
	pruning warning for --auto-overlay.
	(build_call_tree): Don't transfer_calls for --auto-overlay.
	Adjust remove_cycles call.
	(sort_calls, sort_lib, sort_bfds): New functions.
	(struct _mos_param, struct _uos_param, struct _cl_param): New.
	(mark_overlay_section, unmark_overlay_section): New functions.
	(collect_lib_sections, auto_ovl_lib_functions): New functions.
	(collect_overlays, find_pasted_call): New functions.
	(sum_stack): Deal with is_pasted "calls".  Exit before printing
	when --auto-overlay.
	(spu_elf_auto_overlay): New function.
	(spu_elf_final_link): Call spu_elf_auto_overlay.
	* elf32-spu.h (spu_elf_check_vma): Update prototype.
ld/
	* emultempl/spuelf.em (auto_overlay, auto_overlay_file,
	auto_overlay_fixed, auto_overlay_reserved, my_argc, my_argv): New vars.
	(spu_before_allocation): Warn on --auto-overlay and existing overlays.
	(struct tflist, clean_tmp): Move.
	(new_tmp_file): New function, extracted from..
	(embedded_spu_file): ..here.
	(spu_elf_open_overlay_script, spu_elf_relink): New functions.
	(gld${EMULATION_NAME}_finish): Pass a bunch of --auto-overlay params.
	Warn on --auto-overlay and zero local store.
	(gld${EMULATION_NAME}_choose_target): New function to stash argv.
	(OPTION_SPU_AUTO_OVERLAY, OPTION_SPU_AUTO_RELINK,
	OPTION_SPU_OVERLAY_RODATA, OPTION_SPU_FIXED_SPACE,
	OPTION_SPU_RESERVED_SPACE, OPTION_SPU_NO_AUTO_OVERLAY): Define.
	(PARSE_AND_LIST_LONGOPTS): Add entries for new options.
	(PARSE_AND_LIST_OPTIONS): Likewise.
	(PARSE_AND_LIST_ARGS_CASES): Likewise.
	(LDEMUL_CHOOSE_TARGET): Define.

Index: bfd/elf32-spu.c
===================================================================
RCS file: /cvs/src/src/bfd/elf32-spu.c,v
retrieving revision 1.37
diff -u -p -r1.37 elf32-spu.c
--- bfd/elf32-spu.c	7 Apr 2008 13:07:23 -0000	1.37
+++ bfd/elf32-spu.c	7 Apr 2008 23:30:04 -0000
@@ -19,6 +19,7 @@
    51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
 
 #include "sysdep.h"
+#include "libiberty.h"
 #include "bfd.h"
 #include "bfdlink.h"
 #include "libbfd.h"
@@ -278,6 +279,29 @@ struct spu_link_hash_table
   /* Total number of overlays.  */
   unsigned int num_overlays;
 
+  /* How much memory we have.  */
+  unsigned int local_store;
+  /* Local store --auto-overlay should reserve for non-overlay
+     functions and data.  */
+  unsigned int overlay_fixed;
+  /* Local store --auto-overlay should reserve for stack and heap.  */
+  unsigned int reserved;
+  /* Count of overlay stubs needed in non-overlay area.  */
+  unsigned int non_ovly_stub;
+
+  /* Stash various callbacks for --auto-overlay.  */
+  void (*spu_elf_load_ovl_mgr) (void);
+  FILE *(*spu_elf_open_overlay_script) (void);
+  void (*spu_elf_relink) (void);
+
+  /* Bit 0 set if --auto-overlay.
+     Bit 1 set if --auto-relink.
+     Bit 2 set if --overlay-rodata.  */
+  unsigned int auto_overlay : 3;
+#define AUTO_OVERLAY 1
+#define AUTO_RELINK 2
+#define OVERLAY_RODATA 4
+
   /* Set if we should emit symbols for stubs.  */
   unsigned int emit_stub_syms:1;
 
@@ -721,7 +745,7 @@ needs_ovl_stub (struct elf_link_hash_ent
 	  branch = TRUE;
 	  if ((contents[0] & 0xfd) == 0x31
 	      && sym_type != STT_FUNC
-	      && contents == insn)
+	      && contents != insn)
 	    {
 	      /* It's common for people to write assembly and forget
 		 to give function symbols the right type.  Handle
@@ -1490,15 +1514,33 @@ spu_elf_build_stubs (struct bfd_link_inf
 }
 
 /* Check that all loadable section VMAs lie in the range
-   LO .. HI inclusive.  */
+   LO .. HI inclusive, and stash some parameters for --auto-overlay.  */
 
 asection *
-spu_elf_check_vma (struct bfd_link_info *info, bfd_vma lo, bfd_vma hi)
+spu_elf_check_vma (struct bfd_link_info *info,
+		   int auto_overlay,
+		   unsigned int lo,
+		   unsigned int hi,
+		   unsigned int overlay_fixed,
+		   unsigned int reserved,
+		   void (*spu_elf_load_ovl_mgr) (void),
+		   FILE *(*spu_elf_open_overlay_script) (void),
+		   void (*spu_elf_relink) (void))
 {
   struct elf_segment_map *m;
   unsigned int i;
+  struct spu_link_hash_table *htab = spu_hash_table (info);
   bfd *abfd = info->output_bfd;
 
+  if (auto_overlay & AUTO_OVERLAY)
+    htab->auto_overlay = auto_overlay;
+  htab->local_store = hi + 1 - lo;
+  htab->overlay_fixed = overlay_fixed;
+  htab->reserved = reserved;
+  htab->spu_elf_load_ovl_mgr = spu_elf_load_ovl_mgr;
+  htab->spu_elf_open_overlay_script = spu_elf_open_overlay_script;
+  htab->spu_elf_relink = spu_elf_relink;
+
   for (m = elf_tdata (abfd)->segment_map; m != NULL; m = m->next)
     if (m->p_type == PT_LOAD)
       for (i = 0; i < m->count; i++)
@@ -1508,6 +1550,8 @@ spu_elf_check_vma (struct bfd_link_info 
 		|| m->sections[i]->vma + m->sections[i]->size - 1 > hi))
 	  return m->sections[i];
 
+  /* No need for overlays if it all fits.  */
+  htab->auto_overlay = 0;
   return NULL;
 }
 
@@ -1643,7 +1687,10 @@ struct call_info
 {
   struct function_info *fun;
   struct call_info *next;
+  unsigned int count;
+  unsigned int max_depth;
   unsigned int is_tail : 1;
+  unsigned int is_pasted : 1;
 };
 
 struct function_info
@@ -1660,21 +1707,34 @@ struct function_info
   } u;
   /* Function section.  */
   asection *sec;
+  asection *rodata;
+  /* Where last called from, and number of sections called from.  */
+  asection *last_caller;
+  unsigned int call_count;
   /* Address range of (this part of) function.  */
   bfd_vma lo, hi;
   /* Stack usage.  */
   int stack;
+  /* Distance from root of call tree.  Tail and hot/cold branches
+     count as one deeper.  We aren't counting stack frames here.  */
+  unsigned int depth;
   /* Set if global symbol.  */
   unsigned int global : 1;
   /* Set if known to be start of function (as distinct from a hunk
      in hot/cold section.  */
   unsigned int is_func : 1;
-  /* Flags used during call tree traversal.  */
-  unsigned int visit1 : 1;
+  /* Set if not a root node.  */
   unsigned int non_root : 1;
+  /* Flags used during call tree traversal.  It's cheaper to replicate
+     the visit flags than have one which needs clearing after a traversal.  */
+  unsigned int visit1 : 1;
   unsigned int visit2 : 1;
   unsigned int marking : 1;
   unsigned int visit3 : 1;
+  unsigned int visit4 : 1;
+  unsigned int visit5 : 1;
+  unsigned int visit6 : 1;
+  unsigned int visit7 : 1;
 };
 
 struct spu_elf_stack_info
@@ -1934,7 +1994,9 @@ find_function (asection *sec, bfd_vma of
   return NULL;
 }
 
-/* Add CALLEE to CALLER call list if not already present.  */
+/* Add CALLEE to CALLER call list if not already present.  Return TRUE
+   if CALLEE was new.  If this function returns FALSE, CALLEE should
+   be freed.  */
 
 static bfd_boolean
 insert_callee (struct function_info *caller, struct call_info *callee)
@@ -1952,6 +2014,7 @@ insert_callee (struct function_info *cal
 	    p->fun->start = NULL;
 	    p->fun->is_func = TRUE;
 	  }
+	p->count += 1;
 	/* Reorder list so most recent call is first.  */
 	*pp = p->next;
 	p->next = caller->call_list;
@@ -1959,10 +2022,26 @@ insert_callee (struct function_info *cal
 	return FALSE;
       }
   callee->next = caller->call_list;
+  callee->count += 1;
   caller->call_list = callee;
   return TRUE;
 }
 
+/* Copy CALL and insert the copy into CALLER.  */
+
+static bfd_boolean
+copy_callee (struct function_info *caller, const struct call_info *call)
+{
+  struct call_info *callee;
+  callee = bfd_malloc (sizeof (*callee));
+  if (callee == NULL)
+    return FALSE;
+  *callee = *call;
+  if (!insert_callee (caller, callee))
+    free (callee);
+  return TRUE;
+}
+
 /* We're only interested in code sections.  Testing SEC_IN_MEMORY excludes
    overlay stub sections.  */
 
@@ -2015,15 +2094,19 @@ mark_functions_via_relocs (asection *sec
       Elf_Internal_Sym *sym;
       struct elf_link_hash_entry *h;
       bfd_vma val;
-      unsigned char insn[4];
-      bfd_boolean is_call;
+      bfd_boolean reject, is_call;
       struct function_info *caller;
       struct call_info *callee;
 
+      reject = FALSE;
       r_type = ELF32_R_TYPE (irela->r_info);
       if (r_type != R_SPU_REL16
 	  && r_type != R_SPU_ADDR16)
-	continue;
+	{
+	  reject = TRUE;
+	  if (!(call_tree && spu_hash_table (info)->auto_overlay))
+	    continue;
+	}
 
       r_indx = ELF32_R_SYM (irela->r_info);
       if (!get_sym_h (&h, &sym, &sym_sec, psyms, r_indx, sec->owner))
@@ -2034,27 +2117,53 @@ mark_functions_via_relocs (asection *sec
 	  || sym_sec->output_section->owner != info->output_bfd)
 	continue;
 
-      if (!bfd_get_section_contents (sec->owner, sec, insn,
-				     irela->r_offset, 4))
-	return FALSE;
-      if (!is_branch (insn))
-	continue;
+      is_call = FALSE;
+      if (!reject)
+	{
+	  unsigned char insn[4];
 
-      if ((sym_sec->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE))
-	  != (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+	  if (!bfd_get_section_contents (sec->owner, sec, insn,
+					 irela->r_offset, 4))
+	    return FALSE;
+	  if (is_branch (insn))
+	    {
+	      is_call = (insn[0] & 0xfd) == 0x31;
+	      if ((sym_sec->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+		  != (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+		{
+		  if (!warned)
+		    info->callbacks->einfo
+		      (_("%B(%A+0x%v): call to non-code section"
+			 " %B(%A), analysis incomplete\n"),
+		       sec->owner, sec, irela->r_offset,
+		       sym_sec->owner, sym_sec);
+		  warned = TRUE;
+		  continue;
+		}
+	    }
+	  else
+	    {
+	      reject = TRUE;
+	      if (!(call_tree && spu_hash_table (info)->auto_overlay)
+		  || is_hint (insn))
+		continue;
+	    }
+	}
+
+      if (reject)
 	{
-	  if (!call_tree)
-	    warned = TRUE;
-	  if (!call_tree || !warned)
-	    info->callbacks->einfo (_("%B(%A+0x%v): call to non-code section"
-				      " %B(%A), stack analysis incomplete\n"),
-				    sec->owner, sec, irela->r_offset,
-				    sym_sec->owner, sym_sec);
+	  /* For --auto-overlay, count possible stubs we need for
+	     function pointer references.  */
+	  unsigned int sym_type;
+	  if (h)
+	    sym_type = h->type;
+	  else
+	    sym_type = ELF_ST_TYPE (sym->st_info);
+	  if (sym_type == STT_FUNC)
+	    spu_hash_table (info)->non_ovly_stub += 1;
 	  continue;
 	}
 
-      is_call = (insn[0] & 0xfd) == 0x31;
-
       if (h)
 	val = h->root.u.def.value;
       else
@@ -2098,6 +2207,13 @@ mark_functions_via_relocs (asection *sec
       if (callee->fun == NULL)
 	return FALSE;
       callee->is_tail = !is_call;
+      callee->is_pasted = FALSE;
+      callee->count = 0;
+      if (callee->fun->last_caller != sec)
+	{
+	  callee->fun->last_caller = sec;
+	  callee->fun->call_count += 1;
+	}
       if (!insert_callee (caller, callee))
 	free (callee);
       else if (!is_call
@@ -2168,8 +2284,21 @@ pasted_function (asection *sec, struct b
       if (l->u.indirect.section == sec)
 	{
 	  if (fun_start != NULL)
-	    fun->start = fun_start;
-	  return TRUE;
+	    {
+	      struct call_info *callee = bfd_malloc (sizeof *callee);
+	      if (callee == NULL)
+		return FALSE;
+
+	      fun->start = fun_start;
+	      callee->fun = fun;
+	      callee->is_tail = TRUE;
+	      callee->is_pasted = TRUE;
+	      callee->count = 0;
+	      if (!insert_callee (fun_start, callee))
+		free (callee);
+	      return TRUE;
+	    }
+	  break;
 	}
       if (l->type == bfd_indirect_link_order
 	  && (sec_data = spu_elf_section_data (l->u.indirect.section)) != NULL
@@ -2509,15 +2638,18 @@ mark_non_root (struct function_info *fun
   return TRUE;
 }
 
-/* Remove cycles from the call graph.  */
+/* Remove cycles from the call graph.  Set depth of nodes.  */
 
 static bfd_boolean
 remove_cycles (struct function_info *fun,
 	       struct bfd_link_info *info,
-	       void *param ATTRIBUTE_UNUSED)
+	       void *param)
 {
   struct call_info **callp, *call;
+  unsigned int depth = *(unsigned int *) param;
+  unsigned int max_depth = depth;
 
+  fun->depth = depth;
   fun->visit2 = TRUE;
   fun->marking = TRUE;
 
@@ -2526,17 +2658,23 @@ remove_cycles (struct function_info *fun
     {
       if (!call->fun->visit2)
 	{
-	  if (!remove_cycles (call->fun, info, 0))
+	  call->max_depth = depth + !call->is_pasted;
+	  if (!remove_cycles (call->fun, info, &call->max_depth))
 	    return FALSE;
+	  if (max_depth < call->max_depth)
+	    max_depth = call->max_depth;
 	}
       else if (call->fun->marking)
 	{
-	  const char *f1 = func_name (fun);
-	  const char *f2 = func_name (call->fun);
+	  if (!spu_hash_table (info)->auto_overlay)
+	    {
+	      const char *f1 = func_name (fun);
+	      const char *f2 = func_name (call->fun);
 
-	  info->callbacks->info (_("Stack analysis will ignore the call "
-				   "from %s to %s\n"),
-				 f1, f2);
+	      info->callbacks->info (_("Stack analysis will ignore the call "
+				       "from %s to %s\n"),
+				     f1, f2);
+	    }
 	  *callp = call->next;
 	  free (call);
 	  continue;
@@ -2544,6 +2682,7 @@ remove_cycles (struct function_info *fun
       callp = &call->next;
     }
   fun->marking = FALSE;
+  *(unsigned int *) param = max_depth;
   return TRUE;
 }
 
@@ -2553,6 +2692,7 @@ static bfd_boolean
 build_call_tree (struct bfd_link_info *info)
 {
   bfd *ibfd;
+  unsigned int depth;
 
   for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
     {
@@ -2569,7 +2709,8 @@ build_call_tree (struct bfd_link_info *i
 
   /* Transfer call info from hot/cold section part of function
      to main entry.  */
-  if (!for_each_node (transfer_calls, info, 0, FALSE))
+  if (!spu_hash_table (info)->auto_overlay
+      && !for_each_node (transfer_calls, info, 0, FALSE))
     return FALSE;
 
   /* Find the call graph root(s).  */
@@ -2578,7 +2719,526 @@ build_call_tree (struct bfd_link_info *i
 
   /* Remove cycles from the call graph.  We start from the root node(s)
      so that we break cycles in a reasonable place.  */
-  return for_each_node (remove_cycles, info, 0, TRUE);
+  depth = 0;
+  return for_each_node (remove_cycles, info, &depth, TRUE);
+}
+
+/* qsort predicate to sort calls by max_depth then count.  */
+
+static int
+sort_calls (const void *a, const void *b)
+{
+  struct call_info *const *c1 = a;
+  struct call_info *const *c2 = b;
+  int delta;
+
+  delta = (*c2)->max_depth - (*c1)->max_depth;
+  if (delta != 0)
+    return delta;
+
+  delta = (*c2)->count - (*c1)->count;
+  if (delta != 0)
+    return delta;
+
+  return c1 - c2;
+}
+
+struct _mos_param {
+  unsigned int max_overlay_size;
+};
+
+/* Set linker_mark and gc_mark on any sections that we will put in
+   overlays.  These flags are used by the generic ELF linker, but we
+   won't be continuing on to bfd_elf_final_link so it is OK to use
+   them.  linker_mark is clear before we get here.  Set segment_mark
+   on sections that are part of a pasted function (excluding the last
+   section).
+
+   Set up function rodata section if --overlay-rodata.  We don't
+   currently include merged string constant rodata sections since
+
+   Sort the call graph so that the deepest nodes will be visited
+   first.  */
+
+static bfd_boolean
+mark_overlay_section (struct function_info *fun,
+		      struct bfd_link_info *info,
+		      void *param)
+{
+  struct call_info *call;
+  unsigned int count;
+  struct _mos_param *mos_param = param;
+
+  if (fun->visit4)
+    return TRUE;
+
+  fun->visit4 = TRUE;
+  if (!fun->sec->linker_mark)
+    {
+      fun->sec->linker_mark = 1;
+      fun->sec->gc_mark = 1;
+      fun->sec->segment_mark = 0;
+      /* Ensure SEC_CODE is set on this text section (it ought to
+	 be!), and SEC_CODE is clear on rodata sections.  We use
+	 this flag to differentiate the two overlay section types.  */
+      fun->sec->flags |= SEC_CODE;
+      if (spu_hash_table (info)->auto_overlay & OVERLAY_RODATA)
+	{
+	  char *name = NULL;
+	  unsigned int size;
+
+	  /* Find the rodata section corresponding to this function's
+	     text section.  */
+	  if (strcmp (fun->sec->name, ".text") == 0)
+	    {
+	      name = bfd_malloc (sizeof (".rodata"));
+	      if (name == NULL)
+		return FALSE;
+	      memcpy (name, ".rodata", sizeof (".rodata"));
+	    }
+	  else if (strncmp (fun->sec->name, ".text.", 6) == 0)
+	    {
+	      size_t len = strlen (fun->sec->name);
+	      name = bfd_malloc (len + 3);
+	      if (name == NULL)
+		return FALSE;
+	      memcpy (name, ".rodata", sizeof (".rodata"));
+	      memcpy (name + 7, fun->sec->name + 5, len - 4);
+	    }
+	  else if (strncmp (fun->sec->name, ".gnu.linkonce.t.", 16) == 0)
+	    {
+	      size_t len = strlen (fun->sec->name) + 1;
+	      name = bfd_malloc (len);
+	      if (name == NULL)
+		return FALSE;
+	      memcpy (name, fun->sec->name, len);
+	      name[14] = 'r';
+	    }
+
+	  if (name != NULL)
+	    {
+	      asection *rodata = NULL;
+	      asection *group_sec = elf_section_data (fun->sec)->next_in_group;
+	      if (group_sec == NULL)
+		rodata = bfd_get_section_by_name (fun->sec->owner, name);
+	      else
+		while (group_sec != NULL && group_sec != fun->sec)
+		  {
+		    if (strcmp (group_sec->name, name) == 0)
+		      {
+			rodata = group_sec;
+			break;
+		      }
+		    group_sec = elf_section_data (group_sec)->next_in_group;
+		  }
+	      fun->rodata = rodata;
+	      if (fun->rodata)
+		{
+		  fun->rodata->linker_mark = 1;
+		  fun->rodata->gc_mark = 1;
+		  fun->rodata->flags &= ~SEC_CODE;
+		}
+	      free (name);
+	    }
+	  size = fun->sec->size;
+	  if (fun->rodata)
+	    size += fun->rodata->size;
+	  if (mos_param->max_overlay_size < size)
+	    mos_param->max_overlay_size = size;
+	}
+    }
+
+  for (count = 0, call = fun->call_list; call != NULL; call = call->next)
+    count += 1;
+
+  if (count > 1)
+    {
+      struct call_info **calls = bfd_malloc (count * sizeof (*calls));
+      if (calls == NULL)
+	return FALSE;
+
+      for (count = 0, call = fun->call_list; call != NULL; call = call->next)
+	calls[count++] = call;
+
+      qsort (calls, count, sizeof (*calls), sort_calls);
+
+      fun->call_list = NULL;
+      while (count != 0)
+	{
+	  --count;
+	  calls[count]->next = fun->call_list;
+	  fun->call_list = calls[count];
+	}
+      free (calls);
+    }
+
+  for (call = fun->call_list; call != NULL; call = call->next)
+    {
+      if (call->is_pasted)
+	{
+	  /* There can only be one is_pasted call per function_info.  */
+	  BFD_ASSERT (!fun->sec->segment_mark);
+	  fun->sec->segment_mark = 1;
+	}
+      if (!mark_overlay_section (call->fun, info, param))
+	return FALSE;
+    }
+
+  /* Don't put entry code into an overlay.  The overlay manager needs
+     a stack!  */
+  if (fun->lo + fun->sec->output_offset + fun->sec->output_section->vma
+      == info->output_bfd->start_address)
+    {
+      fun->sec->linker_mark = 0;
+      if (fun->rodata != NULL)
+	fun->rodata->linker_mark = 0;
+    }
+  return TRUE;
+}
+
+struct _uos_param {
+  asection *exclude_input_section;
+  asection *exclude_output_section;
+  unsigned long clearing;
+};
+
+/* Undo some of mark_overlay_section's work.  */
+
+static bfd_boolean
+unmark_overlay_section (struct function_info *fun,
+			struct bfd_link_info *info,
+			void *param)
+{
+  struct call_info *call;
+  struct _uos_param *uos_param = param;
+  unsigned int excluded = 0;
+
+  if (fun->visit5)
+    return TRUE;
+
+  fun->visit5 = TRUE;
+
+  excluded = 0;
+  if (fun->sec == uos_param->exclude_input_section
+      || fun->sec->output_section == uos_param->exclude_output_section)
+    excluded = 1;
+
+  uos_param->clearing += excluded;
+
+  if (uos_param->clearing)
+    {
+      fun->sec->linker_mark = 0;
+      if (fun->rodata)
+	fun->rodata->linker_mark = 0;
+    }
+
+  for (call = fun->call_list; call != NULL; call = call->next)
+    if (!unmark_overlay_section (call->fun, info, param))
+      return FALSE;
+
+  uos_param->clearing -= excluded;
+  return TRUE;
+}
+
+struct _cl_param {
+  unsigned int lib_size;
+  asection **lib_sections;
+};
+
+/* Add sections we have marked as belonging to overlays to an array
+   for consideration as non-overlay sections.  The array consists of
+   pairs of sections, (text,rodata), for functions in the call graph.  */
+
+static bfd_boolean
+collect_lib_sections (struct function_info *fun,
+		      struct bfd_link_info *info,
+		      void *param)
+{
+  struct _cl_param *lib_param = param;
+  struct call_info *call;
+  unsigned int size;
+
+  if (fun->visit6)
+    return TRUE;
+
+  fun->visit6 = TRUE;
+  if (!fun->sec->linker_mark || !fun->sec->gc_mark || fun->sec->segment_mark)
+    return TRUE;
+
+  size = fun->sec->size;
+  if (fun->rodata)
+    size += fun->rodata->size;
+  if (size > lib_param->lib_size)
+    return TRUE;
+
+  *lib_param->lib_sections++ = fun->sec;
+  fun->sec->gc_mark = 0;
+  if (fun->rodata && fun->rodata->linker_mark && fun->rodata->gc_mark)
+    {
+      *lib_param->lib_sections++ = fun->rodata;
+      fun->rodata->gc_mark = 0;
+    }
+  else
+    *lib_param->lib_sections++ = NULL;
+
+  for (call = fun->call_list; call != NULL; call = call->next)
+    collect_lib_sections (call->fun, info, param);
+
+  return TRUE;
+}
+
+/* qsort predicate to sort sections by call count.  */
+
+static int
+sort_lib (const void *a, const void *b)
+{
+  asection *const *s1 = a;
+  asection *const *s2 = b;
+  struct _spu_elf_section_data *sec_data;
+  struct spu_elf_stack_info *sinfo;
+  int delta;
+
+  delta = 0;
+  if ((sec_data = spu_elf_section_data (*s1)) != NULL
+      && (sinfo = sec_data->u.i.stack_info) != NULL)
+    {
+      int i;
+      for (i = 0; i < sinfo->num_fun; ++i)
+	delta -= sinfo->fun[i].call_count;
+    }
+
+  if ((sec_data = spu_elf_section_data (*s2)) != NULL
+      && (sinfo = sec_data->u.i.stack_info) != NULL)
+    {
+      int i;
+      for (i = 0; i < sinfo->num_fun; ++i)
+	delta += sinfo->fun[i].call_count;
+    }
+
+  if (delta != 0)
+    return delta;
+
+  return s1 - s2;
+}
+
+/* Remove some sections from those marked to be in overlays.  Choose
+   those that are called from many places, likely library functions.  */
+
+static unsigned int
+auto_ovl_lib_functions (struct bfd_link_info *info, unsigned int lib_size)
+{
+  bfd *ibfd;
+  asection **lib_sections;
+  unsigned int i, lib_count;
+  struct _cl_param collect_lib_param;
+  struct function_info dummy_caller;
+
+  memset (&dummy_caller, 0, sizeof (dummy_caller));
+  lib_count = 0;
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      asection *sec;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	if (sec->linker_mark
+	    && sec->size < lib_size
+	    && (sec->flags & SEC_CODE) != 0)
+	  lib_count += 1;
+    }
+  lib_sections = bfd_malloc (lib_count * 2 * sizeof (*lib_sections));
+  if (lib_sections == NULL)
+    return (unsigned int) -1;
+  collect_lib_param.lib_size = lib_size;
+  collect_lib_param.lib_sections = lib_sections;
+  if (!for_each_node (collect_lib_sections, info, &collect_lib_param,
+		      TRUE))
+    return (unsigned int) -1;
+  lib_count = (collect_lib_param.lib_sections - lib_sections) / 2;
+
+  /* Sort sections so that those with the most calls are first.  */
+  if (lib_count > 1)
+    qsort (lib_sections, lib_count, 2 * sizeof (*lib_sections), sort_lib);
+
+  for (i = 0; i < lib_count; i++)
+    {
+      unsigned int tmp, stub_size;
+      asection *sec;
+      struct _spu_elf_section_data *sec_data;
+      struct spu_elf_stack_info *sinfo;
+
+      sec = lib_sections[2 * i];
+      /* If this section is OK, its size must be less than lib_size.  */
+      tmp = sec->size;
+      /* If it has a rodata section, then add that too.  */
+      if (lib_sections[2 * i + 1])
+	tmp += lib_sections[2 * i + 1]->size;
+      /* Add any new overlay call stubs needed by the section.  */
+      stub_size = 0;
+      if (tmp < lib_size
+	  && (sec_data = spu_elf_section_data (sec)) != NULL
+	  && (sinfo = sec_data->u.i.stack_info) != NULL)
+	{
+	  int k;
+	  struct call_info *call;
+
+	  for (k = 0; k < sinfo->num_fun; ++k)
+	    for (call = sinfo->fun[k].call_list; call; call = call->next)
+	      if (call->fun->sec->linker_mark)
+		{
+		  struct call_info *p;
+		  for (p = dummy_caller.call_list; p; p = p->next)
+		    if (p->fun == call->fun)
+		      break;
+		  if (!p)
+		    stub_size += OVL_STUB_SIZE;
+		}
+	}
+      if (tmp + stub_size < lib_size)
+	{
+	  struct call_info **pp, *p;
+
+	  /* This section fits.  Mark it as non-overlay.  */
+	  lib_sections[2 * i]->linker_mark = 0;
+	  if (lib_sections[2 * i + 1])
+	    lib_sections[2 * i + 1]->linker_mark = 0;
+	  lib_size -= tmp + stub_size;
+	  /* Call stubs to the section we just added are no longer
+	     needed.  */
+	  pp = &dummy_caller.call_list;
+	  while ((p = *pp) != NULL)
+	    if (!p->fun->sec->linker_mark)
+	      {
+		lib_size += OVL_STUB_SIZE;
+		*pp = p->next;
+		free (p);
+	      }
+	    else
+	      pp = &p->next;
+	  /* Add new call stubs to dummy_caller.  */
+	  if ((sec_data = spu_elf_section_data (sec)) != NULL
+	      && (sinfo = sec_data->u.i.stack_info) != NULL)
+	    {
+	      int k;
+	      struct call_info *call;
+
+	      for (k = 0; k < sinfo->num_fun; ++k)
+		for (call = sinfo->fun[k].call_list;
+		     call;
+		     call = call->next)
+		  if (call->fun->sec->linker_mark)
+		    {
+		      struct call_info *callee;
+		      callee = bfd_malloc (sizeof (*callee));
+		      if (callee == NULL)
+			return (unsigned int) -1;
+		      *callee = *call;
+		      if (!insert_callee (&dummy_caller, callee))
+			free (callee);
+		    }
+	    }
+	}
+    }
+  while (dummy_caller.call_list != NULL)
+    {
+      struct call_info *call = dummy_caller.call_list;
+      dummy_caller.call_list = call->next;
+      free (call);
+    }
+  for (i = 0; i < 2 * lib_count; i++)
+    if (lib_sections[i])
+      lib_sections[i]->gc_mark = 1;
+  free (lib_sections);
+  return lib_size;
+}
+
+/* Build an array of overlay sections.  The deepest node's section is
+   added first, the its parent node's section, then everything called
+   from the parent section.  The idea being to group sections to
+   minimise calls between different overlays.  */
+
+static bfd_boolean
+collect_overlays (struct function_info *fun,
+		  struct bfd_link_info *info,
+		  void *param)
+{
+  struct call_info *call;
+  bfd_boolean added_fun;
+  asection ***ovly_sections = param;
+
+  if (fun->visit7)
+    return TRUE;
+
+  fun->visit7 = TRUE;
+  for (call = fun->call_list; call != NULL; call = call->next)
+    if (!call->is_pasted)
+      {
+	if (!collect_overlays (call->fun, info, ovly_sections))
+	  return FALSE;
+	break;
+      }
+
+  added_fun = FALSE;
+  if (fun->sec->linker_mark && fun->sec->gc_mark)
+    {
+      fun->sec->gc_mark = 0;
+      *(*ovly_sections)++ = fun->sec;
+      if (fun->rodata && fun->rodata->linker_mark && fun->rodata->gc_mark)
+	{
+	  fun->rodata->gc_mark = 0;
+	  *(*ovly_sections)++ = fun->rodata;
+	}
+      else
+	*(*ovly_sections)++ = NULL;
+      added_fun = TRUE;
+
+      /* Pasted sections must stay with the first section.  We don't
+	 put pasted sections in the array, just the first section.
+	 Mark subsequent sections as already considered.  */
+      if (fun->sec->segment_mark)
+	{
+	  struct function_info *call_fun = fun;
+	  do
+	    {
+	      for (call = call_fun->call_list; call != NULL; call = call->next)
+		if (call->is_pasted)
+		  {
+		    call_fun = call->fun;
+		    call_fun->sec->gc_mark = 0;
+		    if (call_fun->rodata)
+		      call_fun->rodata->gc_mark = 0;
+		    break;
+		  }
+	      if (call == NULL)
+		abort ();
+	    }
+	  while (call_fun->sec->segment_mark);
+	}
+    }
+
+  for (call = fun->call_list; call != NULL; call = call->next)
+    if (!collect_overlays (call->fun, info, ovly_sections))
+      return FALSE;
+
+  if (added_fun)
+    {
+      struct _spu_elf_section_data *sec_data;
+      struct spu_elf_stack_info *sinfo;
+
+      if ((sec_data = spu_elf_section_data (fun->sec)) != NULL
+	  && (sinfo = sec_data->u.i.stack_info) != NULL)
+	{
+	  int i;
+	  for (i = 0; i < sinfo->num_fun; ++i)
+	    if (!collect_overlays (&sinfo->fun[i], info, ovly_sections))
+	      return FALSE;
+	}
+    }
+
+  return TRUE;
 }
 
 struct _sum_stack_param {
@@ -2598,23 +3258,28 @@ sum_stack (struct function_info *fun,
   struct function_info *max;
   size_t stack, cum_stack;
   const char *f1;
+  bfd_boolean has_call;
   struct _sum_stack_param *sum_stack_param = param;
+  struct spu_link_hash_table *htab;
 
   cum_stack = fun->stack;
   sum_stack_param->cum_stack = cum_stack;
   if (fun->visit3)
     return TRUE;
 
+  has_call = FALSE;
   max = NULL;
   for (call = fun->call_list; call; call = call->next)
     {
+      if (!call->is_pasted)
+	has_call = TRUE;
       if (!sum_stack (call->fun, info, sum_stack_param))
 	return FALSE;
       stack = sum_stack_param->cum_stack;
       /* Include caller stack for normal calls, don't do so for
 	 tail calls.  fun->stack here is local stack usage for
 	 this function.  */
-      if (!call->is_tail)
+      if (!call->is_tail || call->is_pasted || call->fun->start != NULL)
 	stack += fun->stack;
       if (cum_stack < stack)
 	{
@@ -2633,28 +3298,32 @@ sum_stack (struct function_info *fun,
       && sum_stack_param->overall_stack < cum_stack)
     sum_stack_param->overall_stack = cum_stack;
 
+  htab = spu_hash_table (info);
+  if (htab->auto_overlay)
+    return TRUE;
+
   f1 = func_name (fun);
   if (!fun->non_root)
     info->callbacks->info (_("  %s: 0x%v\n"), f1, (bfd_vma) cum_stack);
   info->callbacks->minfo (_("%s: 0x%v 0x%v\n"),
 			  f1, (bfd_vma) stack, (bfd_vma) cum_stack);
 
-  if (fun->call_list)
+  if (has_call)
     {
       info->callbacks->minfo (_("  calls:\n"));
       for (call = fun->call_list; call; call = call->next)
-	{
-	  const char *f2 = func_name (call->fun);
-	  const char *ann1 = call->fun == max ? "*" : " ";
-	  const char *ann2 = call->is_tail ? "t" : " ";
+	if (!call->is_pasted)
+	  {
+	    const char *f2 = func_name (call->fun);
+	    const char *ann1 = call->fun == max ? "*" : " ";
+	    const char *ann2 = call->is_tail ? "t" : " ";
 
-	  info->callbacks->minfo (_("   %s%s %s\n"), ann1, ann2, f2);
-	}
+	    info->callbacks->minfo (_("   %s%s %s\n"), ann1, ann2, f2);
+	  }
     }
 
   if (sum_stack_param->emit_stack_syms)
     {
-      struct spu_link_hash_table *htab = spu_hash_table (info);
       char *name = bfd_malloc (18 + strlen (f1));
       struct elf_link_hash_entry *h;
 
@@ -2689,6 +3358,455 @@ sum_stack (struct function_info *fun,
   return TRUE;
 }
 
+/* SEC is part of a pasted function.  Return the call_info for the
+   next section of this function.  */
+
+static struct call_info *
+find_pasted_call (asection *sec)
+{
+  struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
+  struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
+  struct call_info *call;
+  int k;
+
+  for (k = 0; k < sinfo->num_fun; ++k)
+    for (call = sinfo->fun[k].call_list; call != NULL; call = call->next)
+      if (call->is_pasted)
+	return call;
+  abort ();
+  return 0;
+}
+
+/* qsort predicate to sort bfds by file name.  */
+
+static int
+sort_bfds (const void *a, const void *b)
+{
+  bfd *const *abfd1 = a;
+  bfd *const *abfd2 = b;
+
+  return strcmp ((*abfd1)->filename, (*abfd2)->filename);
+}
+
+/* Handle --auto-overlay.  */
+
+static void spu_elf_auto_overlay (struct bfd_link_info *, void (*) (void))
+     ATTRIBUTE_NORETURN;
+
+static void
+spu_elf_auto_overlay (struct bfd_link_info *info,
+		      void (*spu_elf_load_ovl_mgr) (void))
+{
+  bfd *ibfd;
+  bfd **bfd_arr;
+  struct elf_segment_map *m;
+  unsigned int fixed_size, lo, hi;
+  struct spu_link_hash_table *htab;
+  unsigned int base, i, count, bfd_count;
+  int ovlynum;
+  asection **ovly_sections, **ovly_p;
+  FILE *script;
+  unsigned int total_overlay_size, overlay_size;
+  struct elf_link_hash_entry *h;
+  struct _mos_param mos_param;
+  struct _uos_param uos_param;
+  struct function_info dummy_caller;
+
+  /* Find the extents of our loadable image.  */
+  lo = (unsigned int) -1;
+  hi = 0;
+  for (m = elf_tdata (info->output_bfd)->segment_map; m != NULL; m = m->next)
+    if (m->p_type == PT_LOAD)
+      for (i = 0; i < m->count; i++)
+	if (m->sections[i]->size != 0)
+	  {
+	    if (m->sections[i]->vma < lo)
+	      lo = m->sections[i]->vma;
+	    if (m->sections[i]->vma + m->sections[i]->size - 1 > hi)
+	      hi = m->sections[i]->vma + m->sections[i]->size - 1;
+	  }
+  fixed_size = hi + 1 - lo;
+
+  if (!discover_functions (info))
+    goto err_exit;
+
+  if (!build_call_tree (info))
+    goto err_exit;
+
+  uos_param.exclude_input_section = 0;
+  uos_param.exclude_output_section
+    = bfd_get_section_by_name (info->output_bfd, ".interrupt");
+
+  htab = spu_hash_table (info);
+  h = elf_link_hash_lookup (&htab->elf, "__ovly_load",
+			    FALSE, FALSE, FALSE);
+  if (h != NULL
+      && (h->root.type == bfd_link_hash_defined
+	  || h->root.type == bfd_link_hash_defweak)
+      && h->def_regular)
+    {
+      /* We have a user supplied overlay manager.  */
+      uos_param.exclude_input_section = h->root.u.def.section;
+    }
+  else
+    {
+      /* If no user overlay manager, spu_elf_load_ovl_mgr will add our
+	 builtin version to .text, and will adjust .text size.  */
+      asection *text = bfd_get_section_by_name (info->output_bfd, ".text");
+      if (text != NULL)
+	fixed_size -= text->size;
+      spu_elf_load_ovl_mgr ();
+      text = bfd_get_section_by_name (info->output_bfd, ".text");
+      if (text != NULL)
+	fixed_size += text->size;
+    }
+
+  /* Mark overlay sections, and find max overlay section size.  */
+  mos_param.max_overlay_size = 0;
+  if (!for_each_node (mark_overlay_section, info, &mos_param, TRUE))
+    goto err_exit;
+
+  /* We can't put the overlay manager or interrupt routines in
+     overlays.  */
+  uos_param.clearing = 0;
+  if ((uos_param.exclude_input_section
+       || uos_param.exclude_output_section)
+      && !for_each_node (unmark_overlay_section, info, &uos_param, TRUE))
+    goto err_exit;
+
+  bfd_count = 0;
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    ++bfd_count;
+  bfd_arr = bfd_malloc (bfd_count * sizeof (*bfd_arr));
+  if (bfd_arr == NULL)
+    goto err_exit;
+
+  /* Count overlay sections, and subtract their sizes from "fixed_size".  */
+  count = 0;
+  bfd_count = 0;
+  total_overlay_size = 0;
+  for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
+    {
+      extern const bfd_target bfd_elf32_spu_vec;
+      asection *sec;
+      unsigned int old_count;
+
+      if (ibfd->xvec != &bfd_elf32_spu_vec)
+	continue;
+
+      old_count = count;
+      for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+	if (sec->linker_mark)
+	  {
+	    if ((sec->flags & SEC_CODE) != 0)
+	      count += 1;
+	    fixed_size -= sec->size;
+	    total_overlay_size += sec->size;
+	  }
+      if (count != old_count)
+	bfd_arr[bfd_count++] = ibfd;
+    }
+
+  /* Since the overlay link script selects sections by file name and
+     section name, ensure that file names are unique.  */
+  if (bfd_count > 1)
+    {
+      bfd_boolean ok = TRUE;
+
+      qsort (bfd_arr, bfd_count, sizeof (*bfd_arr), sort_bfds);
+      for (i = 1; i < bfd_count; ++i)
+	if (strcmp (bfd_arr[i - 1]->filename, bfd_arr[i]->filename) == 0)
+	  {
+	    if (bfd_arr[i - 1]->my_archive && bfd_arr[i]->my_archive)
+	      {
+		if (bfd_arr[i - 1]->my_archive == bfd_arr[i]->my_archive)
+		  info->callbacks->einfo (_("%s duplicated in %s\n"),
+					  bfd_arr[i - 1]->filename,
+					  bfd_arr[i - 1]->my_archive->filename);
+		else
+		  info->callbacks->einfo (_("%s in both %s and %s\n"),
+					  bfd_arr[i - 1]->filename,
+					  bfd_arr[i - 1]->my_archive->filename,
+					  bfd_arr[i]->my_archive->filename);
+	      }
+	    else if (bfd_arr[i - 1]->my_archive)
+	      info->callbacks->einfo (_("%s in %s and as an object\n"),
+				      bfd_arr[i - 1]->filename,
+				      bfd_arr[i - 1]->my_archive->filename);
+	    else if (bfd_arr[i]->my_archive)
+	      info->callbacks->einfo (_("%s in %s and as an object\n"),
+				      bfd_arr[i]->filename,
+				      bfd_arr[i]->my_archive->filename);
+	    else
+	      info->callbacks->einfo (_("%s duplicated\n"),
+				      bfd_arr[i]->filename);
+	    ok = FALSE;
+	  }
+      if (!ok)
+	{
+	  /* FIXME: modify plain object files from foo.o to ./foo.o
+	     and emit EXCLUDE_FILE to handle the duplicates in
+	     archives.  There is a pathological case we can't handle:
+	     We may have duplicate file names within a single archive.  */
+	  info->callbacks->einfo (_("sorry, no support for duplicate "
+				    "object files in auto-overlay script\n"));
+	  bfd_set_error (bfd_error_bad_value);
+	  goto err_exit;
+	}
+    }
+  free (bfd_arr);
+
+  if (htab->reserved == 0)
+    {
+      struct _sum_stack_param sum_stack_param;
+
+      sum_stack_param.emit_stack_syms = 0;
+      sum_stack_param.overall_stack = 0;
+      if (!for_each_node (sum_stack, info, &sum_stack_param, TRUE))
+	goto err_exit;
+      htab->reserved = sum_stack_param.overall_stack;
+    }
+  fixed_size += htab->reserved;
+  fixed_size += htab->non_ovly_stub * OVL_STUB_SIZE;
+  if (fixed_size + mos_param.max_overlay_size <= htab->local_store)
+    {
+      /* Guess number of overlays.  Assuming overlay buffer is on
+	 average only half full should be conservative.  */
+      ovlynum = total_overlay_size * 2 / (htab->local_store - fixed_size);
+      /* Space for _ovly_table[], _ovly_buf_table[] and toe.  */
+      fixed_size += ovlynum * 16 + 16 + 4 + 16;
+    }
+
+  if (fixed_size + mos_param.max_overlay_size > htab->local_store)
+    info->callbacks->einfo (_("non-overlay plus maximum overlay size "
+			      "of 0x%x exceeds local store\n"),
+			    fixed_size + mos_param.max_overlay_size);
+
+  /* Now see if we should put some functions in the non-overlay area.  */
+  if (fixed_size < htab->overlay_fixed
+      && htab->overlay_fixed + mos_param.max_overlay_size < htab->local_store)
+    {
+      unsigned int lib_size = htab->overlay_fixed - fixed_size;
+      lib_size = auto_ovl_lib_functions (info, lib_size);
+      if (lib_size == (unsigned int) -1)
+	goto err_exit;
+      fixed_size = htab->overlay_fixed - lib_size;
+    }
+
+  /* Build an array of sections, suitably sorted to place into
+     overlays.  */
+  ovly_sections = bfd_malloc (2 * count * sizeof (*ovly_sections));
+  if (ovly_sections == NULL)
+    goto err_exit;
+  ovly_p = ovly_sections;
+  if (!for_each_node (collect_overlays, info, &ovly_p, TRUE))
+    goto err_exit;
+  count = (size_t) (ovly_p - ovly_sections) / 2;
+
+  script = htab->spu_elf_open_overlay_script ();
+
+  if (fprintf (script, "SECTIONS\n{\n OVERLAY :\n {\n") <= 0)
+    goto file_err;
+
+  memset (&dummy_caller, 0, sizeof (dummy_caller));
+  overlay_size = htab->local_store - fixed_size;
+  base = 0;
+  ovlynum = 0;
+  while (base < count)
+    {
+      unsigned int size = 0;
+      unsigned int j;
+
+      for (i = base; i < count; i++)
+	{
+	  asection *sec;
+	  unsigned int tmp;
+	  unsigned int stub_size;
+	  struct call_info *call, *pasty;
+	  struct _spu_elf_section_data *sec_data;
+	  struct spu_elf_stack_info *sinfo;
+	  int k;
+
+	  /* See whether we can add this section to the current
+	     overlay without overflowing our overlay buffer.  */
+	  sec = ovly_sections[2 * i];
+	  tmp = size + sec->size;
+	  if (ovly_sections[2 * i + 1])
+	    tmp += ovly_sections[2 * i + 1]->size;
+	  if (tmp > overlay_size)
+	    break;
+	  if (sec->segment_mark)
+	    {
+	      /* Pasted sections must stay together, so add their
+		 sizes too.  */
+	      struct call_info *pasty = find_pasted_call (sec);
+	      while (pasty != NULL)
+		{
+		  struct function_info *call_fun = pasty->fun;
+		  tmp += call_fun->sec->size;
+		  if (call_fun->rodata)
+		    tmp += call_fun->rodata->size;
+		  for (pasty = call_fun->call_list; pasty; pasty = pasty->next)
+		    if (pasty->is_pasted)
+		      break;
+		}
+	    }
+	  if (tmp > overlay_size)
+	    break;
+
+	  /* If we add this section, we might need new overlay call
+	     stubs.  Add any overlay section calls to dummy_caller.  */
+	  pasty = NULL;
+	  sec_data = spu_elf_section_data (sec);
+	  sinfo = sec_data->u.i.stack_info;
+	  for (k = 0; k < sinfo->num_fun; ++k)
+	    for (call = sinfo->fun[k].call_list; call; call = call->next)
+	      if (call->is_pasted)
+		{
+		  BFD_ASSERT (pasty == NULL);
+		  pasty = call;
+		}
+	      else if (call->fun->sec->linker_mark)
+		{
+		  if (!copy_callee (&dummy_caller, call))
+		    goto err_exit;
+		}
+	  while (pasty != NULL)
+	    {
+	      struct function_info *call_fun = pasty->fun;
+	      pasty = NULL;
+	      for (call = call_fun->call_list; call; call = call->next)
+		if (call->is_pasted)
+		  {
+		    BFD_ASSERT (pasty == NULL);
+		    pasty = call;
+		  }
+		else if (!copy_callee (&dummy_caller, call))
+		  goto err_exit;
+	    }
+
+	  /* Calculate call stub size.  */
+	  stub_size = 0;
+	  for (call = dummy_caller.call_list; call; call = call->next)
+	    {
+	      unsigned int k;
+
+	      stub_size += OVL_STUB_SIZE;
+	      /* If the call is within this overlay, we won't need a
+		 stub.  */
+	      for (k = base; k < i + 1; k++)
+		if (call->fun->sec == ovly_sections[2 * k])
+		  {
+		    stub_size -= OVL_STUB_SIZE;
+		    break;
+		  }
+	    }
+	  if (tmp + stub_size > overlay_size)
+	    break;
+	  
+	  size = tmp;
+	}
+
+      if (i == base)
+	{
+	  info->callbacks->einfo (_("%B:%A%s exceeds overlay size\n"),
+				  ovly_sections[2 * i]->owner,
+				  ovly_sections[2 * i],
+				  ovly_sections[2 * i + 1] ? " + rodata" : "");
+	  bfd_set_error (bfd_error_bad_value);
+	  goto err_exit;
+	}
+
+      if (fprintf (script, "  .ovly%d {\n", ++ovlynum) <= 0)
+	goto file_err;
+      for (j = base; j < i; j++)
+	{
+	  asection *sec = ovly_sections[2 * j];
+
+	  if (fprintf (script, "   [%c]%s (%s)\n",
+		       sec->owner->filename[0],
+		       sec->owner->filename + 1,
+		       sec->name) <= 0)
+	    goto file_err;
+	  if (sec->segment_mark)
+	    {
+	      struct call_info *call = find_pasted_call (sec);
+	      while (call != NULL)
+		{
+		  struct function_info *call_fun = call->fun;
+		  sec = call_fun->sec;
+		  if (fprintf (script, "   [%c]%s (%s)\n",
+			       sec->owner->filename[0],
+			       sec->owner->filename + 1,
+			       sec->name) <= 0)
+		    goto file_err;
+		  for (call = call_fun->call_list; call; call = call->next)
+		    if (call->is_pasted)
+		      break;
+		}
+	    }
+	}
+
+      for (j = base; j < i; j++)
+	{
+	  asection *sec = ovly_sections[2 * j + 1];
+	  if (sec != NULL && fprintf (script, "   [%c]%s (%s)\n",
+				      sec->owner->filename[0],
+				      sec->owner->filename + 1,
+				      sec->name) <= 0)
+	    goto file_err;
+
+	  sec = ovly_sections[2 * j];
+	  if (sec->segment_mark)
+	    {
+	      struct call_info *call = find_pasted_call (sec);
+	      while (call != NULL)
+		{
+		  struct function_info *call_fun = call->fun;
+		  sec = call_fun->rodata;
+		  if (sec != NULL && fprintf (script, "   [%c]%s (%s)\n",
+					      sec->owner->filename[0],
+					      sec->owner->filename + 1,
+					      sec->name) <= 0)
+		    goto file_err;
+		  for (call = call_fun->call_list; call; call = call->next)
+		    if (call->is_pasted)
+		      break;
+		}
+	    }
+	}
+
+      if (fprintf (script, "  }\n") <= 0)
+	goto file_err;
+
+      while (dummy_caller.call_list != NULL)
+	{
+	  struct call_info *call = dummy_caller.call_list;
+	  dummy_caller.call_list = call->next;
+	  free (call);
+	}
+
+      base = i;
+    }
+  free (ovly_sections);
+
+  if (fprintf (script, " }\n}\nINSERT AFTER .text;\n") <= 0)
+    goto file_err;
+  if (fclose (script) != 0)
+    goto file_err;
+
+  if (htab->auto_overlay & AUTO_RELINK)
+    htab->spu_elf_relink ();
+
+  xexit (0);
+
+ file_err:
+  bfd_set_error (bfd_error_system_call);
+ err_exit:
+  info->callbacks->einfo ("%F%P: auto overlay error: %E\n");
+  xexit (1);
+}
+
 /* Provide an estimate of total stack required.  */
 
 static bfd_boolean
@@ -2723,6 +3841,9 @@ spu_elf_final_link (bfd *output_bfd, str
 {
   struct spu_link_hash_table *htab = spu_hash_table (info);
 
+  if (htab->auto_overlay)
+    spu_elf_auto_overlay (info, htab->spu_elf_load_ovl_mgr);
+
   if (htab->stack_analysis
       && !spu_elf_stack_analysis (info, htab->emit_stack_syms))
     info->callbacks->einfo ("%X%P: stack analysis error: %E\n");
Index: bfd/elf32-spu.h
===================================================================
RCS file: /cvs/src/src/bfd/elf32-spu.h,v
retrieving revision 1.6
diff -u -p -r1.6 elf32-spu.h
--- bfd/elf32-spu.h	20 Mar 2008 05:35:10 -0000	1.6
+++ bfd/elf32-spu.h	7 Apr 2008 23:30:04 -0000
@@ -60,4 +60,7 @@ extern int spu_elf_size_stubs (struct bf
 			       void (*) (asection *, asection *, const char *),
 			       int);
 extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int);
-extern asection *spu_elf_check_vma (struct bfd_link_info *, bfd_vma, bfd_vma);
+extern asection *spu_elf_check_vma (struct bfd_link_info *, int,
+				    unsigned int, unsigned int, unsigned int,
+				    unsigned int, void (*) (void),
+				    FILE *(*) (void), void (*) (void));
Index: ld/emultempl/spuelf.em
===================================================================
RCS file: /cvs/src/src/ld/emultempl/spuelf.em,v
retrieving revision 1.18
diff -u -p -r1.18 spuelf.em
--- ld/emultempl/spuelf.em	20 Mar 2008 05:35:10 -0000	1.18
+++ ld/emultempl/spuelf.em	7 Apr 2008 23:30:25 -0000
@@ -45,6 +45,14 @@ static int emit_stack_syms = 0;
 static bfd_vma local_store_lo = 0;
 static bfd_vma local_store_hi = 0x3ffff;
 
+/* Control --auto-overlay feature.  */
+static int auto_overlay = 0;
+static char *auto_overlay_file = 0;
+static unsigned int auto_overlay_fixed = 0;
+static unsigned int auto_overlay_reserved = 0;
+int my_argc;
+char **my_argv;
+
 static const char ovl_mgr[] = {
 EOF
 
@@ -202,6 +210,12 @@ spu_before_allocation (void)
 	{
 	  int ret;
 
+	  if (auto_overlay != 0)
+	    {
+	      einfo ("%P: --auto-overlay ignored with user overlay script\n");
+	      auto_overlay = 0;
+	    }
+
 	  ret = spu_elf_size_stubs (&link_info,
 				    spu_place_special_section,
 				    non_overlay_stubs);
@@ -218,6 +232,83 @@ spu_before_allocation (void)
   gld${EMULATION_NAME}_before_allocation ();
 }
 
+struct tflist {
+  struct tflist *next;
+  char name[9];
+};
+
+static struct tflist *tmp_file_list;
+
+static void clean_tmp (void)
+{
+  for (; tmp_file_list != NULL; tmp_file_list = tmp_file_list->next)
+    unlink (tmp_file_list->name);
+}
+
+static int
+new_tmp_file (char **fname)
+{
+  struct tflist *tf;
+  int fd;
+
+  if (tmp_file_list == NULL)
+    atexit (clean_tmp);
+  tf = xmalloc (sizeof (*tf));
+  tf->next = tmp_file_list;
+  tmp_file_list = tf;
+  memcpy (tf->name, "ldXXXXXX", sizeof (tf->name));
+  *fname = tf->name;
+#ifdef HAVE_MKSTEMP
+  fd = mkstemp (*fname);
+#else
+  *fname = mktemp (*fname);
+  if (*fname == NULL)
+    return -1;
+  fd = open (fname, O_RDWR | O_CREAT | O_EXCL, 0600);
+#endif
+  return fd;
+}
+
+static FILE *
+spu_elf_open_overlay_script (void)
+{
+  FILE *script = NULL;
+
+  if (auto_overlay_file == NULL)
+    {
+      int fd = new_tmp_file (&auto_overlay_file);
+      if (fd == -1)
+	goto file_err;
+      script = fdopen (fd, "w");
+    }
+  else
+    script = fopen (auto_overlay_file, "w");
+
+  if (script == NULL)
+    {
+    file_err:
+      einfo ("%F%P: can not open script: %E\n");
+    }
+  return script;
+}
+
+static void
+spu_elf_relink (void)
+{
+  char **argv = xmalloc ((my_argc + 5) * sizeof (*argv));
+
+  memcpy (argv, my_argv, my_argc * sizeof (*argv));
+  argv[my_argc++] = "--no-auto-overlay";
+  if (tmp_file_list->name == auto_overlay_file)
+    argv[my_argc++] = auto_overlay_file;
+  argv[my_argc++] = "-T";
+  argv[my_argc++] = auto_overlay_file;
+  argv[my_argc] = 0;
+  execvp (argv[0], (char *const *) argv);
+  perror (argv[0]);
+  _exit (127);
+}
+
 /* Final emulation specific call.  */
 
 static void
@@ -235,10 +326,17 @@ gld${EMULATION_NAME}_finish (void)
         {
 	  asection *s;
 
-	  s = spu_elf_check_vma (&link_info, local_store_lo, local_store_hi);
-	  if (s != NULL)
+	  s = spu_elf_check_vma (&link_info, auto_overlay,
+				 local_store_lo, local_store_hi,
+				 auto_overlay_fixed, auto_overlay_reserved,
+				 spu_elf_load_ovl_mgr,
+				 spu_elf_open_overlay_script,
+				 spu_elf_relink);
+	  if (s != NULL && !auto_overlay)
 	    einfo ("%X%P: %A exceeds local store range\n", s);
 	}
+      else if (auto_overlay)
+	einfo ("%P: --auto-overlay ignored with zero local store range\n");
 
       if (!spu_elf_build_stubs (&link_info,
 				emit_stub_syms || link_info.emitrelocations))
@@ -248,6 +346,14 @@ gld${EMULATION_NAME}_finish (void)
   finish_default ();
 }
 
+static char *
+gld${EMULATION_NAME}_choose_target (int argc, char *argv[])
+{
+  my_argc = argc;
+  my_argv = argv;
+  return ldemul_default_target (argc, argv);
+}
+
 EOF
 
 if grep -q 'ld_elf.*ppc.*_emulation' ldemul-list.h; then
@@ -256,19 +362,6 @@ if grep -q 'ld_elf.*ppc.*_emulation' lde
 #include <fcntl.h>
 #include <sys/wait.h>
 
-struct tflist {
-  struct tflist *next;
-  char name[9];
-};
-
-static struct tflist *tmp_file_list;
-
-static void clean_tmp (void)
-{
-  for (; tmp_file_list != NULL; tmp_file_list = tmp_file_list->next)
-    unlink (tmp_file_list->name);
-}
-
 static const char *
 base_name (const char *path)
 {
@@ -302,7 +395,6 @@ embedded_spu_file (lang_input_statement_
   const char *cmd[6];
   const char *sym;
   char *handle, *p;
-  struct tflist *tf;
   char *oname;
   int fd;
   pid_t pid;
@@ -326,22 +418,7 @@ embedded_spu_file (lang_input_statement_
     if (!(ISALNUM (*p) || *p == '$' || *p == '.'))
       *p = '_';
 
-  if (tmp_file_list == NULL)
-    atexit (clean_tmp);
-  tf = xmalloc (sizeof (*tf));
-  tf->next = tmp_file_list;
-  tmp_file_list = tf;
-  oname = tf->name;
-  memcpy (tf->name, "ldXXXXXX", sizeof (tf->name));
-
-#ifdef HAVE_MKSTEMP
-  fd = mkstemp (oname);
-#else
-  oname = mktemp (oname);
-  if (oname == NULL)
-    return FALSE;
-  fd = open (oname, O_RDWR | O_CREAT | O_EXCL, 0600);
-#endif
+  fd = new_tmp_file (&oname);
   if (fd == -1)
     return FALSE;
   close (fd);
@@ -440,6 +517,12 @@ PARSE_AND_LIST_PROLOGUE='
 #define OPTION_SPU_LOCAL_STORE		(OPTION_SPU_NON_OVERLAY_STUBS + 1)
 #define OPTION_SPU_STACK_ANALYSIS	(OPTION_SPU_LOCAL_STORE + 1)
 #define OPTION_SPU_STACK_SYMS		(OPTION_SPU_STACK_ANALYSIS + 1)
+#define OPTION_SPU_AUTO_OVERLAY		(OPTION_SPU_STACK_SYMS + 1)
+#define OPTION_SPU_AUTO_RELINK		(OPTION_SPU_AUTO_OVERLAY + 1)
+#define OPTION_SPU_OVERLAY_RODATA	(OPTION_SPU_AUTO_RELINK + 1)
+#define OPTION_SPU_FIXED_SPACE		(OPTION_SPU_OVERLAY_RODATA + 1)
+#define OPTION_SPU_RESERVED_SPACE	(OPTION_SPU_FIXED_SPACE + 1)
+#define OPTION_SPU_NO_AUTO_OVERLAY	(OPTION_SPU_RESERVED_SPACE + 1)
 '
 
 PARSE_AND_LIST_LONGOPTS='
@@ -450,6 +533,12 @@ PARSE_AND_LIST_LONGOPTS='
   { "local-store", required_argument, NULL, OPTION_SPU_LOCAL_STORE },
   { "stack-analysis", no_argument, NULL, OPTION_SPU_STACK_ANALYSIS },
   { "emit-stack-syms", no_argument, NULL, OPTION_SPU_STACK_SYMS },
+  { "auto-overlay", optional_argument, NULL, OPTION_SPU_AUTO_OVERLAY },
+  { "auto-relink", no_argument, NULL, OPTION_SPU_AUTO_RELINK },
+  { "overlay-rodata", no_argument, NULL, OPTION_SPU_OVERLAY_RODATA },
+  { "fixed-space", required_argument, NULL, OPTION_SPU_FIXED_SPACE },
+  { "reserved-space", required_argument, NULL, OPTION_SPU_RESERVED_SPACE },
+  { "no-auto-overlay", optional_argument, NULL, OPTION_SPU_NO_AUTO_OVERLAY },
 '
 
 PARSE_AND_LIST_OPTIONS='
@@ -460,7 +549,14 @@ PARSE_AND_LIST_OPTIONS='
   --extra-overlay-stubs       Add stubs on all calls out of overlay regions.\n\
   --local-store=lo:hi         Valid address range.\n\
   --stack-analysis            Estimate maximum stack requirement.\n\
-  --emit-stack-syms           Add sym giving stack needed for each func.\n"
+  --emit-stack-syms           Add sym giving stack needed for each func.\n\
+  --auto-overlay [=filename]  Create an overlay script in filename if\n\
+                              executable does not fit in local store.\n\
+  --auto-relink               Rerun linker using auto-overlay script.\n\
+  --overlay-rodata            Place read-only data with associated function\n\
+                              code in overlays.\n\
+  --fixed-space=bytes         Local store for non-overlay code and data.\n\
+  --reserved-space=bytes      Local store for stack and heap.\n"
 		   ));
 '
 
@@ -502,8 +598,63 @@ PARSE_AND_LIST_ARGS_CASES='
     case OPTION_SPU_STACK_SYMS:
       emit_stack_syms = 1;
       break;
+
+    case OPTION_SPU_AUTO_OVERLAY:
+      auto_overlay |= 1;
+      if (optarg != NULL)
+	{
+	  auto_overlay_file = optarg;
+	  break;
+	}
+      /* Fall thru */
+
+    case OPTION_SPU_AUTO_RELINK:
+      auto_overlay |= 2;
+      break;
+
+    case OPTION_SPU_OVERLAY_RODATA:
+      auto_overlay |= 4;
+      break;
+
+    case OPTION_SPU_FIXED_SPACE:
+      {
+	char *end;
+	auto_overlay_fixed = strtoul (optarg, &end, 0);
+	if (*end != 0)
+	  einfo (_("%P%F: invalid --fixed-space value `%s'\''\n"), optarg);
+      }
+      break;
+
+    case OPTION_SPU_RESERVED_SPACE:
+      {
+	char *end;
+	auto_overlay_reserved = strtoul (optarg, &end, 0);
+	if (*end != 0)
+	  einfo (_("%P%F: invalid --reserved-space value `%s'\''\n"), optarg);
+      }
+      break;
+
+    case OPTION_SPU_NO_AUTO_OVERLAY:
+      auto_overlay = 0;
+      if (optarg != NULL)
+	{
+	  struct tflist *tf;
+	  size_t len;
+
+	  if (tmp_file_list == NULL)
+	    atexit (clean_tmp);
+
+	  len = strlen (optarg) + 1;
+	  tf = xmalloc (sizeof (*tf) - sizeof (tf->name) + len);
+	  memcpy (tf->name, optarg, len);
+	  tf->next = tmp_file_list;
+	  tmp_file_list = tf;
+	  break;
+	}
+      break;
 '
 
 LDEMUL_AFTER_OPEN=spu_after_open
 LDEMUL_BEFORE_ALLOCATION=spu_before_allocation
 LDEMUL_FINISH=gld${EMULATION_NAME}_finish
+LDEMUL_CHOOSE_TARGET=gld${EMULATION_NAME}_choose_target

-- 
Alan Modra
Australia Development Lab, IBM


