SPU overlay update

Alan Modra amodra@bigpond.net.au
Tue Jan 29 15:45:00 GMT 2008


This is a rewrite of SPU overlays.
- Faster, properly scheduled overlay manager.
- Faster overlay call stubs.
- Overlay stubs occupy space in the overlays themselves where
possible, making more non-overlay space available.

bfd/
	Rewrite SPU overlay handling code.  Put overlay call stubs in the
	overlays where possible.  Use a faster call stub, or optionally at
	compile time, a more compact stub.  Double size of _ovly_buf_table
	so that low bit of _ovly_table.buf can be used as a "present" bit.
	Reserve an extra _ovly_table entry for index zero.
	* elf32-spu.c (struct spu_link_hash_table): Delete a number of fields,
	add new ones.
	(struct got_entry): New.
	(struct spu_stub_hash_entry): Delete.
	(stub_hash_newfunc, spu_elf_link_hash_table_free): Delete.
	(spu_elf_link_hash_table_create): Adjust for struct changes.
	(spu_stub_name): Delete.
	(spu_elf_find_overlays): Don't track sections from overlay regions.
	Instead set ovl_buf number in spu elf section data.  Error if
	overlays in one region don't start at the same address.  Adjust
	for struct _spu_elf_section_data changes.
	(SIZEOF_STUB1, SIZEOF_STUB2, ILA_79, ILA_78): Delete.
	(OVL_STUB_SIZE, BRSL, LNOP, ILA): Define.
	(needs_ovl_stub): Adjust for struct _spu_elf_section_data changes.
	(enum _insn_type): New.
	(count_stub, build_stub): New functions.
	(allocate_spuear_stubs): Use count_stub.
	(build_spuear_stubs): Use build_stub.
	(populate_stubs, sort_stubs): Delete.
	(process_stubs): New function.
	(write_one_stub): Delete.
	(spu_elf_size_stubs, spu_elf_build_stubs): Rewrite.
	(alloc_stack_info): Adjust for struct _spu_elf_section_data changes.
	(maybe_insert_function, check_function_ranges): Likewise.
	(find_function, pasted_function, build_call_tree): Likewise.
	(spu_elf_stack_analysis, spu_elf_modify_segment_map): Likewise.
	(spu_elf_modify_program_headers): Likewise.
	(interesting_section): Detect stub sections differently.  Delete
	htab param, adjust all callers.
	(spu_elf_relocate_section): Rewrite stub handling.
	(spu_elf_output_symbol_hook): Likewise.
	(bfd_elf32_bfd_link_hash_table_free): Delete.
	* elf32-spu.h (struct _spu_elf_section_data): Move input-only and
	output-only fields into a union.  Add ovl_buf.
	(spu_elf_size_stubs, spu_elf_build_stubs): Update prototypes.
ld/
	* emultempl/spu_ovl.S: Rewrite.
	* emultempl/spu_ovl.o: Regenerate.
	* emultempl/spuelf.em (toe): Delete.
	(spu_place_special_section): Add param to control section placement.
	Adjust callers.
	(spu_elf_load_ovl_mgr): Adjust for struct _spu_elf_section_data
	changes.
	(spu_before_allocation): Adjust spu_elf_size_stubs call.
	(gld${EMULATION_NAME}_finish): Adjust spu_elf_build_stubs call.
ld/testsuite/
	* ld-spu/ovl.d: Update.
	* ld-spu/ovl2.d: Update.

Index: bfd/elf32-spu.c
===================================================================
RCS file: /cvs/src/src/bfd/elf32-spu.c,v
retrieving revision 1.26
diff -u -p -r1.26 elf32-spu.c
--- bfd/elf32-spu.c	5 Dec 2007 03:29:20 -0000	1.26
+++ bfd/elf32-spu.c	28 Jan 2008 01:34:29 -0000
@@ -257,31 +257,21 @@ struct spu_link_hash_table
 {
   struct elf_link_hash_table elf;
 
-  /* The stub hash table.  */
-  struct bfd_hash_table stub_hash_table;
-
-  /* Sorted array of stubs.  */
-  struct {
-    struct spu_stub_hash_entry **sh;
-    unsigned int count;
-    int err;
-  } stubs;
-
   /* Shortcuts to overlay sections.  */
-  asection *stub;
   asection *ovtab;
+  asection *toe;
+  asection **ovl_sec;
+
+  /* Count of stubs in each overlay section.  */
+  unsigned int *stub_count;
+
+  /* The stub section for each overlay section.  */
+  asection **stub_sec;
 
   struct elf_link_hash_entry *ovly_load;
+  struct elf_link_hash_entry *ovly_return;
   unsigned long ovly_load_r_symndx;
 
-  /* An array of two output sections per overlay region, chosen such that
-     the first section vma is the overlay buffer vma (ie. the section has
-     the lowest vma in the group that occupy the region), and the second
-     section vma+size specifies the end of the region.  We keep pointers
-     to sections like this because section vmas may change when laying
-     them out.  */
-  asection **ovl_region;
-
   /* Number of overlay buffers.  */
   unsigned int num_buf;
 
@@ -296,7 +286,7 @@ struct spu_link_hash_table
   unsigned int non_overlay_stubs : 1;
 
   /* Set on error.  */
-  unsigned int stub_overflow : 1;
+  unsigned int stub_err : 1;
 
   /* Set if stack size analysis should be done.  */
   unsigned int stack_analysis : 1;
@@ -305,54 +295,17 @@ struct spu_link_hash_table
   unsigned int emit_stack_syms : 1;
 };
 
-#define spu_hash_table(p) \
-  ((struct spu_link_hash_table *) ((p)->hash))
+/* Hijack the generic got fields for overlay stub accounting.  */
 
-struct spu_stub_hash_entry
+struct got_entry
 {
-  struct bfd_hash_entry root;
-
-  /* Destination of this stub.  */
-  asection *target_section;
-  bfd_vma target_off;
-
-  /* Offset of entry in stub section.  */
-  bfd_vma off;
-
-  /* Offset from this stub to stub that loads the overlay index.  */
-  bfd_vma delta;
+  struct got_entry *next;
+  unsigned int ovl;
+  bfd_vma stub_addr;
 };
 
-/* Create an entry in a spu stub hash table.  */
-
-static struct bfd_hash_entry *
-stub_hash_newfunc (struct bfd_hash_entry *entry,
-		   struct bfd_hash_table *table,
-		   const char *string)
-{
-  /* Allocate the structure if it has not already been allocated by a
-     subclass.  */
-  if (entry == NULL)
-    {
-      entry = bfd_hash_allocate (table, sizeof (struct spu_stub_hash_entry));
-      if (entry == NULL)
-	return entry;
-    }
-
-  /* Call the allocation method of the superclass.  */
-  entry = bfd_hash_newfunc (entry, table, string);
-  if (entry != NULL)
-    {
-      struct spu_stub_hash_entry *sh = (struct spu_stub_hash_entry *) entry;
-
-      sh->target_section = NULL;
-      sh->target_off = 0;
-      sh->off = 0;
-      sh->delta = 0;
-    }
-
-  return entry;
-}
+#define spu_hash_table(p) \
+  ((struct spu_link_hash_table *) ((p)->hash))
 
 /* Create a spu ELF linker hash table.  */
 
@@ -373,28 +326,16 @@ spu_elf_link_hash_table_create (bfd *abf
       return NULL;
     }
 
-  /* Init the stub hash table too.  */
-  if (!bfd_hash_table_init (&htab->stub_hash_table, stub_hash_newfunc,
-			    sizeof (struct spu_stub_hash_entry)))
-    return NULL;
-
-  memset (&htab->stubs, 0,
-	  sizeof (*htab) - offsetof (struct spu_link_hash_table, stubs));
+  memset (&htab->ovtab, 0,
+	  sizeof (*htab) - offsetof (struct spu_link_hash_table, ovtab));
 
+  htab->elf.init_got_refcount.refcount = 0;
+  htab->elf.init_got_refcount.glist = NULL;
+  htab->elf.init_got_offset.offset = 0;
+  htab->elf.init_got_offset.glist = NULL;
   return &htab->elf.root;
 }
 
-/* Free the derived linker hash table.  */
-
-static void
-spu_elf_link_hash_table_free (struct bfd_link_hash_table *hash)
-{
-  struct spu_link_hash_table *ret = (struct spu_link_hash_table *) hash;
-
-  bfd_hash_table_free (&ret->stub_hash_table);
-  _bfd_generic_link_hash_table_free (hash);
-}
-
 /* Find the symbol for the given R_SYMNDX in IBFD and set *HP and *SYMP
    to (hash, NULL) for global symbols, and (NULL, sym) for locals.  Set
    *SYMSECP to the symbol's section.  *LOCSYMSP caches local syms.  */
@@ -480,51 +421,6 @@ get_sym_h (struct elf_link_hash_entry **
   return TRUE;
 }
 
-/* Build a name for an entry in the stub hash table.  We can't use a
-   local symbol name because ld -r might generate duplicate local symbols.  */
-
-static char *
-spu_stub_name (const asection *sym_sec,
-	       const struct elf_link_hash_entry *h,
-	       const Elf_Internal_Rela *rel)
-{
-  char *stub_name;
-  bfd_size_type len;
-
-  if (h)
-    {
-      len = strlen (h->root.root.string) + 1 + 8 + 1;
-      stub_name = bfd_malloc (len);
-      if (stub_name == NULL)
-	return stub_name;
-
-      sprintf (stub_name, "%s+%x",
-	       h->root.root.string,
-	       (int) rel->r_addend & 0xffffffff);
-      len -= 8;
-    }
-  else
-    {
-      len = 8 + 1 + 8 + 1 + 8 + 1;
-      stub_name = bfd_malloc (len);
-      if (stub_name == NULL)
-	return stub_name;
-
-      sprintf (stub_name, "%x:%x+%x",
-	       sym_sec->id & 0xffffffff,
-	       (int) ELF32_R_SYM (rel->r_info) & 0xffffffff,
-	       (int) rel->r_addend & 0xffffffff);
-      len = strlen (stub_name);
-    }
-
-  if (stub_name[len - 2] == '+'
-      && stub_name[len - 1] == '0'
-      && stub_name[len] == 0)
-    stub_name[len - 2] = 0;
-
-  return stub_name;
-}
-
 /* Create the note section if not already present.  This is done early so
    that the linker maps the sections to the right place in the output.  */
 
@@ -634,9 +530,7 @@ spu_elf_find_overlays (bfd *output_bfd, 
   qsort (alloc_sec, n, sizeof (*alloc_sec), sort_sections);
 
   /* Look for overlapping vmas.  Any with overlap must be overlays.
-     Count them.  Also count the number of overlay regions and for
-     each region save a section from that region with the lowest vma
-     and another section with the highest end vma.  */
+     Count them.  Also count the number of overlay regions.  */
   ovl_end = alloc_sec[0]->vma + alloc_sec[0]->size;
   for (ovl_index = 0, num_buf = 0, i = 1; i < n; i++)
     {
@@ -645,19 +539,24 @@ spu_elf_find_overlays (bfd *output_bfd, 
 	{
 	  asection *s0 = alloc_sec[i - 1];
 
-	  if (spu_elf_section_data (s0)->ovl_index == 0)
+	  if (spu_elf_section_data (s0)->u.o.ovl_index == 0)
 	    {
-	      spu_elf_section_data (s0)->ovl_index = ++ovl_index;
-	      alloc_sec[num_buf * 2] = s0;
-	      alloc_sec[num_buf * 2 + 1] = s0;
-	      num_buf++;
+	      alloc_sec[ovl_index] = s0;
+	      spu_elf_section_data (s0)->u.o.ovl_index = ++ovl_index;
+	      spu_elf_section_data (s0)->u.o.ovl_buf = ++num_buf;
 	    }
-	  spu_elf_section_data (s)->ovl_index = ++ovl_index;
-	  if (ovl_end < s->vma + s->size)
+	  alloc_sec[ovl_index] = s;
+	  spu_elf_section_data (s)->u.o.ovl_index = ++ovl_index;
+	  spu_elf_section_data (s)->u.o.ovl_buf = num_buf;
+	  if (s0->vma != s->vma)
 	    {
-	      ovl_end = s->vma + s->size;
-	      alloc_sec[num_buf * 2 - 1] = s;
+	      info->callbacks->einfo (_("%X%P: overlay sections %A and %A "
+					"do not start at the same address.\n"),
+				      s0, s);
+	      return FALSE;
 	    }
+	  if (ovl_end < s->vma + s->size)
+	    ovl_end = s->vma + s->size;
 	}
       else
 	ovl_end = s->vma + s->size;
@@ -665,30 +564,22 @@ spu_elf_find_overlays (bfd *output_bfd, 
 
   htab->num_overlays = ovl_index;
   htab->num_buf = num_buf;
-  if (ovl_index == 0)
-    {
-      free (alloc_sec);
-      return FALSE;
-    }
-
-  alloc_sec = bfd_realloc (alloc_sec, num_buf * 2 * sizeof (*alloc_sec));
-  if (alloc_sec == NULL)
-    return FALSE;
-
-  htab->ovl_region = alloc_sec;
-  return TRUE;
+  htab->ovl_sec = alloc_sec;
+  return ovl_index != 0;
 }
 
-/* One of these per stub.  */
-#define SIZEOF_STUB1 8
-#define ILA_79	0x4200004f		/* ila $79,function_address */
-#define BR	0x32000000		/* br stub2 */
-
-/* One of these per overlay.  */
-#define SIZEOF_STUB2 8
-#define ILA_78	0x4200004e		/* ila $78,overlay_number */
-					/* br __ovly_load */
+/* Support two sizes of overlay stubs, a slower more compact stub of two
+   instructions, and a faster stub of four instructions.  */
+#ifndef OVL_STUB_SIZE
+/* Default to faster.  */
+#define OVL_STUB_SIZE 16
+/* #define OVL_STUB_SIZE 8 */
+#endif
+#define BRSL	0x33000000
+#define BR	0x32000000
 #define NOP	0x40200000
+#define LNOP	0x00200000
+#define ILA	0x42000000
 
 /* Return true for all relative and absolute branch instructions.
    bra   00110000 0..
@@ -757,14 +648,14 @@ needs_ovl_stub (const char *sym_name,
     return TRUE;
 
   /* Usually, symbols in non-overlay sections don't need stubs.  */
-  if (spu_elf_section_data (sym_sec->output_section)->ovl_index == 0
+  if (spu_elf_section_data (sym_sec->output_section)->u.o.ovl_index == 0
       && !htab->non_overlay_stubs)
     return FALSE;
 
   /* A reference from some other section to a symbol in an overlay
      section needs a stub.  */
-  if (spu_elf_section_data (sym_sec->output_section)->ovl_index
-       != spu_elf_section_data (input_section->output_section)->ovl_index)
+  if (spu_elf_section_data (sym_sec->output_section)->u.o.ovl_index
+       != spu_elf_section_data (input_section->output_section)->u.o.ovl_index)
     return TRUE;
 
   /* If this insn isn't a branch then we are possibly taking the
@@ -772,117 +663,284 @@ needs_ovl_stub (const char *sym_name,
   return !is_branch;
 }
 
-/* Called via elf_link_hash_traverse to allocate stubs for any _SPUEAR_
-   symbols.  */
+enum _insn_type { non_branch, branch, call };
 
 static bfd_boolean
-allocate_spuear_stubs (struct elf_link_hash_entry *h, void *inf)
-{
-  /* Symbols starting with _SPUEAR_ need a stub because they may be
-     invoked by the PPU.  */
-  if ((h->root.type == bfd_link_hash_defined
-       || h->root.type == bfd_link_hash_defweak)
-      && h->def_regular
-      && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
-    {
-      struct spu_link_hash_table *htab = inf;
-      static Elf_Internal_Rela zero_rel;
-      char *stub_name = spu_stub_name (h->root.u.def.section, h, &zero_rel);
-      struct spu_stub_hash_entry *sh;
+count_stub (struct spu_link_hash_table *htab,
+	    bfd *ibfd,
+	    asection *isec,
+	    enum _insn_type insn_type,
+	    struct elf_link_hash_entry *h,
+	    const Elf_Internal_Rela *irela)
+{
+  unsigned int ovl = 0;
+  struct got_entry *g, **head;
+
+  /* If this instruction is a branch or call, we need a stub
+     for it.  One stub per function per overlay.
+     If it isn't a branch, then we are taking the address of
+     this function so need a stub in the non-overlay area
+     for it.  One stub per function.  */
+  if (insn_type != non_branch)
+    ovl = spu_elf_section_data (isec->output_section)->u.o.ovl_index;
 
-      if (stub_name == NULL)
+  if (h != NULL)
+    head = &h->got.glist;
+  else
+    {
+      if (elf_local_got_ents (ibfd) == NULL)
 	{
-	  htab->stubs.err = 1;
-	  return FALSE;
+	  bfd_size_type amt = (elf_tdata (ibfd)->symtab_hdr.sh_info
+			       * sizeof (*elf_local_got_ents (ibfd)));
+	  elf_local_got_ents (ibfd) = bfd_zmalloc (amt);
+	  if (elf_local_got_ents (ibfd) == NULL)
+	    return FALSE;
 	}
+      head = elf_local_got_ents (ibfd) + ELF32_R_SYM (irela->r_info);
+    }
+
+  /* If we have a stub in the non-overlay area then there's no need
+     for one in overlays.  */
+  g = *head;
+  if (g != NULL && g->ovl == 0)
+    return TRUE;
 
-      sh = (struct spu_stub_hash_entry *)
-	bfd_hash_lookup (&htab->stub_hash_table, stub_name, TRUE, FALSE);
-      if (sh == NULL)
+  if (ovl == 0)
+    {
+      struct got_entry *gnext;
+
+      /* Need a new non-overlay area stub.  Zap other stubs.  */
+      for (; g != NULL; g = gnext)
 	{
-	  free (stub_name);
-	  return FALSE;
+	  htab->stub_count[g->ovl] -= 1;
+	  gnext = g->next;
+	  free (g);
 	}
+    }
+  else
+    {
+      for (; g != NULL; g = g->next)
+	if (g->ovl == ovl)
+	  break;
+    }
+
+  if (g == NULL)
+    {
+      g = bfd_malloc (sizeof *g);
+      if (g == NULL)
+	return FALSE;
+      g->ovl = ovl;
+      g->stub_addr = (bfd_vma) -1;
+      g->next = *head;
+      *head = g;
+
+      htab->stub_count[ovl] += 1;
+    }
 
-      /* If this entry isn't new, we already have a stub.  */
-      if (sh->target_section != NULL)
+  return TRUE;
+}
+
+/* Two instruction overlay stubs look like:
+
+   brsl $75,__ovly_load
+   .word target_ovl_and_address
+
+   target_ovl_and_address is a word with the overlay number in the top 14 bits
+   and local store address in the bottom 18 bits.
+
+   Four instruction overlay stubs look like:
+
+   ila $78,ovl_number
+   lnop
+   ila $79,target_address
+   br __ovly_load  */
+
+static bfd_boolean
+build_stub (struct spu_link_hash_table *htab,
+	    bfd *ibfd,
+	    asection *isec,
+	    enum _insn_type insn_type,
+	    struct elf_link_hash_entry *h,
+	    const Elf_Internal_Rela *irela,
+	    bfd_vma dest,
+	    asection *dest_sec)
+{
+  unsigned int ovl;
+  struct got_entry *g, **head;
+  asection *sec;
+  bfd_vma val, from, to;
+
+  ovl = 0;
+  if (insn_type != non_branch)
+    ovl = spu_elf_section_data (isec->output_section)->u.o.ovl_index;
+
+  if (h != NULL)
+    head = &h->got.glist;
+  else
+    head = elf_local_got_ents (ibfd) + ELF32_R_SYM (irela->r_info);
+
+  g = *head;
+  if (g != NULL && g->ovl == 0 && ovl != 0)
+    return TRUE;
+
+  for (; g != NULL; g = g->next)
+    if (g->ovl == ovl)
+      break;
+  if (g == NULL)
+    abort ();
+
+  if (g->stub_addr != (bfd_vma) -1)
+    return TRUE;
+
+  sec = htab->stub_sec[ovl];
+  dest += dest_sec->output_offset + dest_sec->output_section->vma;
+  from = sec->size + sec->output_offset + sec->output_section->vma;
+  g->stub_addr = from;
+  to = (htab->ovly_load->root.u.def.value
+	+ htab->ovly_load->root.u.def.section->output_offset
+	+ htab->ovly_load->root.u.def.section->output_section->vma);
+  val = to - from;
+  if (OVL_STUB_SIZE == 16)
+    val -= 12;
+  if (((dest | to | from) & 3) != 0
+      || val + 0x20000 >= 0x40000)
+    {
+      htab->stub_err = 1;
+      return FALSE;
+    }
+  ovl = spu_elf_section_data (dest_sec->output_section)->u.o.ovl_index;
+
+  if (OVL_STUB_SIZE == 16)
+    {
+      bfd_put_32 (sec->owner, ILA + ((ovl << 7) & 0x01ffff80) + 78,
+		  sec->contents + sec->size);
+      bfd_put_32 (sec->owner, LNOP,
+		  sec->contents + sec->size + 4);
+      bfd_put_32 (sec->owner, ILA + ((dest << 7) & 0x01ffff80) + 79,
+		  sec->contents + sec->size + 8);
+      bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80),
+		  sec->contents + sec->size + 12);
+    }
+  else if (OVL_STUB_SIZE == 8)
+    {
+      bfd_put_32 (sec->owner, BRSL + ((val << 5) & 0x007fff80) + 75,
+		  sec->contents + sec->size);
+
+      val = (dest & 0x3ffff) | (ovl << 14);
+      bfd_put_32 (sec->owner, val,
+		  sec->contents + sec->size + 4);
+    }
+  else
+    abort ();
+  sec->size += OVL_STUB_SIZE;
+
+  if (htab->emit_stub_syms)
+    {
+      size_t len;
+      char *name;
+      int add;
+
+      len = 8 + sizeof (".ovl_call.") - 1;
+      if (h != NULL)
+	len += strlen (h->root.root.string);
+      else
+	len += 8 + 1 + 8;
+      add = 0;
+      if (irela != NULL)
+	add = (int) irela->r_addend & 0xffffffff;
+      if (add != 0)
+	len += 1 + 8;
+      name = bfd_malloc (len);
+      if (name == NULL)
+	return FALSE;
+
+      sprintf (name, "%08x.ovl_call.", g->ovl);
+      if (h != NULL)
+	strcpy (name + 8 + sizeof (".ovl_call.") - 1, h->root.root.string);
+      else
+	sprintf (name + 8 + sizeof (".ovl_call.") - 1, "%x:%x",
+		 dest_sec->id & 0xffffffff,
+		 (int) ELF32_R_SYM (irela->r_info) & 0xffffffff);
+      if (add != 0)
+	sprintf (name + len - 9, "+%x", add);
+
+      h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE);
+      free (name);
+      if (h == NULL)
+	return FALSE;
+      if (h->root.type == bfd_link_hash_new)
 	{
-	  free (stub_name);
-	  return TRUE;
+	  h->root.type = bfd_link_hash_defined;
+	  h->root.u.def.section = sec;
+	  h->root.u.def.value = sec->size - OVL_STUB_SIZE;
+	  h->size = OVL_STUB_SIZE;
+	  h->type = STT_FUNC;
+	  h->ref_regular = 1;
+	  h->def_regular = 1;
+	  h->ref_regular_nonweak = 1;
+	  h->forced_local = 1;
+	  h->non_elf = 0;
 	}
-
-      sh->target_section = h->root.u.def.section;
-      sh->target_off = h->root.u.def.value;
-      htab->stubs.count += 1;
     }
-  
+
   return TRUE;
 }
 
-/* Called via bfd_hash_traverse to set up pointers to all symbols
-   in the stub hash table.  */
+/* Called via elf_link_hash_traverse to allocate stubs for any _SPUEAR_
+   symbols.  */
 
 static bfd_boolean
-populate_stubs (struct bfd_hash_entry *bh, void *inf)
+allocate_spuear_stubs (struct elf_link_hash_entry *h, void *inf)
 {
-  struct spu_link_hash_table *htab = inf;
+  /* Symbols starting with _SPUEAR_ need a stub because they may be
+     invoked by the PPU.  */
+  if ((h->root.type == bfd_link_hash_defined
+       || h->root.type == bfd_link_hash_defweak)
+      && h->def_regular
+      && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
+    {
+      struct spu_link_hash_table *htab = inf;
 
-  htab->stubs.sh[--htab->stubs.count] = (struct spu_stub_hash_entry *) bh;
+      count_stub (htab, NULL, NULL, non_branch, h, NULL);
+    }
+  
   return TRUE;
 }
 
-/* qsort predicate to sort stubs by overlay number.  */
-
-static int
-sort_stubs (const void *a, const void *b)
+static bfd_boolean
+build_spuear_stubs (struct elf_link_hash_entry *h, void *inf)
 {
-  const struct spu_stub_hash_entry *const *sa = a;
-  const struct spu_stub_hash_entry *const *sb = b;
-  int i;
-  bfd_signed_vma d;
+  /* Symbols starting with _SPUEAR_ need a stub because they may be
+     invoked by the PPU.  */
+  if ((h->root.type == bfd_link_hash_defined
+       || h->root.type == bfd_link_hash_defweak)
+      && h->def_regular
+      && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
+    {
+      struct spu_link_hash_table *htab = inf;
 
-  i = spu_elf_section_data ((*sa)->target_section->output_section)->ovl_index;
-  i -= spu_elf_section_data ((*sb)->target_section->output_section)->ovl_index;
-  if (i != 0)
-    return i;
-
-  d = ((*sa)->target_section->output_section->vma
-       + (*sa)->target_section->output_offset
-       + (*sa)->target_off
-       - (*sb)->target_section->output_section->vma
-       - (*sb)->target_section->output_offset
-       - (*sb)->target_off);
-  if (d != 0)
-    return d < 0 ? -1 : 1;
-
-  /* Two functions at the same address.  Aliases perhaps.  */
-  i = strcmp ((*sb)->root.string, (*sa)->root.string);
-  BFD_ASSERT (i != 0);
-  return i;
+      build_stub (htab, NULL, NULL, non_branch, h, NULL,
+		  h->root.u.def.value, h->root.u.def.section);
+    }
+  
+  return TRUE;
 }
 
-/* Allocate space for overlay call and return stubs.  */
+/* Size or build stubs.  */
 
-bfd_boolean
-spu_elf_size_stubs (bfd *output_bfd,
-		    struct bfd_link_info *info,
-		    int non_overlay_stubs,
-		    int stack_analysis,
-		    asection **stub,
-		    asection **ovtab,
-		    asection **toe)
+static bfd_boolean
+process_stubs (bfd *output_bfd,
+	       struct bfd_link_info *info,
+	       bfd_boolean build)
 {
   struct spu_link_hash_table *htab = spu_hash_table (info);
   bfd *ibfd;
-  unsigned i, group;
-  flagword flags;
 
-  htab->non_overlay_stubs = non_overlay_stubs;
   for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
     {
       extern const bfd_target bfd_elf32_spu_vec;
       Elf_Internal_Shdr *symtab_hdr;
-      asection *section;
+      asection *isec;
       Elf_Internal_Sym *local_syms = NULL;
       void *psyms;
 
@@ -896,37 +954,36 @@ spu_elf_size_stubs (bfd *output_bfd,
 
       /* Arrange to read and keep global syms for later stack analysis.  */
       psyms = &local_syms;
-      if (stack_analysis)
+      if (htab->stack_analysis)
 	psyms = &symtab_hdr->contents;
 
       /* Walk over each section attached to the input bfd.  */
-      for (section = ibfd->sections; section != NULL; section = section->next)
+      for (isec = ibfd->sections; isec != NULL; isec = isec->next)
 	{
 	  Elf_Internal_Rela *internal_relocs, *irelaend, *irela;
 
 	  /* If there aren't any relocs, then there's nothing more to do.  */
-	  if ((section->flags & SEC_RELOC) == 0
-	      || (section->flags & SEC_ALLOC) == 0
-	      || (section->flags & SEC_LOAD) == 0
-	      || section->reloc_count == 0)
+	  if ((isec->flags & SEC_RELOC) == 0
+	      || (isec->flags & SEC_ALLOC) == 0
+	      || (isec->flags & SEC_LOAD) == 0
+	      || isec->reloc_count == 0)
 	    continue;
 
 	  /* If this section is a link-once section that will be
 	     discarded, then don't create any stubs.  */
-	  if (section->output_section == NULL
-	      || section->output_section->owner != output_bfd)
+	  if (isec->output_section == NULL
+	      || isec->output_section->owner != output_bfd)
 	    continue;
 
 	  /* Get the relocs.  */
-	  internal_relocs
-	    = _bfd_elf_link_read_relocs (ibfd, section, NULL, NULL,
-					 info->keep_memory);
+	  internal_relocs = _bfd_elf_link_read_relocs (ibfd, isec, NULL, NULL,
+						       info->keep_memory);
 	  if (internal_relocs == NULL)
 	    goto error_ret_free_local;
 
 	  /* Now examine each relocation.  */
 	  irela = internal_relocs;
-	  irelaend = irela + section->reloc_count;
+	  irelaend = irela + isec->reloc_count;
 	  for (; irela < irelaend; irela++)
 	    {
 	      enum elf_spu_reloc_type r_type;
@@ -935,10 +992,8 @@ spu_elf_size_stubs (bfd *output_bfd,
 	      Elf_Internal_Sym *sym;
 	      struct elf_link_hash_entry *h;
 	      const char *sym_name;
-	      char *stub_name;
-	      struct spu_stub_hash_entry *sh;
 	      unsigned int sym_type;
-	      enum _insn_type { non_branch, branch, call } insn_type;
+	      enum _insn_type insn_type;
 
 	      r_type = ELF32_R_TYPE (irela->r_info);
 	      r_indx = ELF32_R_SYM (irela->r_info);
@@ -946,7 +1001,15 @@ spu_elf_size_stubs (bfd *output_bfd,
 	      if (r_type >= R_SPU_max)
 		{
 		  bfd_set_error (bfd_error_bad_value);
-		  goto error_ret_free_internal;
+		error_ret_free_internal:
+		  if (elf_section_data (isec)->relocs != internal_relocs)
+		    free (internal_relocs);
+		error_ret_free_local:
+		  if (local_syms != NULL
+		      && (symtab_hdr->contents
+			  != (unsigned char *) local_syms))
+		    free (local_syms);
+		  return FALSE;
 		}
 
 	      /* Determine the reloc target section.  */
@@ -970,7 +1033,7 @@ spu_elf_size_stubs (bfd *output_bfd,
 		{
 		  unsigned char insn[4];
 
-		  if (!bfd_get_section_contents (ibfd, section, insn,
+		  if (!bfd_get_section_contents (ibfd, isec, insn,
 						 irela->r_offset, 4))
 		    goto error_ret_free_internal;
 
@@ -1013,50 +1076,40 @@ spu_elf_size_stubs (bfd *output_bfd,
 		    continue;
 		}
 
-	      if (!needs_ovl_stub (sym_name, sym_sec, section, htab,
+	      if (!needs_ovl_stub (sym_name, sym_sec, isec, htab,
 				   insn_type != non_branch))
 		continue;
 
-	      stub_name = spu_stub_name (sym_sec, h, irela);
-	      if (stub_name == NULL)
-		goto error_ret_free_internal;
-
-	      sh = (struct spu_stub_hash_entry *)
-		bfd_hash_lookup (&htab->stub_hash_table, stub_name,
-				 TRUE, FALSE);
-	      if (sh == NULL)
+	      if (htab->stub_count == NULL)
 		{
-		  free (stub_name);
-		error_ret_free_internal:
-		  if (elf_section_data (section)->relocs != internal_relocs)
-		    free (internal_relocs);
-		error_ret_free_local:
-		  if (local_syms != NULL
-		      && (symtab_hdr->contents
-			  != (unsigned char *) local_syms))
-		    free (local_syms);
-		  return FALSE;
+		  bfd_size_type amt;
+		  amt = (htab->num_overlays + 1) * sizeof (*htab->stub_count);
+		  htab->stub_count = bfd_zmalloc (amt);
+		  if (htab->stub_count == NULL)
+		    goto error_ret_free_internal;
 		}
 
-	      /* If this entry isn't new, we already have a stub.  */
-	      if (sh->target_section != NULL)
+	      if (!build)
 		{
-		  free (stub_name);
-		  continue;
+		  if (!count_stub (htab, ibfd, isec, insn_type, h, irela))
+		    goto error_ret_free_internal;
 		}
-
-	      sh->target_section = sym_sec;
-	      if (h != NULL)
-		sh->target_off = h->root.u.def.value;
 	      else
-		sh->target_off = sym->st_value;
-	      sh->target_off += irela->r_addend;
+		{
+		  bfd_vma dest;
 
-	      htab->stubs.count += 1;
+		  if (h != NULL)
+		    dest = h->root.u.def.value;
+		  else
+		    dest = sym->st_value;
+		  if (!build_stub (htab, ibfd, isec, insn_type, h, irela,
+				   dest, sym_sec))
+		    goto error_ret_free_internal;
+		}
 	    }
 
 	  /* We're done with the internal relocs, free them.  */
-	  if (elf_section_data (section)->relocs != internal_relocs)
+	  if (elf_section_data (isec)->relocs != internal_relocs)
 	    free (internal_relocs);
 	}
 
@@ -1070,99 +1123,64 @@ spu_elf_size_stubs (bfd *output_bfd,
 	}
     }
 
+  return TRUE;
+}
+
+/* Allocate space for overlay call and return stubs.  */
+
+int
+spu_elf_size_stubs (bfd *output_bfd,
+		    struct bfd_link_info *info,
+		    void (*place_spu_section) (asection *, asection *,
+					       const char *),
+		    int non_overlay_stubs)
+{
+  struct spu_link_hash_table *htab = spu_hash_table (info);
+  bfd *ibfd;
+  bfd_size_type amt;
+  flagword flags;
+  unsigned int i;
+  asection *stub;
+
+  htab->non_overlay_stubs = non_overlay_stubs;
+  if (!process_stubs (output_bfd, info, FALSE))
+    return 0;
+
   elf_link_hash_traverse (&htab->elf, allocate_spuear_stubs, htab);
-  if (htab->stubs.err)
-    return FALSE;
+  if (htab->stub_err)
+    return 0;
 
-  *stub = NULL;
-  if (htab->stubs.count == 0)
-    return TRUE;
+  if (htab->stub_count == NULL)
+    return 1;
 
   ibfd = info->input_bfds;
-  flags = (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_READONLY
-	   | SEC_HAS_CONTENTS | SEC_IN_MEMORY);
-  htab->stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags);
-  *stub = htab->stub;
-  if (htab->stub == NULL
-      || !bfd_set_section_alignment (ibfd, htab->stub, 4))
-    return FALSE;
+  amt = (htab->num_overlays + 1) * sizeof (*htab->stub_sec);
+  htab->stub_sec = bfd_zmalloc (amt);
+  if (htab->stub_sec == NULL)
+    return 0;
 
-  flags = (SEC_ALLOC | SEC_LOAD
+  flags = (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_READONLY
 	   | SEC_HAS_CONTENTS | SEC_IN_MEMORY);
-  htab->ovtab = bfd_make_section_anyway_with_flags (ibfd, ".ovtab", flags);
-  *ovtab = htab->ovtab;
-  if (htab->ovtab == NULL
-      || !bfd_set_section_alignment (ibfd, htab->ovtab, 4))
-    return FALSE;
+  stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags);
+  htab->stub_sec[0] = stub;
+  if (stub == NULL
+      || !bfd_set_section_alignment (ibfd, stub, 3 + (OVL_STUB_SIZE > 8)))
+    return 0;
+  stub->size = htab->stub_count[0] * OVL_STUB_SIZE;
+  (*place_spu_section) (stub, NULL, ".text");
 
-  *toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC);
-  if (*toe == NULL
-      || !bfd_set_section_alignment (ibfd, *toe, 4))
-    return FALSE;
-  (*toe)->size = 16;
-
-  /* Retrieve all the stubs and sort.  */
-  htab->stubs.sh = bfd_malloc (htab->stubs.count * sizeof (*htab->stubs.sh));
-  if (htab->stubs.sh == NULL)
-    return FALSE;
-  i = htab->stubs.count;
-  bfd_hash_traverse (&htab->stub_hash_table, populate_stubs, htab);
-  BFD_ASSERT (htab->stubs.count == 0);
-
-  htab->stubs.count = i;
-  qsort (htab->stubs.sh, htab->stubs.count, sizeof (*htab->stubs.sh),
-	 sort_stubs);
-
-  /* Now that the stubs are sorted, place them in the stub section.
-     Stubs are grouped per overlay
-     .	    ila $79,func1
-     .	    br 1f
-     .	    ila $79,func2
-     .	    br 1f
-     .
-     .
-     .	    ila $79,funcn
-     .	    nop
-     .	1:
-     .	    ila $78,ovl_index
-     .	    br __ovly_load  */
-
-  group = 0;
-  for (i = 0; i < htab->stubs.count; i++)
-    {
-      if (spu_elf_section_data (htab->stubs.sh[group]->target_section
-				->output_section)->ovl_index
-	  != spu_elf_section_data (htab->stubs.sh[i]->target_section
-				   ->output_section)->ovl_index)
-	{
-	  htab->stub->size += SIZEOF_STUB2;
-	  for (; group != i; group++)
-	    htab->stubs.sh[group]->delta
-	      = htab->stubs.sh[i - 1]->off - htab->stubs.sh[group]->off;
-	}
-      if (group == i
-	  || ((htab->stubs.sh[i - 1]->target_section->output_section->vma
-	       + htab->stubs.sh[i - 1]->target_section->output_offset
-	       + htab->stubs.sh[i - 1]->target_off)
-	      != (htab->stubs.sh[i]->target_section->output_section->vma
-		  + htab->stubs.sh[i]->target_section->output_offset
-		  + htab->stubs.sh[i]->target_off)))
-	{
-	  htab->stubs.sh[i]->off = htab->stub->size;
-	  htab->stub->size += SIZEOF_STUB1;
-	  if (info->emitrelocations)
-	    htab->stub->reloc_count += 1;
-	}
-      else
-	htab->stubs.sh[i]->off = htab->stubs.sh[i - 1]->off;
+  for (i = 0; i < htab->num_overlays; ++i)
+    {
+      asection *osec = htab->ovl_sec[i];
+      unsigned int ovl = spu_elf_section_data (osec)->u.o.ovl_index;
+      stub = bfd_make_section_anyway_with_flags (ibfd, ".stub", flags);
+      htab->stub_sec[ovl] = stub;
+      if (stub == NULL
+	  || !bfd_set_section_alignment (ibfd, stub, 3 + (OVL_STUB_SIZE > 8)))
+	return 0;
+      stub->size = htab->stub_count[ovl] * OVL_STUB_SIZE;
+      (*place_spu_section) (stub, osec, NULL);
     }
-  if (group != i)
-    htab->stub->size += SIZEOF_STUB2;
-  if (info->emitrelocations)
-    htab->stub->flags |= SEC_RELOC;
-  for (; group != i; group++)
-    htab->stubs.sh[group]->delta
-      = htab->stubs.sh[i - 1]->off - htab->stubs.sh[group]->off;
 
  /* htab->ovtab consists of two arrays.
     .	struct {
@@ -1174,12 +1192,27 @@ spu_elf_size_stubs (bfd *output_bfd,
     .
     .	struct {
     .	  u32 mapped;
-    .	} _ovly_buf_table[];  */
+    .	} _ovly_buf_table[];
+    .  */
 
-  htab->ovtab->alignment_power = 4;
-  htab->ovtab->size = htab->num_overlays * 16 + htab->num_buf * 4;
+  flags = (SEC_ALLOC | SEC_LOAD
+	   | SEC_HAS_CONTENTS | SEC_IN_MEMORY);
+  htab->ovtab = bfd_make_section_anyway_with_flags (ibfd, ".ovtab", flags);
+  if (htab->ovtab == NULL
+      || !bfd_set_section_alignment (ibfd, htab->ovtab, 4))
+    return 0;
 
-  return TRUE;
+  htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4;
+  (*place_spu_section) (htab->ovtab, NULL, ".data");
+
+  htab->toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC);
+  if (htab->toe == NULL
+      || !bfd_set_section_alignment (ibfd, htab->toe, 4))
+    return 0;
+  htab->toe->size = 16;
+  (*place_spu_section) (htab->toe, NULL, ".toe");
+
+  return 2;
 }
 
 /* Functions to handle embedded spu_ovl.o object.  */
@@ -1228,149 +1261,6 @@ spu_elf_open_builtin_lib (bfd **ovl_bfd,
   return *ovl_bfd != NULL;
 }
 
-/* Fill in the ila and br for a stub.  On the last stub for a group,
-   write the stub that sets the overlay number too.  */
-
-static bfd_boolean
-write_one_stub (struct spu_stub_hash_entry *ent, struct bfd_link_info *info)
-{
-  struct spu_link_hash_table *htab = spu_hash_table (info);
-  asection *sec = htab->stub;
-  asection *s = ent->target_section;
-  unsigned int ovl;
-  bfd_vma val;
-
-  val = ent->target_off + s->output_offset + s->output_section->vma;
-  bfd_put_32 (sec->owner, ILA_79 + ((val << 7) & 0x01ffff80),
-	      sec->contents + ent->off);
-  val = ent->delta + 4;
-  bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80),
-	      sec->contents + ent->off + 4);
-
-  if (info->emitrelocations)
-    {
-      Elf_Internal_Rela *relocs, *r;
-      struct bfd_elf_section_data *elfsec_data;
-
-      elfsec_data = elf_section_data (sec);
-      relocs = elfsec_data->relocs;
-      if (relocs == NULL)
-	{
-	  bfd_size_type relsize;
-	  Elf_Internal_Shdr *symtab_hdr;
-	  struct elf_link_hash_entry **sym_hash;
-	  unsigned long symcount;
-	  bfd_vma amt;
-
-	  relsize = sec->reloc_count * sizeof (*relocs);
-	  relocs = bfd_alloc (sec->owner, relsize);
-	  if (relocs == NULL)
-	    return FALSE;
-	  elfsec_data->relocs = relocs;
-	  elfsec_data->rel_hdr.sh_size
-	    = sec->reloc_count * sizeof (Elf32_External_Rela);
-	  elfsec_data->rel_hdr.sh_entsize = sizeof (Elf32_External_Rela);
-	  sec->reloc_count = 0;
-
-	  /* Increase the size of symbol hash array on the bfd to
-	     which we attached our .stub section.  This hack allows
-	     us to create relocs against global symbols.  */
-	  symtab_hdr = &elf_tdata (sec->owner)->symtab_hdr;
-	  symcount = symtab_hdr->sh_size / symtab_hdr->sh_entsize;
-	  symcount -= symtab_hdr->sh_info;
-	  amt = symcount * sizeof (*sym_hash);
-	  sym_hash = bfd_alloc (sec->owner, amt + sizeof (*sym_hash));
-	  if (sym_hash == NULL)
-	    return FALSE;
-	  memcpy (sym_hash, elf_sym_hashes (sec->owner), amt);
-	  sym_hash[symcount] = htab->ovly_load;
-	  htab->ovly_load_r_symndx = symcount + symtab_hdr->sh_info;
-	  elf_sym_hashes (sec->owner) = sym_hash;
-	}
-      r = relocs + sec->reloc_count;
-      sec->reloc_count += 1;
-      r->r_offset = ent->off + 4;
-      r->r_info = ELF32_R_INFO (0, R_SPU_REL16);
-      r->r_addend = (sec->output_section->vma
-		     + sec->output_offset
-		     + ent->off + 4
-		     + val);
-    }
-
-  /* If this is the last stub of this group, write stub2.  */
-  if (ent->delta == 0)
-    {
-      bfd_put_32 (sec->owner, NOP,
-		  sec->contents + ent->off + 4);
-
-      ovl = spu_elf_section_data (s->output_section)->ovl_index;
-      bfd_put_32 (sec->owner, ILA_78 + ((ovl << 7) & 0x01ffff80),
-		  sec->contents + ent->off + 8);
-
-      val = (htab->ovly_load->root.u.def.section->output_section->vma
-	     + htab->ovly_load->root.u.def.section->output_offset
-	     + htab->ovly_load->root.u.def.value
-	     - (sec->output_section->vma
-		+ sec->output_offset
-		+ ent->off + 12));
-
-      if (val + 0x20000 >= 0x40000)
-	htab->stub_overflow = TRUE;
-
-      bfd_put_32 (sec->owner, BR + ((val << 5) & 0x007fff80),
-		  sec->contents + ent->off + 12);
-
-      if (info->emitrelocations)
-	{
-	  Elf_Internal_Rela *relocs, *r;
-	  struct bfd_elf_section_data *elfsec_data;
-
-	  elfsec_data = elf_section_data (sec);
-	  relocs = elfsec_data->relocs;
-	  /* The last branch is overwritten, so overwrite its reloc too.  */
-	  r = relocs + sec->reloc_count - 1;
-	  r->r_offset = ent->off + 12;
-	  r->r_info = ELF32_R_INFO (htab->ovly_load_r_symndx, R_SPU_REL16);
-	  r->r_addend = 0;
-	}
-    }
-
-  if (htab->emit_stub_syms)
-    {
-      struct elf_link_hash_entry *h;
-      size_t len1, len2;
-      char *name;
-
-      len1 = sizeof ("00000000.ovl_call.") - 1;
-      len2 = strlen (ent->root.string);
-      name = bfd_malloc (len1 + len2 + 1);
-      if (name == NULL)
-	return FALSE;
-      memcpy (name, "00000000.ovl_call.", len1);
-      memcpy (name + len1, ent->root.string, len2 + 1);
-      h = elf_link_hash_lookup (&htab->elf, name, TRUE, TRUE, FALSE);
-      free (name);
-      if (h == NULL)
-	return FALSE;
-      if (h->root.type == bfd_link_hash_new)
-	{
-	  h->root.type = bfd_link_hash_defined;
-	  h->root.u.def.section = sec;
-	  h->root.u.def.value = ent->off;
-	  h->size = (ent->delta == 0
-		     ? SIZEOF_STUB1 + SIZEOF_STUB2 : SIZEOF_STUB1);
-	  h->type = STT_FUNC;
-	  h->ref_regular = 1;
-	  h->def_regular = 1;
-	  h->ref_regular_nonweak = 1;
-	  h->forced_local = 1;
-	  h->non_elf = 0;
-	}
-    }
-
-  return TRUE;
-}
-
 /* Define an STT_OBJECT symbol.  */
 
 static struct elf_link_hash_entry *
@@ -1408,7 +1298,7 @@ define_ovtab_symbol (struct spu_link_has
 /* Fill in all stubs and the overlay tables.  */
 
 bfd_boolean
-spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms, asection *toe)
+spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms)
 {
   struct spu_link_hash_table *htab = spu_hash_table (info);
   struct elf_link_hash_entry *h;
@@ -1418,9 +1308,19 @@ spu_elf_build_stubs (struct bfd_link_inf
   unsigned int i;
 
   htab->emit_stub_syms = emit_syms;
-  htab->stub->contents = bfd_zalloc (htab->stub->owner, htab->stub->size);
-  if (htab->stub->contents == NULL)
-    return FALSE;
+  if (htab->stub_count == NULL)
+    return TRUE;
+
+  for (i = 0; i <= htab->num_overlays; i++)
+    if (htab->stub_sec[i]->size != 0)
+      {
+	htab->stub_sec[i]->contents = bfd_zalloc (htab->stub_sec[i]->owner,
+						  htab->stub_sec[i]->size);
+	if (htab->stub_sec[i]->contents == NULL)
+	  return FALSE;
+	htab->stub_sec[i]->rawsize = htab->stub_sec[i]->size;
+	htab->stub_sec[i]->size = 0;
+      }
 
   h = elf_link_hash_lookup (&htab->elf, "__ovly_load", FALSE, FALSE, FALSE);
   htab->ovly_load = h;
@@ -1430,7 +1330,7 @@ spu_elf_build_stubs (struct bfd_link_inf
 	      && h->def_regular);
 
   s = h->root.u.def.section->output_section;
-  if (spu_elf_section_data (s)->ovl_index)
+  if (spu_elf_section_data (s)->u.o.ovl_index)
     {
       (*_bfd_error_handler) (_("%s in overlay section"),
 			     h->root.u.def.section->owner);
@@ -1438,11 +1338,29 @@ spu_elf_build_stubs (struct bfd_link_inf
       return FALSE;
     }
 
+  h = elf_link_hash_lookup (&htab->elf, "__ovly_return", FALSE, FALSE, FALSE);
+  htab->ovly_return = h;
+
   /* Write out all the stubs.  */
-  for (i = 0; i < htab->stubs.count; i++)
-    write_one_stub (htab->stubs.sh[i], info);
+  obfd = htab->ovtab->output_section->owner;
+  process_stubs (obfd, info, TRUE);
+
+  elf_link_hash_traverse (&htab->elf, build_spuear_stubs, htab);
+  if (htab->stub_err)
+    return FALSE;
 
-  if (htab->stub_overflow)
+  for (i = 0; i <= htab->num_overlays; i++)
+    {
+      if (htab->stub_sec[i]->size != htab->stub_sec[i]->rawsize)
+	{
+	  (*_bfd_error_handler)  (_("stubs don't match calculated size"));
+	  bfd_set_error (bfd_error_bad_value);
+	  return FALSE;
+	}
+      htab->stub_sec[i]->rawsize = 0;
+    }
+
+  if (htab->stub_err)
     {
       (*_bfd_error_handler) (_("overlay stub relocation overflow"));
       bfd_set_error (bfd_error_bad_value);
@@ -1455,75 +1373,52 @@ spu_elf_build_stubs (struct bfd_link_inf
 
   /* Write out _ovly_table.  */
   p = htab->ovtab->contents;
-  obfd = htab->ovtab->output_section->owner;
+  /* Set low bit of .buf in entry zero to mark non-overlay area present.  */
+  p[15] = 1;
   for (s = obfd->sections; s != NULL; s = s->next)
     {
-      unsigned int ovl_index = spu_elf_section_data (s)->ovl_index;
+      unsigned int ovl_index = spu_elf_section_data (s)->u.o.ovl_index;
 
       if (ovl_index != 0)
 	{
-	  unsigned int lo, hi, mid;
-	  unsigned long off = (ovl_index - 1) * 16;
+	  unsigned long off = ovl_index * 16;
+	  unsigned int ovl_buf = spu_elf_section_data (s)->u.o.ovl_buf;
+
 	  bfd_put_32 (htab->ovtab->owner, s->vma, p + off);
 	  bfd_put_32 (htab->ovtab->owner, (s->size + 15) & -16, p + off + 4);
 	  /* file_off written later in spu_elf_modify_program_headers.  */
-
-	  lo = 0;
-	  hi = htab->num_buf;
-	  while (lo < hi)
-	    {
-	      mid = (lo + hi) >> 1;
-	      if (htab->ovl_region[2 * mid + 1]->vma
-		  + htab->ovl_region[2 * mid + 1]->size <= s->vma)
-		lo = mid + 1;
-	      else if (htab->ovl_region[2 * mid]->vma > s->vma)
-		hi = mid;
-	      else
-		{
-		  bfd_put_32 (htab->ovtab->owner, mid + 1, p + off + 12);
-		  break;
-		}
-	    }
-	  BFD_ASSERT (lo < hi);
+	  bfd_put_32 (htab->ovtab->owner, ovl_buf * 2, p + off + 12);
 	}
     }
 
-  /* Write out _ovly_buf_table.  */
-  p = htab->ovtab->contents + htab->num_overlays * 16;
-  for (i = 0; i < htab->num_buf; i++)
-    {
-      bfd_put_32 (htab->ovtab->owner, 0, p);
-      p += 4;
-    }
-
   h = define_ovtab_symbol (htab, "_ovly_table");
   if (h == NULL)
     return FALSE;
-  h->root.u.def.value = 0;
+  h->root.u.def.value = 16;
   h->size = htab->num_overlays * 16;
 
   h = define_ovtab_symbol (htab, "_ovly_table_end");
   if (h == NULL)
     return FALSE;
-  h->root.u.def.value = htab->num_overlays * 16;
+  h->root.u.def.value = htab->num_overlays * 16 + 16;
   h->size = 0;
 
   h = define_ovtab_symbol (htab, "_ovly_buf_table");
   if (h == NULL)
     return FALSE;
-  h->root.u.def.value = htab->num_overlays * 16;
-  h->size = htab->num_buf * 4;
+  h->root.u.def.value = htab->num_overlays * 16 + 16;
+  h->size = htab->num_buf * 2 * 4;
 
   h = define_ovtab_symbol (htab, "_ovly_buf_table_end");
   if (h == NULL)
     return FALSE;
-  h->root.u.def.value = htab->num_overlays * 16 + htab->num_buf * 4;
+  h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4;
   h->size = 0;
 
   h = define_ovtab_symbol (htab, "_EAR_");
   if (h == NULL)
     return FALSE;
-  h->root.u.def.section = toe;
+  h->root.u.def.section = htab->toe;
   h->root.u.def.value = 0;
   h->size = 16;
 
@@ -1716,10 +1611,10 @@ alloc_stack_info (asection *sec, int max
 
   amt = sizeof (struct spu_elf_stack_info);
   amt += (max_fun - 1) * sizeof (struct function_info);
-  sec_data->stack_info = bfd_zmalloc (amt);
-  if (sec_data->stack_info != NULL)
-    sec_data->stack_info->max_fun = max_fun;
-  return sec_data->stack_info;
+  sec_data->u.i.stack_info = bfd_zmalloc (amt);
+  if (sec_data->u.i.stack_info != NULL)
+    sec_data->u.i.stack_info->max_fun = max_fun;
+  return sec_data->u.i.stack_info;
 }
 
 /* Add a new struct function_info describing a (part of a) function
@@ -1732,7 +1627,7 @@ maybe_insert_function (asection *sec,
 		       bfd_boolean is_func)
 {
   struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
-  struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+  struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
   int i;
   bfd_vma off, size;
 
@@ -1796,7 +1691,7 @@ maybe_insert_function (asection *sec,
       if (sinfo == NULL)
 	return NULL;
       memset ((char *) sinfo + old, 0, amt - old);
-      sec_data->stack_info = sinfo;
+      sec_data->u.i.stack_info = sinfo;
     }
   sinfo->fun[i].is_func = is_func;
   sinfo->fun[i].global = global;
@@ -1887,7 +1782,7 @@ static bfd_boolean
 check_function_ranges (asection *sec, struct bfd_link_info *info)
 {
   struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
-  struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+  struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
   int i;
   bfd_boolean gaps = FALSE;
 
@@ -1933,7 +1828,7 @@ static struct function_info *
 find_function (asection *sec, bfd_vma offset, struct bfd_link_info *info)
 {
   struct _spu_elf_section_data *sec_data = spu_elf_section_data (sec);
-  struct spu_elf_stack_info *sinfo = sec_data->stack_info;
+  struct spu_elf_stack_info *sinfo = sec_data->u.i.stack_info;
   int lo, hi, mid;
 
   lo = 0;
@@ -2155,7 +2050,7 @@ pasted_function (asection *sec, struct b
 	}
       if (l->type == bfd_indirect_link_order
 	  && (sec_data = spu_elf_section_data (l->u.indirect.section)) != NULL
-	  && (sinfo = sec_data->stack_info) != NULL
+	  && (sinfo = sec_data->u.i.stack_info) != NULL
 	  && sinfo->num_fun != 0)
 	fun_start = &sinfo->fun[sinfo->num_fun - 1];
     }
@@ -2164,15 +2059,15 @@ pasted_function (asection *sec, struct b
   return FALSE;
 }
 
-/* We're only interested in code sections.  */
+/* We're only interested in code sections.  Testing SEC_IN_MEMORY excludes
+   overlay stub sections.  */
 
 static bfd_boolean
-interesting_section (asection *s, bfd *obfd, struct spu_link_hash_table *htab)
+interesting_section (asection *s, bfd *obfd)
 {
-  return (s != htab->stub
-	  && s->output_section != NULL
+  return (s->output_section != NULL
 	  && s->output_section->owner == obfd
-	  && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE))
+	  && ((s->flags & (SEC_ALLOC | SEC_LOAD | SEC_CODE | SEC_IN_MEMORY))
 	      == (SEC_ALLOC | SEC_LOAD | SEC_CODE))
 	  && s->size != 0);
 }
@@ -2182,7 +2077,6 @@ interesting_section (asection *s, bfd *o
 static bfd_boolean
 discover_functions (bfd *output_bfd, struct bfd_link_info *info)
 {
-  struct spu_link_hash_table *htab = spu_hash_table (info);
   bfd *ibfd;
   int bfd_idx;
   Elf_Internal_Sym ***psym_arr;
@@ -2247,7 +2141,7 @@ discover_functions (bfd *output_bfd, str
 	    asection *s;
 
 	    *p = s = bfd_section_from_elf_index (ibfd, sy->st_shndx);
-	    if (s != NULL && interesting_section (s, output_bfd, htab))
+	    if (s != NULL && interesting_section (s, output_bfd))
 	      *psy++ = sy;
 	  }
       symcount = psy - psyms;
@@ -2289,7 +2183,7 @@ discover_functions (bfd *output_bfd, str
 	}
 
       for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next)
-	if (interesting_section (sec, output_bfd, htab))
+	if (interesting_section (sec, output_bfd))
 	  gaps |= check_function_ranges (sec, info);
     }
 
@@ -2307,7 +2201,7 @@ discover_functions (bfd *output_bfd, str
 	    continue;
 
 	  for (sec = ibfd->sections; sec != NULL; sec = sec->next)
-	    if (interesting_section (sec, output_bfd, htab)
+	    if (interesting_section (sec, output_bfd)
 		&& sec->reloc_count != 0)
 	      {
 		if (!mark_functions_via_relocs (sec, info, FALSE))
@@ -2334,7 +2228,7 @@ discover_functions (bfd *output_bfd, str
 
 	  gaps = FALSE;
 	  for (sec = ibfd->sections; sec != NULL && !gaps; sec = sec->next)
-	    if (interesting_section (sec, output_bfd, htab))
+	    if (interesting_section (sec, output_bfd))
 	      gaps |= check_function_ranges (sec, info);
 	  if (!gaps)
 	    continue;
@@ -2360,13 +2254,13 @@ discover_functions (bfd *output_bfd, str
 	     the range of such functions to the beginning of the
 	     next symbol of interest.  */
 	  for (sec = ibfd->sections; sec != NULL; sec = sec->next)
-	    if (interesting_section (sec, output_bfd, htab))
+	    if (interesting_section (sec, output_bfd))
 	      {
 		struct _spu_elf_section_data *sec_data;
 		struct spu_elf_stack_info *sinfo;
 
 		sec_data = spu_elf_section_data (sec);
-		sinfo = sec_data->stack_info;
+		sinfo = sec_data->u.i.stack_info;
 		if (sinfo != NULL)
 		  {
 		    int fun_idx;
@@ -2455,7 +2349,6 @@ call_graph_traverse (struct function_inf
 static bfd_boolean
 build_call_tree (bfd *output_bfd, struct bfd_link_info *info)
 {
-  struct spu_link_hash_table *htab = spu_hash_table (info);
   bfd *ibfd;
 
   for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link_next)
@@ -2468,7 +2361,7 @@ build_call_tree (bfd *output_bfd, struct
 
       for (sec = ibfd->sections; sec != NULL; sec = sec->next)
 	{
-	  if (!interesting_section (sec, output_bfd, htab)
+	  if (!interesting_section (sec, output_bfd)
 	      || sec->reloc_count == 0)
 	    continue;
 
@@ -2484,7 +2377,7 @@ build_call_tree (bfd *output_bfd, struct
 	  struct spu_elf_stack_info *sinfo;
 
 	  if ((sec_data = spu_elf_section_data (sec)) != NULL
-	      && (sinfo = sec_data->stack_info) != NULL)
+	      && (sinfo = sec_data->u.i.stack_info) != NULL)
 	    {
 	      int i;
 	      for (i = 0; i < sinfo->num_fun; ++i)
@@ -2523,7 +2416,7 @@ build_call_tree (bfd *output_bfd, struct
 	  struct spu_elf_stack_info *sinfo;
 
 	  if ((sec_data = spu_elf_section_data (sec)) != NULL
-	      && (sinfo = sec_data->stack_info) != NULL)
+	      && (sinfo = sec_data->u.i.stack_info) != NULL)
 	    {
 	      int i;
 	      for (i = 0; i < sinfo->num_fun; ++i)
@@ -2549,7 +2442,7 @@ build_call_tree (bfd *output_bfd, struct
 	  struct spu_elf_stack_info *sinfo;
 
 	  if ((sec_data = spu_elf_section_data (sec)) != NULL
-	      && (sinfo = sec_data->stack_info) != NULL)
+	      && (sinfo = sec_data->u.i.stack_info) != NULL)
 	    {
 	      int i;
 	      for (i = 0; i < sinfo->num_fun; ++i)
@@ -2684,7 +2577,7 @@ spu_elf_stack_analysis (bfd *output_bfd,
 	  struct spu_elf_stack_info *sinfo;
 
 	  if ((sec_data = spu_elf_section_data (sec)) != NULL
-	      && (sinfo = sec_data->stack_info) != NULL)
+	      && (sinfo = sec_data->u.i.stack_info) != NULL)
 	    {
 	      int i;
 	      for (i = 0; i < sinfo->num_fun; ++i)
@@ -2847,25 +2740,31 @@ spu_elf_relocate_section (bfd *output_bf
       addend = rel->r_addend;
       branch = (is_branch (contents + rel->r_offset)
 		|| is_hint (contents + rel->r_offset));
-      if (needs_ovl_stub (sym_name, sec, input_section, htab, branch))
-	{
-	  char *stub_name;
-	  struct spu_stub_hash_entry *sh;
+      if (htab->stub_sec != NULL
+	  && needs_ovl_stub (sym_name, sec, input_section, htab, branch)
+	  && (h == NULL
+	      || (h != htab->ovly_load && h != htab->ovly_return)))
+	{
+	  unsigned int ovl = 0;
+	  struct got_entry *g, **head;
+
+	  if (branch)
+	    ovl = (spu_elf_section_data (input_section->output_section)
+		   ->u.o.ovl_index);
 
-	  stub_name = spu_stub_name (sec, h, rel);
-	  if (stub_name == NULL)
-	    return FALSE;
+	  if (h != NULL)
+	    head = &h->got.glist;
+	  else
+	    head = elf_local_got_ents (input_bfd) + r_symndx;
 
-	  sh = (struct spu_stub_hash_entry *)
-	    bfd_hash_lookup (&htab->stub_hash_table, stub_name, FALSE, FALSE);
-	  if (sh != NULL)
-	    {
-	      relocation = (htab->stub->output_section->vma
-			    + htab->stub->output_offset
-			    + sh->off);
-	      addend = 0;
-	    }
-	  free (stub_name);
+	  for (g = *head; g != NULL; g = g->next)
+	    if (g->ovl == ovl || g->ovl == 0)
+	      break;
+	  if (g == NULL)
+	    abort ();
+
+	  relocation = g->stub_addr;
+	  addend = 0;
 	}
 
       r = _bfd_final_link_relocate (howto,
@@ -2960,30 +2859,22 @@ spu_elf_output_symbol_hook (struct bfd_l
   struct spu_link_hash_table *htab = spu_hash_table (info);
 
   if (!info->relocatable
-      && htab->num_overlays != 0
+      && htab->stub_sec != NULL
       && h != NULL
       && (h->root.type == bfd_link_hash_defined
 	  || h->root.type == bfd_link_hash_defweak)
       && h->def_regular
       && strncmp (h->root.root.string, "_SPUEAR_", 8) == 0)
     {
-      static Elf_Internal_Rela zero_rel;
-      char *stub_name = spu_stub_name (h->root.u.def.section, h, &zero_rel);
-      struct spu_stub_hash_entry *sh;
+      struct got_entry *g = h->got.glist;
 
-      if (stub_name == NULL)
-	return FALSE;
-      sh = (struct spu_stub_hash_entry *)
-	bfd_hash_lookup (&htab->stub_hash_table, stub_name, FALSE, FALSE);
-      free (stub_name);
-      if (sh == NULL)
-	return TRUE;
-      sym->st_shndx
-	= _bfd_elf_section_from_bfd_section (htab->stub->output_section->owner,
-					     htab->stub->output_section);
-      sym->st_value = (htab->stub->output_section->vma
-		       + htab->stub->output_offset
-		       + sh->off);
+      if (g != NULL && g->ovl == 0)
+	{
+	  sym->st_shndx = (_bfd_elf_section_from_bfd_section
+			   (htab->stub_sec[0]->output_section->owner,
+			    htab->stub_sec[0]->output_section));
+	  sym->st_value = g->stub_addr;
+	}
     }
 
   return TRUE;
@@ -3049,7 +2940,7 @@ spu_elf_modify_segment_map (bfd *abfd, s
     if (m->p_type == PT_LOAD && m->count > 1)
       for (i = 0; i < m->count; i++)
 	if ((s = m->sections[i]) == toe
-	    || spu_elf_section_data (s)->ovl_index != 0)
+	    || spu_elf_section_data (s)->u.o.ovl_index != 0)
 	  {
 	    struct elf_segment_map *m2;
 	    bfd_vma amt;
@@ -3148,7 +3039,7 @@ spu_elf_modify_program_headers (bfd *abf
 
       for (i = 0, m = elf_tdata (abfd)->segment_map; m; ++i, m = m->next)
 	if (m->count != 0
-	    && (o = spu_elf_section_data (m->sections[0])->ovl_index) != 0)
+	    && (o = spu_elf_section_data (m->sections[0])->u.o.ovl_index) != 0)
 	  {
 	    /* Mark this as an overlay header.  */
 	    phdr[i].p_flags |= PF_OVERLAY;
@@ -3156,7 +3047,7 @@ spu_elf_modify_program_headers (bfd *abf
 	    if (htab->ovtab != NULL && htab->ovtab->size != 0)
 	      {
 		bfd_byte *p = htab->ovtab->contents;
-		unsigned int off = (o - 1) * 16 + 8;
+		unsigned int off = o * 16 + 8;
 
 		/* Write file_off into _ovly_table.  */
 		bfd_put_32 (htab->ovtab->owner, phdr[i].p_offset, p + off);
@@ -3226,7 +3117,6 @@ spu_elf_modify_program_headers (bfd *abf
 #define elf_backend_link_output_symbol_hook	spu_elf_output_symbol_hook
 #define bfd_elf32_new_section_hook		spu_elf_new_section_hook
 #define bfd_elf32_bfd_link_hash_table_create	spu_elf_link_hash_table_create
-#define bfd_elf32_bfd_link_hash_table_free	spu_elf_link_hash_table_free
 
 #define elf_backend_additional_program_headers	spu_elf_additional_program_headers
 #define elf_backend_modify_segment_map		spu_elf_modify_segment_map
Index: bfd/elf32-spu.h
===================================================================
RCS file: /cvs/src/src/bfd/elf32-spu.h,v
retrieving revision 1.4
diff -u -p -r1.4 elf32-spu.h
--- bfd/elf32-spu.h	3 Jul 2007 14:26:41 -0000	1.4
+++ bfd/elf32-spu.h	28 Jan 2008 01:34:29 -0000
@@ -26,12 +26,20 @@ struct _spu_elf_section_data
 {
   struct bfd_elf_section_data elf;
 
-  /* Stack analysis info kept for this section.  */
+  union {
+    /* Info kept for input sections.  */
+    struct {
+      /* Stack analysis info kept for this section.  */
+      struct spu_elf_stack_info *stack_info;
+    } i;
 
-  struct spu_elf_stack_info *stack_info;
-
-  /* Non-zero for overlay output sections.  */
-  unsigned int ovl_index;
+    /* Info kept for output sections.  */
+    struct {
+      /* Non-zero for overlay output sections.  */
+      unsigned int ovl_index;
+      unsigned int ovl_buf;
+    } o;
+  } u;
 };
 
 #define spu_elf_section_data(sec) \
@@ -49,9 +57,8 @@ extern bfd_boolean spu_elf_open_builtin_
 extern bfd_boolean spu_elf_create_sections (bfd *,
 					    struct bfd_link_info *, int, int);
 extern bfd_boolean spu_elf_find_overlays (bfd *, struct bfd_link_info *);
-extern bfd_boolean spu_elf_size_stubs (bfd *, struct bfd_link_info *, int, int,
-				       asection **, asection **,
-				       asection **);
-extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int,
-					asection *);
+extern int spu_elf_size_stubs (bfd *, struct bfd_link_info *,
+			       void (*) (asection *, asection *, const char *),
+			       int);
+extern bfd_boolean spu_elf_build_stubs (struct bfd_link_info *, int);
 extern asection *spu_elf_check_vma (bfd *, bfd_vma, bfd_vma);
Index: ld/emultempl/spu_ovl.S
===================================================================
RCS file: /cvs/src/src/ld/emultempl/spu_ovl.S,v
retrieving revision 1.7
diff -u -p -r1.7 spu_ovl.S
--- ld/emultempl/spu_ovl.S	26 Sep 2007 03:13:55 -0000	1.7
+++ ld/emultempl/spu_ovl.S	28 Jan 2008 01:34:49 -0000
@@ -19,295 +19,242 @@
    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
    MA 02110-1301, USA.  */
 
-/**
- * MFC DMA defn's.
- */
+/* MFC DMA definitions.  */
 #define MFC_GET_CMD		0x40
 #define MFC_MAX_DMA_SIZE	0x4000
 #define MFC_TAG_UPDATE_ALL	2
 #define MFC_TAG_ID		0
 
+/* Register usage.  */
+#define reserved1	$75
+#define parm		$75
+#define tab1		reserved1
+#define tab2		reserved1
+#define vma		reserved1
+#define oldvma		reserved1
+#define newmask		reserved1
+#define map		reserved1
+
+#define reserved2	$76
+#define off1		reserved2
+#define off2		reserved2
+#define present1	reserved2
+#define present2	reserved2
+#define sz		reserved2
+#define cmp		reserved2
+#define add64		reserved2
+#define cgbits		reserved2
+#define off3		reserved2
+#define off4		reserved2
+#define off5		reserved2
+#define tagstat		reserved2
+
+#define reserved3	$77
+#define buf1		reserved3
+#define buf2		reserved3
+#define rv3		reserved3
+#define ealo		reserved3
+#define cmd		reserved3
+#define off64		reserved3
+#define tab3		reserved3
+#define tab4		reserved3
+#define tab5		reserved3
+
+#define reserved4	$78
+#define ovl		reserved4
+#define rv2		reserved4
+#define rv5		reserved4
+#define cgshuf		reserved4
+#define newovl		reserved4
+
+#define reserved5	$79
+#define target		reserved5
+
+#define save1		$72
+#define rv4		save1
+#define rv7		save1
+#define tagid		save1
+#define maxsize		save1
+#define pbyte		save1
+#define pbit		save1
+
+#define save2		$73
+#define cur		save2
+#define rv6		save2
+#define osize		save2
+#define zovl		save2
+#define oldovl		save2
+#define newvma		save2
+
+#define save3		$74
+#define rv1		save3
+#define ea64		save3
+#define buf3		save3
+#define genwi		save3
+#define newmap		save3
+#define oldmask		save3
 
-/**
- * Temporary register allocations.
- * These are saved/restored here.
- */
-#define tab		$75
-#define cgbits		$75
-#define add64		$75
-#define ealo		$75
-#define newmask		$75
-#define tagstat		$75
-#define bchn		$75
-#define rv1		$75
-
-#define off		$76
-#define off64		$76
-#define maxsize		$76
-#define oldmask		$76
-#define sz		$76
-#define lnkr		$76
-#define rv2		$76
-
-#define cur		$77
-#define cmp		$77
-#define buf		$77
-#define genwi		$77
-#define tagid		$77
-#define cmd		$77
-#define rv3		$77
-
-#define cgshuf		$78
-
-#define vma		$6
-
-#define map		$7
-#define osize		$7
-#define cmp2		$7
-
-#define ea64		$8
-#define retval		$8
-
-#ifdef OVLY_IRQ_SAVE
-#define irqtmp		$8
-#define irq_stat	$9
-#endif
-
-# Stack quadword minux N
-#define	SQWM1	-16*1
-#define	SQWM2	-16*2
-#define	SQWM3	-16*3
-#define	SQWM4	-16*4
-#define	SQWM5	-16*5
-#define	SQWM6	-16*6
-#define	SQWM7	-16*7
-#define	SQWM8	-16*8
-#define	SQWM9	-16*9
-#define	SQWM10	-16*10
-#define	SQWM11	-16*11
-#define	SQWM12	-16*12
-#define	SQWM13	-16*13
-#define	SQWM14	-16*14
-#define	SQWM15	-16*15
-#define	SQWM16	-16*16
-
-	.extern		_ovly_table
-	.extern		_ovly_buf_table
-
-#ifdef OVLY_PRINTFS
-#define SPE_C99_VPRINTF 37
-__entry_event_format:
-	.string		"In entry_event_hook segment=0x%08x entry-address=0x%08x\n"
-__debug_event_format:
-  	.string		"In debug_event_hook link-register=0x%08x %08x %08x %08x\n"
-__dma_event_format:
-  	.string		"In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x\n"
-__ovly_buf_table_format:
-  	.string		"_ovly_buf_table[%08x]=%08x\n"
-#endif
 
 	.text
-	.align 		4
-	.type		__rv_pattern, @object
-	.size		__rv_pattern, 16
+	.align 	4
+	.type	__rv_pattern, @object
+	.size	__rv_pattern, 16
 __rv_pattern:
-	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
-	.type		__cg_pattern, @object
-	.size		__cg_pattern, 16
+	.word	0x00010203, 0x10111213, 0x80808080, 0x80808080
+
+	.type	__cg_pattern, @object
+	.size	__cg_pattern, 16
 __cg_pattern:
-	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080
+	.word	0x04050607, 0x80808080, 0x80808080, 0x80808080
+
+	.type	__ovly_current, @object
+	.size	__ovly_current, 16
+__ovly_current:
+	.space	16
 
-/**
+/*
  * __ovly_return - stub for returning from overlay functions.
  *
- * inputs:
- *	$lr	link register
+ * On entry the four slots of $lr are:
+ *   __ovly_return, prev ovl index, caller return addr, undefined.
  *
- * outputs:
- *	$78	old partition number, to be reloaded
- *	$79	return address in old partion number
+ * Load the previous overlay and jump to the caller return address.
+ * Updates __ovly_current.
  */
-	.global		__ovly_return
-	.type		__ovly_return, @function
-
-	.word		0
+	.align 	4
+	.global	__ovly_return
+	.type	__ovly_return, @function
 __ovly_return:
-	shlqbyi		$78, $lr, 4
-	shlqbyi		$79, $lr, 8
-	biz		$78, $79
-	.size		__ovly_return, . - __ovly_return
+	ila	tab1, _ovly_table - 16				# 0,2	0
+	shlqbyi	ovl, $lr, 4					# 1,4	0
+#nop
+	shlqbyi	target, $lr, 8					# 1,4	1
+#nop; lnop
+#nop; lnop
+	shli	off1, ovl, 4					# 0,4	4
+#lnop
+#nop
+	hbr	ovly_ret9, target				# 1,15	5
+#nop; lnop
+#nop; lnop
+#nop
+	lqx	vma, tab1, off1					# 1,6	8
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+	rotqbyi	buf1, vma, 12					# 1,4	14
+#nop
+	stqd	save3, -48($sp)					# 1,6	15
+#nop
+	stqd	save2, -32($sp)					# 1,6	16
+#nop
+	stqd	save1, -16($sp)					# 1,6	17
+	andi	present1, buf1, 1				# 0,2	18
+	stqd	ovl, (__ovly_current - __ovly_return)($lr)	# 1,6	18
+#nop; lnop
+#nop
+	brz	present1, __ovly_load_event			# 1,4	20
+ovly_ret9:
+#nop
+	bi	target						# 1,4	21
 
-/**
+/*
  * __ovly_load - copy an overlay partion to local store.
  *
- * inputs:
- *	$78	partition number to be loaded.
- *	$79	branch target in new partition.
- *	$lr	link register, containing return addr.
- *
- * outputs:
- *	$lr	new link register, returning through __ovly_return.
+ * On entry $75 points to a word consisting of the overlay index in
+ * the top 14 bits, and the target address in the bottom 18 bits.
  *
- * Copy a new overlay partition into local store, or return
- * immediately if the partition is already resident.
+ * Sets up $lr to return via __ovly_return.
+ * Updates __ovly_current.
  */
-	.global		__ovly_load
-	.type		__ovly_load, @function
-
+	.align  3
+	.global	__ovly_load
+	.type	__ovly_load, @function
 __ovly_load:
-/* Save temporary registers to stack. */
-	stqd		$6, -16($sp)
-	stqd		$7, -32($sp)
-	stqd		$8, -48($sp)
-
-#ifdef OVLY_IRQ_SAVE
-/* Save irq state, then disable interrupts. */
-	stqd		$9, -64($sp)
-	ila		irqtmp, __ovly_irq_save
-	rdch		irq_stat, $SPU_RdMachStat
-	bid		irqtmp
-__ovly_irq_save:
-#endif
-
-#ifdef OVLY_PRINTFS
-//==============================================
-// In entry_event_hook segment=0x%08x entry-address=0x%08x
-//==============================================
-# save registers
-	stqd	$10, SQWM5($sp)
-	stqd	$11, SQWM6($sp)
-	stqd	$12, SQWM7($sp)
-# Place input parameters onto the stack to form the
-# local storage memory image.
-	ila	$10, __entry_event_format
-	stqd	$10, SQWM12($sp)
-	ai	$10, $sp, SQWM9
-	stqd	$10, SQWM11($sp)
-	stqd	$sp, SQWM10($sp)
-	stqd	$78, SQWM9($sp)
-	stqd	$79, SQWM8($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
-	ila	$10, 0x3ffff		# address mask
-	ilhu	$11, SPE_C99_VPRINTF << 8
-	ai	$12, $sp, SQWM12	# parameter pointer
-	selb	$11, $11, $12, $10	# combine command & address ptr
-	brsl	$10, next1a
-next1a:
-	.type	next1a, @function
-	lqr	$12, message1a
-	cwd	$10, message1a-next1a($10)
-	shufb	$11, $11, $12, $10	# insert msg into inst word
-	stqr	$11, message1a		# store cmd/ptr into msg word
-	dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
-	stop	0x2100
-message1a:
-	.word	0
-
-# save registers
-	stqd	$13, SQWM8($sp)
-	stqd	$14, SQWM9($sp)
-	stqd	$15, SQWM10($sp)
-	stqd	$16, SQWM11($sp)
-
-# initialize loop
-	il	$13, 1
-	ila	$14, _ovly_buf_table
-	ila	$15, _ovly_buf_table_end
-
-loop_start1:
-# Place input parameters onto the stack to form the
-# local storage memory image.
-	ila	$10, __ovly_buf_table_format
-	stqd	$10, SQWM16($sp)
-	ai	$10, $sp, SQWM13
-	stqd	$10, SQWM15($sp)
-	stqd	$sp, SQWM14($sp)
-	stqd	$13, SQWM13($sp)
-	lqd	$16, 0($14)
-	rotqby	$16, $16, $14
-	stqd	$16, SQWM12($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
-	ila	$10, 0x3ffff		# address mask
-	ilhu	$11, SPE_C99_VPRINTF << 8
-	ai	$12, $sp, SQWM16	# parameter pointer
-	selb	$11, $11, $12, $10	# combine command & address ptr
-	brsl	$10, next1b
-next1b:
-	.type	next1b, @function
-	lqr	$12, message1b
-	cwd	$10, message1b-next1b($10)
-	shufb	$11, $11, $12, $10	# insert msg into inst word
-	stqr	$11, message1b		# store cmd/ptr into msg word
-	dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
-	stop	0x2100
-message1b:
-	.word	0
-
-# move to next entry
-	ai	$13, $13, 1
-	ai	$14, $14, 4
-	clgt	$16, $15, $14
-	brnz	$16, loop_start1
-
-# restore registers
-	lqd	$16, SQWM11($sp)
-	lqd	$15, SQWM10($sp)
-	lqd	$14, SQWM9($sp)
-	lqd	$13, SQWM8($sp)
-	lqd	$12, SQWM7($sp)
-	lqd	$11, SQWM6($sp)
-	lqd	$10, SQWM5($sp)
-//==============================================
+#if OVL_STUB_SIZE == 8
+########
+#nop
+	lqd	target, 0(parm)					# 1,6	-11
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+	rotqby	target, target, parm				# 1,4	-5
+	ila	tab2, _ovly_table - 16				# 0,2	-4
+	stqd	save3, -48($sp)					# 1,6	-4
+#nop
+	stqd	save2, -32($sp)					# 1,6	-3
+#nop
+	stqd	save1, -16($sp)					# 1,6	-2
+	rotmi	ovl, target, -18				# 0,4	-1
+	hbr	ovly_load9, target				# 1,15	-1
+	ila	rv1, __ovly_return				# 0,2	0
+#lnop
+#nop; lnop
+#nop
+	lqd	cur, (__ovly_current - __ovly_return)(rv1)	# 1,6	2
+	shli	off2, ovl, 4					# 0,4	3
+	stqd	ovl, (__ovly_current - __ovly_return)(rv1)	# 1,6	3
+	ceq	rv2, $lr, rv1					# 0,2	4
+	lqd	rv3, (__rv_pattern - __ovly_return)(rv1)	# 1,6	4
+#nop; lnop
+#nop; lnop
+#nop
+	lqx	vma, tab2, off2					# 1,6	7
+########
+#else /* OVL_STUB_SIZE == 16 */
+########
+	ila	tab2, _ovly_table - 16				# 0,2	0
+	stqd	save3, -48($sp)					# 1,6	0
+	ila	rv1, __ovly_return				# 0,2	1
+	stqd	save2, -32($sp)					# 1,6	1
+	shli	off2, ovl, 4					# 0,4	2
+	lqa	cur, __ovly_current				# 1,6	2
+	nop
+	stqa	ovl, __ovly_current				# 1,6	3
+	ceq	rv2, $lr, rv1					# 0,2	4
+	lqd	rv3, (__rv_pattern - __ovly_return)(rv1)	# 1,6	4
+#nop
+	hbr	ovly_load9, target				# 1,15	5
+#nop
+	lqx	vma, tab2, off2					# 1,6	6
+#nop
+	stqd	save1, -16($sp)					# 1,6	7
+########
 #endif
 
-/* Set branch hint to overlay target. */
-	hbr		__ovly_load_ret, $79
+#nop; lnop
+#nop; lnop
+#nop
+	shufb	rv4, rv1, cur, rv3				# 1,4	10
+#nop
+	fsmb	rv5, rv2					# 1,4	11
+#nop
+	rotqmbyi rv6, $lr, -8					# 1,4	12
+#nop
+	rotqbyi	buf2, vma, 12					# 1,4	13
+#nop
+	lqd	save3, -48($sp)					# 1,6	14
+#nop; lnop
+	or	rv7, rv4, rv6					# 0,2	16
+	lqd	save2, -32($sp)					# 1,6	16
+	andi	present2, buf2, 1				# 0,2	17
+	lnop							# 1,0	17
+	selb	$lr, rv7, $lr, rv5				# 0,2	18
+	lqd	save1, -16($sp)					# 1,6	18
+#nop
+	brz	present2, __ovly_load_event			# 1,4	19
+ovly_load9:
+#nop
+	bi	target						# 1,4	20
 
-/* Get caller's overlay index by back chaining through stack frames.
- * Loop until end of stack (back chain all-zeros) or
- * encountered a link register we set here. */
-	lqd		bchn, 0($sp)
-	ila		retval, __ovly_return
-
-__ovly_backchain_loop:
-	lqd		lnkr, 16(bchn)
-	lqd		bchn, 0(bchn)
-	ceq		cmp, lnkr, retval
-	ceqi		cmp2, bchn, 0
-	or		cmp, cmp, cmp2
-	brz		cmp, __ovly_backchain_loop
-
-/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
- * part of lnkr that we use later (slot 3). */
-	rotqbyi		cmp2, cmp2, 4
-	andc		lnkr, lnkr, cmp2
-
-/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */
-	lqd		rv1, (__rv_pattern-__ovly_return+4)(retval)
-	shufb		rv2, retval, lnkr, rv1
-	shufb		rv3, $lr, $78, rv1
-	fsmbi		rv1, 0xff
-	selb		rv2, rv2, rv3, rv1
-/* If we have a tail call from one overlay function to another overlay,
-   then lr is already set up.  Don't change it.  */
-	ceq		rv1, $lr, retval
-	fsmb		rv1, rv1
-	selb		$lr, rv2, $lr, rv1
-
-/* Branch to $79 if non-overlay */
-	brz		$78, __ovly_load_restore
-
-/* Load values from _ovly_table[$78].
+/* If we get here, we are about to load a new overlay.
+ * "vma" contains the relevant entry from _ovly_table[].
  *	extern struct {
  *		u32 vma;
  *		u32 size;
@@ -315,265 +262,166 @@ __ovly_backchain_loop:
  *		u32 buf;
  *	} _ovly_table[];
  */
-	shli		off, $78, 4
-	ila		tab, _ovly_table - 16
-	lqx		vma, tab, off
-	rotqbyi		buf, vma, 12
-
-/* Load values from _ovly_buf_table[buf].
- *	extern struct {
- *		u32 mapped;
- *	} _ovly_buf_table[];
- */
-	ila		tab, _ovly_buf_table
-	ai		off, buf, -1
-	shli		off, off, 2
-	lqx		map, tab, off
-	rotqby		cur, map, off
-
-/* Branch to $79 now if overlay is already mapped.  */
-	ceq		cmp, $78, cur
-	brnz		cmp, __ovly_load_restore
-
-/* Marker for profiling code.  If we get here, we are about to load
- * a new overlay.
- */
-	.global		__ovly_load_event
-	.type		__ovly_load_event, @function
+	.align  3
+	.global	__ovly_load_event
+	.type	__ovly_load_event, @function
 __ovly_load_event:
-
-/* Set _ovly_buf_table[buf].mapped = $78. */
-	cwx		genwi, tab, off
-	shufb		map, $78, map, genwi
-	stqx		map, tab, off
-
-/* A new partition needs to be loaded. Prepare for DMA loop.
- * _EAR_ is the 64b base EA, filled in at run time by the
- * loader, and indicating the value for SPU executable image start.
- */
-	lqd		cgshuf, (__cg_pattern-__ovly_return+4)(retval)
-	rotqbyi		osize, vma, 4
-	rotqbyi		sz, vma, 8
-	lqa		ea64, _EAR_
-
+#nop
+	rotqbyi	sz, vma, 8					# 1,4	0
+#nop
+	rotqbyi	osize, vma, 4					# 1,4	1
+#nop
+	lqa	ea64, _EAR_					# 1,6	2
+#nop
+	lqd	cgshuf, (__cg_pattern - __ovly_return)($lr)	# 1,6	3
+
+/* We could predict the branch at the end of this loop by adding a few
+   instructions, and there are plenty of free cycles to do so without
+   impacting loop execution time.  However, it doesn't make a great
+   deal of sense since we need to wait for the dma to complete anyway.  */
 __ovly_xfer_loop:
-/* 64b add to compute next ea64. */
-	rotqmbyi	off64, sz, -4
-	cg		cgbits, ea64, off64
-	shufb		add64, cgbits, cgbits, cgshuf
-	addx		add64, ea64, off64
-	ori		ea64, add64, 0
-
-/* Setup DMA parameters, then issue DMA request. */
-	rotqbyi		ealo, add64, 4
-	ila		maxsize, MFC_MAX_DMA_SIZE
-	cgt		cmp, osize, maxsize
-	selb		sz, osize, maxsize, cmp
-	ila		tagid, MFC_TAG_ID
-	wrch		$MFC_LSA, vma
-	wrch		$MFC_EAH, ea64
-	wrch		$MFC_EAL, ealo
-	wrch		$MFC_Size, sz
-	wrch		$MFC_TagId, tagid
-	ila		cmd, MFC_GET_CMD
-	wrch		$MFC_Cmd, cmd
-
-#ifdef OVLY_PRINTFS
-//==============================================
-// In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x
-//==============================================
-# save registers
-	stqd	$10, SQWM5($sp)
-	stqd	$11, SQWM6($sp)
-	stqd	$12, SQWM7($sp)
-# Place input parameters onto the stack to form the
-# local storage memory image.
-	ila	$10, __dma_event_format
-	stqd	$10, SQWM14($sp)
-	ai	$10, $sp, SQWM11
-	stqd	$10, SQWM13($sp)
-	stqd	$sp, SQWM12($sp)
-	stqd	vma, SQWM11($sp)
-	stqd	ea64, SQWM10($sp)
-	stqd	ealo, SQWM9($sp)
-	stqd	sz, SQWM8($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
-	ila	$10, 0x3ffff		# address mask
-	ilhu	$11, SPE_C99_VPRINTF << 8
-	ai	$12, $sp, SQWM14	# parameter pointer
-	selb	$11, $11, $12, $10	# combine command & address ptr
-	brsl	$10, next3a
-next3a:
-	.type	next3a, @function
-	lqr	$12, message3a
-	cwd	$10, message3a-next3a($10)
-	shufb	$11, $11, $12, $10	# insert msg into inst word
-	stqr	$11, message3a		# store cmd/ptr into msg word
-	dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
-	stop	0x2100
-message3a:
-	.word	0
-
-# restore registers
-	lqd	$12, SQWM7($sp)
-	lqd	$11, SQWM6($sp)
-	lqd	$10, SQWM5($sp)
-//==============================================
-#endif
-
-/* Increment vma, decrement size, branch back as needed. */
-	a		vma, vma, sz
-	sf		osize, sz, osize
-	brnz		osize, __ovly_xfer_loop
-
-/* Save app's tagmask, wait for DMA complete, restore mask. */
-	rdch		oldmask, $MFC_RdTagMask
+#nop
+	rotqmbyi off64, sz, -4					# 1,4	4
+#nop; lnop
+#nop; lnop
+#nop; lnop
+	cg	cgbits, ea64, off64				# 0,2	8
+#lnop
+#nop; lnop
+#nop
+	shufb	add64, cgbits, cgbits, cgshuf			# 1,4	10
+#nop; lnop
+#nop; lnop
+#nop; lnop
+	addx	add64, ea64, off64				# 0,2	14
+#lnop
+	ila	maxsize, MFC_MAX_DMA_SIZE			# 0,2	15
+	lnop
+	ori	ea64, add64, 0					# 0,2	16
+	rotqbyi	ealo, add64, 4					# 1,4	16
+	cgt	cmp, osize, maxsize				# 0,2	17
+	wrch	$MFC_LSA, vma					# 1,6	17
+#nop; lnop
+	selb	sz, osize, maxsize, cmp				# 0,2	19
+	wrch	$MFC_EAH, ea64					# 1,6	19
+	ila	tagid, MFC_TAG_ID				# 0,2	20
+	wrch	$MFC_EAL, ealo					# 1,6	20
+	ila	cmd, MFC_GET_CMD				# 0,2	21
+	wrch	$MFC_Size, sz					# 1,6	21
+	sf	osize, sz, osize				# 0,2	22
+	wrch	$MFC_TagId, tagid				# 1,6	22
+	a	vma, vma, sz					# 0,2	23
+	wrch	$MFC_Cmd, cmd					# 1,6	23
+#nop
+	brnz	osize, __ovly_xfer_loop				# 1,4	24
+
+/* Now update our data structures while waiting for DMA to complete.
+   Low bit of .buf needs to be cleared on the _ovly_table entry
+   corresponding to the evicted overlay, and set on the entry for the
+   newly loaded overlay.  Note that no overlay may in fact be evicted
+   as _ovly_buf_table[] starts with all zeros.  Don't zap .buf entry
+   for zero index!  Also of course update the _ovly_buf_table entry.  */
+#nop
+	lqd	newovl, (__ovly_current - __ovly_return)($lr)	# 1,6	25
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+	shli	off3, newovl, 4					# 0,4	31
+#lnop
+	ila	tab3, _ovly_table - 16				# 0,2	32
+#lnop
+#nop
+	fsmbi	pbyte, 1					# 1,4	33
+#nop; lnop
+#nop
+	lqx	vma, tab3, off3					# 1,6	35
+#nop; lnop
+	andi	pbit, pbyte, 1					# 0,2	37
+	lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+	or	newvma, vma, pbit				# 0,2	41
+	rotqbyi	buf3, vma, 12					# 1,4	41
+#nop; lnop
+#nop
+	stqx	newvma, tab3, off3				# 1,6	43
+#nop; lnop
+	shli	off4, buf3, 2					# 1,4	45
+#lnop
+	ila	tab4, _ovly_buf_table				# 0,2	46
+#lnop
+#nop; lnop
+#nop; lnop
+#nop
+	lqx	map, tab4, off4					# 1,6	49
+#nop
+	cwx	genwi, tab4, off4				# 1,4	50
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+	rotqby	oldovl, map, off4				# 1,4	55
+	nop
+	shufb	newmap, newovl, map, genwi			# 0,4	56
 #if MFC_TAG_ID < 16
-	ilh		newmask, 1 << MFC_TAG_ID
+	ila	newmask, 1 << MFC_TAG_ID			# 0,2	57
 #else
-	ilhu		newmask, 1 << (MFC_TAG_ID - 16)
-#endif
-	wrch		$MFC_WrTagMask, newmask
-	ila		tagstat, MFC_TAG_UPDATE_ALL
-	wrch		$MFC_WrTagUpdate, tagstat
-	rdch		tagstat, $MFC_RdTagStat
-	sync
-	wrch		$MFC_WrTagMask, oldmask
-
-#ifdef OVLY_PRINTFS
-//==============================================
-// In debug_event_hook link-register=0x%08x %08x %08x %08x
-//==============================================
-# save registers
-	stqd	$10, SQWM5($sp)
-	stqd	$11, SQWM6($sp)
-	stqd	$12, SQWM7($sp)
-# Place input parameters onto the stack to form the
-# local storage memory image.
-	ila	$10, __debug_event_format
-	stqd	$10, SQWM14($sp)
-	ai	$10, $sp, SQWM11
-	stqd	$10, SQWM13($sp)
-	stqd	$sp, SQWM12($sp)
-	stqd	$lr, SQWM11($sp)
-	rotqbyi $10, $lr, 4
-	stqd	$10, SQWM10($sp)
-	rotqbyi $10, $10, 4
-	stqd	$10, SQWM9($sp)
-	rotqbyi $10, $10, 4
-	stqd	$10, SQWM8($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
-	ila	$10, 0x3ffff		# address mask
-	ilhu	$11, SPE_C99_VPRINTF << 8
-	ai	$12, $sp, SQWM14	# parameter pointer
-	selb	$11, $11, $12, $10	# combine command & address ptr
-	brsl	$10, next2a
-next2a:
-	.type	next2a, @function
-	lqr	$12, message2a
-	cwd	$10, message2a-next2a($10)
-	shufb	$11, $11, $12, $10	# insert msg into inst word
-	stqr	$11, message2a		# store cmd/ptr into msg word
-	dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
-	stop	0x2100
-message2a:
-	.word	0
-
-# save registers
-	stqd	$13, SQWM8($sp)
-	stqd	$14, SQWM9($sp)
-	stqd	$15, SQWM10($sp)
-	stqd	$16, SQWM11($sp)
-
-# initialize loop
-	il	$13, 1
-	ila	$14, _ovly_buf_table
-	ila	$15, _ovly_buf_table_end
-
-loop_start2:
-# Place input parameters onto the stack to form the
-# local storage memory image.
-	ila	$10, __ovly_buf_table_format
-	stqd	$10, SQWM16($sp)
-	ai	$10, $sp, SQWM13
-	stqd	$10, SQWM15($sp)
-	stqd	$sp, SQWM14($sp)
-	stqd	$13, SQWM13($sp)
-	lqd	$16, 0($14)
-	rotqby	$16, $16, $14
-	stqd	$16, SQWM12($sp)
-# Construct a message consisting of the 8-bit opcode
-# and 24-bit local store pointer to the input
-# parameters and place it forllowing the stop and signal
-	ila	$10, 0x3ffff		# address mask
-	ilhu	$11, SPE_C99_VPRINTF << 8
-	ai	$12, $sp, SQWM16	# parameter pointer
-	selb	$11, $11, $12, $10	# combine command & address ptr
-	brsl	$10, next2b
-next2b:
-	.type	next2b, @function
-	lqr	$12, message2b
-	cwd	$10, message2b-next2b($10)
-	shufb	$11, $11, $12, $10	# insert msg into inst word
-	stqr	$11, message2b		# store cmd/ptr into msg word
-	dsync
-# Notify the PPE to perform the assisted call request
-# by issing a stop and signal with a signal code
-# of 0x2100 (C99 class)
-	stop	0x2100
-message2b:
-	.word	0
-
-# move to next entry
-	ai	$13, $13, 1
-	ai	$14, $14, 4
-	clgt	$16, $15, $14
-	brnz	$16, loop_start2
-
-# restore registers
-	lqd	$16, SQWM11($sp)
-	lqd	$15, SQWM10($sp)
-	lqd	$14, SQWM9($sp)
-	lqd	$13, SQWM8($sp)
-	lqd	$12, SQWM7($sp)
-	lqd	$11, SQWM6($sp)
-	lqd	$10, SQWM5($sp)
-//==============================================
+	ilhu	newmask, 1 << (MFC_TAG_ID - 16)			# 0,2	57
 #endif
+#lnop
+#nop; lnop
+#nop; lnop
+	stqx	newmap, tab4, off4				# 1,6	60
+
+/* Save app's tagmask, wait for DMA complete, restore mask.  */
+	ila	tagstat, MFC_TAG_UPDATE_ALL			# 0,2	61
+	rdch	oldmask, $MFC_RdTagMask				# 1,6	61
+#nop
+	wrch	$MFC_WrTagMask, newmask				# 1,6	62
+#nop
+	wrch	$MFC_WrTagUpdate, tagstat			# 1,6	63
+#nop
+	rdch	tagstat, $MFC_RdTagStat				# 1,6	64
+#nop
+	sync							# 1,4	65
+/* Any hint prior to the sync is lost.  A hint here allows the branch
+   to complete 15 cycles after the hint.  With no hint the branch will
+   take 18 or 19 cycles.  */
+	ila	tab5, _ovly_table - 16				# 0,2	66
+	hbr	do_load99, target				# 1,15	66
+	shli	off5, oldovl, 4					# 0,4	67
+	wrch	$MFC_WrTagMask, oldmask				# 1,6	67
+	ceqi	zovl, oldovl, 0					# 0,2	68
+#lnop
+#nop; lnop
+#nop
+	fsm	zovl, zovl					# 1,4	70
+#nop
+	lqx	oldvma, tab5, off5				# 1,6	71
+#nop
+	lqd	save3, -48($sp)					# 1,6	72
+#nop; lnop
+	andc	pbit, pbit, zovl				# 0,2	74
+	lqd	save2, -32($sp)					# 1,6	74
+#nop; lnop
+#nop; lnop
+	andc	oldvma, oldvma, pbit				# 0,2	77
+	lqd	save1, -16($sp)					# 1,6	77
+#nop; lnop
+	nop
+	stqx	oldvma, tab5, off5				# 1,6	79
+#nop; lnop
 
-	.global		_ovly_debug_event
-	.type		_ovly_debug_event, @function
+	.global	_ovly_debug_event
+	.type	_ovly_debug_event, @function
 _ovly_debug_event:
-/* GDB inserts debugger trap here.  */
 	nop
-
-__ovly_load_restore:
-#ifdef OVLY_IRQ_SAVE
-/* Conditionally re-enable interrupts. */
-	andi		irq_stat, irq_stat, 1
-	ila		irqtmp, __ovly_irq_restore
-	binze		irq_stat, irqtmp
-__ovly_irq_restore:
-	lqd		$9, -64($sp)
-#endif
-
-/* Restore saved registers. */
-	lqd		$8, -48($sp)
-	lqd		$7, -32($sp)
-	lqd		$6, -16($sp)
-
-__ovly_load_ret:
 /* Branch to target address. */
-	bi		$79
+do_load99:
+	bi	target						# 1,4	81
 
-	.size		__ovly_load, . - __ovly_load
+	.size	__ovly_load, . - __ovly_load
Index: ld/emultempl/spuelf.em
===================================================================
RCS file: /cvs/src/src/ld/emultempl/spuelf.em,v
retrieving revision 1.13
diff -u -p -r1.13 spuelf.em
--- ld/emultempl/spuelf.em	17 Aug 2007 13:50:48 -0000	1.13
+++ ld/emultempl/spuelf.em	28 Jan 2008 01:34:49 -0000
@@ -58,8 +58,6 @@ static const struct _ovl_stream ovl_mgr_
   ovl_mgr + sizeof (ovl_mgr)
 };
 
-static asection *toe = NULL;
-
 
 static int
 is_spu_target (void)
@@ -84,7 +82,8 @@ spu_after_open (void)
   gld${EMULATION_NAME}_after_open ();
 }
 
-/* Add section S at the end of output section OUTPUT_NAME.
+/* If O is NULL, add section S at the end of output section OUTPUT_NAME.
+   If O is not NULL, add section S at the beginning of output section O.
 
    Really, we should be duplicating ldlang.c map_input_to_output_sections
    logic here, ie. using the linker script to find where the section
@@ -95,11 +94,11 @@ spu_after_open (void)
    overlay manager code somewhere else.  */
 
 static void
-spu_place_special_section (asection *s, const char *output_name)
+spu_place_special_section (asection *s, asection *o, const char *output_name)
 {
   lang_output_section_statement_type *os;
 
-  os = lang_output_section_find (output_name);
+  os = lang_output_section_find (o != NULL ? o->name : output_name);
   if (os == NULL)
     {
       const char *save = s->name;
@@ -107,6 +106,15 @@ spu_place_special_section (asection *s, 
       gld${EMULATION_NAME}_place_orphan (s);
       s->name = save;
     }
+  else if (o != NULL && os->children.head != NULL)
+    {
+      lang_statement_list_type add;
+
+      lang_list_init (&add);
+      lang_add_section (&add, s, os);
+      *add.tail = os->children.head;
+      os->children.head = add.head;
+    }
   else
     lang_add_section (&os->children, s, os);
 
@@ -154,7 +162,7 @@ spu_elf_load_ovl_mgr (void)
 	  for (in = ovl_is->the_bfd->sections; in != NULL; in = in->next)
 	    if ((in->flags & (SEC_ALLOC | SEC_LOAD))
 		== (SEC_ALLOC | SEC_LOAD))
-	      spu_place_special_section (in, ".text");
+	      spu_place_special_section (in, NULL, ".text");
 	}
     }
 
@@ -164,7 +172,7 @@ spu_elf_load_ovl_mgr (void)
        os = os->next)
     if (os->bfd_section != NULL
 	&& spu_elf_section_data (os->bfd_section) != NULL
-	&& spu_elf_section_data (os->bfd_section)->ovl_index != 0)
+	&& spu_elf_section_data (os->bfd_section)->u.o.ovl_index != 0)
       {
 	if (os->bfd_section->alignment_power < 4)
 	  os->bfd_section->alignment_power = 4;
@@ -192,20 +200,15 @@ spu_before_allocation (void)
       /* Find overlays by inspecting section vmas.  */
       if (spu_elf_find_overlays (output_bfd, &link_info))
 	{
-	  asection *stub, *ovtab;
+	  int ret;
 
-	  if (!spu_elf_size_stubs (output_bfd, &link_info, non_overlay_stubs,
-				   stack_analysis, &stub, &ovtab, &toe))
+	  ret = spu_elf_size_stubs (output_bfd, &link_info,
+				    spu_place_special_section,
+				    non_overlay_stubs);
+	  if (ret == 0)
 	    einfo ("%X%P: can not size overlay stubs: %E\n");
-
-	  if (stub != NULL)
-	    {
-	      spu_place_special_section (stub, ".text");
-	      spu_place_special_section (ovtab, ".data");
-	      spu_place_special_section (toe, ".toe");
-
-	      spu_elf_load_ovl_mgr ();
-	    }
+	  else if (ret == 2)
+	    spu_elf_load_ovl_mgr ();
 	}
 
       /* We must not cache anything from the preliminary sizing.  */
@@ -235,10 +238,8 @@ gld${EMULATION_NAME}_finish (void)
 	einfo ("%X%P: %A exceeds local store range\n", s);
     }
 
-  if (toe != NULL
-      && !spu_elf_build_stubs (&link_info,
-			       emit_stub_syms || link_info.emitrelocations,
-			       toe))
+  if (!spu_elf_build_stubs (&link_info,
+			    emit_stub_syms || link_info.emitrelocations))
     einfo ("%X%P: can not build overlay stubs: %E\n");
 
   finish_default ();
Index: ld/testsuite/ld-spu/ovl.d
===================================================================
RCS file: /cvs/src/src/ld/testsuite/ld-spu/ovl.d,v
retrieving revision 1.5
diff -u -p -r1.5 ovl.d
--- ld/testsuite/ld-spu/ovl.d	25 Jan 2008 12:03:37 -0000	1.5
+++ ld/testsuite/ld-spu/ovl.d	28 Jan 2008 01:34:52 -0000
@@ -7,65 +7,90 @@
 Disassembly of section \.text:
 
 00000100 <_start>:
- 100:	1c f8 00 81 	ai	\$1,\$1,-32
- 104:	48 20 00 00 	xor	\$0,\$0,\$0
- 108:	24 00 00 80 	stqd	\$0,0\(\$1\)
- 10c:	24 00 40 80 	stqd	\$0,16\(\$1\)
- 110:	33 00 04 00 	brsl	\$0,130 <00000000\.ovl_call\.f1_a1>	# 130
-			110: SPU_REL16	f1_a1
- 114:	33 00 04 80 	brsl	\$0,138 <00000000\.ovl_call\.f2_a1>	# 138
-			114: SPU_REL16	f2_a1
- 118:	33 00 07 00 	brsl	\$0,150 <00000000\.ovl_call\.f1_a2>	# 150
-			118: SPU_REL16	f1_a2
- 11c:	42 00 ac 09 	ila	\$9,344	# 158
-			11c: SPU_ADDR18	f2_a2
- 120:	35 20 04 80 	bisl	\$0,\$9
- 124:	1c 08 00 81 	ai	\$1,\$1,32	# 20
- 128:	32 7f fb 00 	br	100 <_start>	# 100
-			128: SPU_REL16	_start
+.*	ai	\$1,\$1,-32
+.*	xor	\$0,\$0,\$0
+.*	stqd	\$0,0\(\$1\)
+.*	stqd	\$0,16\(\$1\)
+.*	brsl	\$0,.* <00000000\.ovl_call\.f1_a1>.*
+.*SPU_REL16	f1_a1
+.*	brsl	\$0,.* <00000000\.ovl_call\.f2_a1>.*
+.*SPU_REL16	f2_a1
+.*	brsl	\$0,.* <00000000\.ovl_call\.f1_a2>.*
+.*SPU_REL16	f1_a2
+#.*	ila	\$9,328	# 148
+.*	ila	\$9,352	# 160
+.*SPU_ADDR18	f2_a2
+.*	bisl	\$0,\$9
+.*	ai	\$1,\$1,32	# 20
+.*	br	100 <_start>	# 100
+.*SPU_REL16	_start
 
 0000012c <f0>:
- 12c:	35 00 00 00 	bi	\$0
+.*	bi	\$0
+
+#00000130 <00000000\.ovl_call\.f1_a1>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 04 04 00.*
+#
+#00000138 <00000000\.ovl_call\.f2_a1>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 04 04 04.*
+#
+#00000140 <00000000\.ovl_call\.f1_a2>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 08 04 00.*
+#
+#00000148 <00000000\.ovl_call\.f2_a2>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 08 04 24.*
+#
+#00000150 <00000000\.ovl_call\.f4_a1>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 04 04 10.*
+#
+#00000158 <00000000.ovl_call.14:8>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 08 04 34.*
 
 00000130 <00000000\.ovl_call\.f1_a1>:
- 130:	42 02 00 4f 	ila	\$79,1024	# 400
- 134:	32 00 02 80 	br	148 .*
-			134: SPU_REL16	\*ABS\*\+0x148
-
-00000138 <00000000\.ovl_call\.f2_a1>:
- 138:	42 02 02 4f 	ila	\$79,1028	# 404
- 13c:	32 00 01 80 	br	148 .*
-			13c: SPU_REL16	\*ABS\*\+0x148
-
-00000140 <00000000\.ovl_call\.f4_a1>:
- 140:	42 02 08 4f 	ila	\$79,1040	# 410
- 144:	40 20 00 00 	nop	\$0
- 148:	42 00 00 ce 	ila	\$78,1
- 14c:	32 00 0a 80 	br	1a0 <__ovly_load>	# 1a0
-			14c: SPU_REL16	__ovly_load
-
-00000150 <00000000\.ovl_call\.f1_a2>:
- 150:	42 02 00 4f 	ila	\$79,1024	# 400
- 154:	32 00 02 80 	br	168 .*
-			154: SPU_REL16	\*ABS\*\+0x168
-
-00000158 <00000000\.ovl_call\.f2_a2>:
- 158:	42 02 12 4f 	ila	\$79,1060	# 424
- 15c:	32 00 01 80 	br	168 .*
-			15c: SPU_REL16	\*ABS\*\+0x168
-
-00000160 <00000000\.ovl_call\.14:8>:
- 160:	42 02 1a 4f 	ila	\$79,1076	# 434
- 164:	40 20 00 00 	nop	\$0
- 168:	42 00 01 4e 	ila	\$78,2
- 16c:	32 00 06 80 	br	1a0 <__ovly_load>	# 1a0
-			16c: SPU_REL16	__ovly_load
+.*	ila	\$78,1
+.*	lnop
+.*	ila	\$79,1024	# 400
+.*	br	.* <__ovly_load>.*
+
+00000140 <00000000\.ovl_call\.f2_a1>:
+.*	ila	\$78,1
+.*	lnop
+.*	ila	\$79,1028	# 404
+.*	br	.* <__ovly_load>.*
+
+00000150 <00000000.ovl_call.f1_a2>:
+.*	ila	\$78,2
+.*	lnop
+.*	ila	\$79,1024	# 400
+.*	br	.* <__ovly_load>.*
+
+00000160 <00000000\.ovl_call\.f2_a2>:
+.*	ila	\$78,2
+.*	lnop
+.*	ila	\$79,1060	# 424
+.*	br	.* <__ovly_load>.*
+
+00000170 <00000000\.ovl_call\.f4_a1>:
+.*	ila	\$78,1
+.*	lnop
+.*	ila	\$79,1040	# 410
+.*	br	.* <__ovly_load>.*
+
+00000180 <00000000.ovl_call.14:8>:
+.*	ila	\$78,2
+.*	lnop
+.*	ila	\$79,1076	# 434
+.*	br	.* <__ovly_load>.*
+
 #...
 [0-9a-f]+ <__ovly_return>:
-[0-9a-f ]+:	3f e1 00 4e 	shlqbyi	\$78,\$0,4
-[0-9a-f ]+:	3f e2 00 4f 	shlqbyi	\$79,\$0,8
-[0-9a-f ]+:	25 00 27 ce 	biz	\$78,\$79
-
+#...
 [0-9a-f]+ <__ovly_load>:
 #...
 [0-9a-f]+ <_ovly_debug_event>:
@@ -73,67 +98,75 @@ Disassembly of section \.text:
 Disassembly of section \.ov_a1:
 
 00000400 <f1_a1>:
- 400:	32 00 01 80 	br	40c <f3_a1>	# 40c
-			400: SPU_REL16	f3_a1
+.*	br	.* <f3_a1>.*
+.*SPU_REL16	f3_a1
 
 00000404 <f2_a1>:
- 404:	42 00 a0 03 	ila	\$3,320	# 140
-			404: SPU_ADDR18	f4_a1
- 408:	35 00 00 00 	bi	\$0
+#.*	ila	\$3,336	# 150
+.*	ila	\$3,368	# 170
+.*SPU_ADDR18	f4_a1
+.*	bi	\$0
 
 0000040c <f3_a1>:
- 40c:	35 00 00 00 	bi	\$0
+.*	bi	\$0
 
 00000410 <f4_a1>:
- 410:	35 00 00 00 	bi	\$0
+.*	bi	\$0
 	\.\.\.
 Disassembly of section \.ov_a2:
 
 00000400 <f1_a2>:
- 400:	24 00 40 80 	stqd	\$0,16\(\$1\)
- 404:	24 ff 80 81 	stqd	\$1,-32\(\$1\)
- 408:	1c f8 00 81 	ai	\$1,\$1,-32
- 40c:	33 7f a4 00 	brsl	\$0,12c <f0>	# 12c
-			40c: SPU_REL16	f0
- 410:	33 7f a4 00 	brsl	\$0,130 <00000000\.ovl_call\.f1_a1>	# 130
-			410: SPU_REL16	f1_a1
- 414:	33 00 03 80 	brsl	\$0,430 <f3_a2>	# 430
-			414: SPU_REL16	f3_a2
- 418:	34 00 c0 80 	lqd	\$0,48\(\$1\)	# 30
- 41c:	1c 08 00 81 	ai	\$1,\$1,32	# 20
- 420:	35 00 00 00 	bi	\$0
+.*	stqd	\$0,16\(\$1\)
+.*	stqd	\$1,-32\(\$1\)
+.*	ai	\$1,\$1,-32
+.*	brsl	\$0,12c <f0>	# 12c
+.*SPU_REL16	f0
+.*	brsl	\$0,130 <00000000\.ovl_call\.f1_a1>	# 130
+.*SPU_REL16	f1_a1
+.*	brsl	\$0,430 <f3_a2>	# 430
+.*SPU_REL16	f3_a2
+.*	lqd	\$0,48\(\$1\)	# 30
+.*	ai	\$1,\$1,32	# 20
+.*	bi	\$0
 
 00000424 <f2_a2>:
- 424:	41 00 00 03 	ilhu	\$3,0
-			424: SPU_ADDR16_HI	f4_a2
- 428:	60 80 b0 03 	iohl	\$3,352	# 160
-			428: SPU_ADDR16_LO	f4_a2
- 42c:	35 00 00 00 	bi	\$0
+.*	ilhu	\$3,0
+.*SPU_ADDR16_HI	f4_a2
+#.*	iohl	\$3,344	# 158
+.*	iohl	\$3,384	# 180
+.*SPU_ADDR16_LO	f4_a2
+.*	bi	\$0
 
 00000430 <f3_a2>:
- 430:	35 00 00 00 	bi	\$0
+.*	bi	\$0
 
 00000434 <f4_a2>:
- 434:	32 7f ff 80 	br	430 <f3_a2>	# 430
-			434: SPU_REL16	f3_a2
+.*	br	.* <f3_a2>.*
+.*SPU_REL16	f3_a2
 	\.\.\.
 Disassembly of section .data:
 
-00000440 <_ovly_table>:
- 440:	00 00 04 00 .*
- 444:	00 00 00 20 .*
- 448:	00 00 02 f0 .*
+00000440 <_ovly_table-0x10>:
+	\.\.\.
  44c:	00 00 00 01 .*
+00000450 <_ovly_table>:
  450:	00 00 04 00 .*
- 454:	00 00 00 40 .*
- 458:	00 00 03 10 .*
- 45c:	00 00 00 01 .*
+ 454:	00 00 00 20 .*
+# 458:	00 00 03 40 .*
+ 458:	00 00 03 70 .*
+ 45c:	00 00 00 02 .*
+ 460:	00 00 04 00 .*
+ 464:	00 00 00 40 .*
+# 468:	00 00 03 60 .*
+ 468:	00 00 03 90 .*
+ 46c:	00 00 00 02 .*
+
+00000470 <_ovly_buf_table>:
+	\.\.\.
 
-00000460 <_ovly_buf_table>:
- 460:	00 00 00 00 .*
 Disassembly of section \.toe:
 
-00000470 <_EAR_>:
+00000480 <_EAR_>:
 	\.\.\.
 Disassembly of section \.note\.spu_name:
 
Index: ld/testsuite/ld-spu/ovl2.d
===================================================================
RCS file: /cvs/src/src/ld/testsuite/ld-spu/ovl2.d,v
retrieving revision 1.5
diff -u -p -r1.5 ovl2.d
--- ld/testsuite/ld-spu/ovl2.d	25 Jan 2008 12:03:37 -0000	1.5
+++ ld/testsuite/ld-spu/ovl2.d	28 Jan 2008 01:34:52 -0000
@@ -7,40 +7,50 @@
 Disassembly of section \.text:
 
 00000100 <_start>:
- 100:	33 00 06 00 	brsl	\$0,130 <00000000\.ovl_call\.f1_a1>	# 130
-			100: SPU_REL16	f1_a1
- 104:	33 00 03 80 	brsl	\$0,120 <00000000\.ovl_call\.10:4>	# 120
-			104: SPU_REL16	setjmp
- 108:	32 7f ff 00 	br	100 <_start>	# 100
-			108: SPU_REL16	_start
+.*	brsl	\$0,.* <00000000\.ovl_call\.f1_a1>.*
+.*SPU_REL16	f1_a1
+.*	brsl	\$0,.* <00000000\.ovl_call\.10:4>.*
+.*SPU_REL16	setjmp
+.*	br	100 <_start>	# 100
+.*SPU_REL16	_start
 
 0000010c <setjmp>:
- 10c:	35 00 00 00 	bi	\$0
+.*	bi	\$0
 
 00000110 <longjmp>:
- 110:	35 00 00 00 	bi	\$0
-	...
+.*	bi	\$0
+.*
 
-00000120 <00000000\.ovl_call.10:4>:
- 120:	42 00 86 4f 	ila	\$79,268	# 10c
- 124:	40 20 00 00 	nop	\$0
- 128:	42 00 00 4e 	ila	\$78,0
- 12c:	32 00 0a 80 	br	180 <__ovly_load>	# 180
-			12c: SPU_REL16	__ovly_load
-
-00000130 <00000000\.ovl_call.f1_a1>:
- 130:	42 02 00 4f 	ila	\$79,1024	# 400
- 134:	40 20 00 00 	nop	\$0
- 138:	42 00 00 ce 	ila	\$78,1
- 13c:	32 00 08 80 	br	180 <__ovly_load>	# 180
-			13c: SPU_REL16	__ovly_load
+#00000118 <00000000\.ovl_call.f1_a1>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 04 04 00.*
+#
+#00000120 <00000000\.ovl_call.10:4>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 00 01 0c.*
+#
+#00000128 <_SPUEAR_f1_a2>:
+#.*	brsl	\$75,.* <__ovly_load>.*
+#.*00 08 04 00.*
+
+00000120 <00000000\.ovl_call.f1_a1>:
+.*	ila	\$78,1
+.*	lnop
+.*	ila	\$79,1024	# 400
+.*	br	.* <__ovly_load>.*
+
+00000130 <00000000\.ovl_call.10:4>:
+.*	ila	\$78,0
+.*	lnop
+.*	ila	\$79,268	# 10c
+.*	br	.* <__ovly_load>.*
 
 00000140 <_SPUEAR_f1_a2>:
- 140:	42 02 00 4f 	ila	\$79,1024	# 400
- 144:	40 20 00 00 	nop	\$0
- 148:	42 00 01 4e 	ila	\$78,2
- 14c:	32 00 06 80 	br	180 <__ovly_load>	# 180
-			14c: SPU_REL16	__ovly_load
+.*	ila	\$78,2
+.*	lnop
+.*	ila	\$79,1024	# 400
+.*	br	.* <__ovly_load>.*
+
 #...
 Disassembly of section \.ov_a1:
 
@@ -55,21 +65,27 @@ Disassembly of section \.ov_a2:
 	\.\.\.
 Disassembly of section \.data:
 
-00000410 <_ovly_table>:
- 410:	00 00 04 00 	.*
- 414:	00 00 00 10 	.*
- 418:	00 00 02 d0 	.*
+00000410 <_ovly_table-0x10>:
+	\.\.\.
  41c:	00 00 00 01 	.*
+00000420 <_ovly_table>:
  420:	00 00 04 00 	.*
  424:	00 00 00 10 	.*
- 428:	00 00 02 e0 	.*
- 42c:	00 00 00 01 	.*
+# 428:	00 00 03 10 	.*
+ 428:	00 00 03 30 	.*
+ 42c:	00 00 00 02 	.*
+ 430:	00 00 04 00 	.*
+ 434:	00 00 00 10 	.*
+# 438:	00 00 03 20 	.*
+ 438:	00 00 03 40 	.*
+ 43c:	00 00 00 02 	.*
+
+00000440 <_ovly_buf_table>:
+	\.\.\.
 
-00000430 <_ovly_buf_table>:
- 430:	00 00 00 00 	.*
 Disassembly of section \.toe:
 
-00000440 <_EAR_>:
+00000450 <_EAR_>:
 	\.\.\.
 Disassembly of section \.note\.spu_name:
 

-- 
Alan Modra
Australia Development Lab, IBM



More information about the Binutils mailing list