[Patch macho/bfd/gas] .indirect_symbol, take 3.

Iain Sandoe developer@sandoe-acoustics.co.uk
Thu Jan 12 08:37:00 GMT 2012


On 10 Jan 2012, at 13:03, Tristan Gingold wrote:

>> Here is a different implementation of the .indirect_symbol patch.

V3 ;-)

>> In this we keep indirect_symbols in a linked list in the section to  
>> which they belong.
>
> I don't think that linked list is the best choice.  The overhead is  
> large (at least 2x), and the list structure is useful only for  
> creating it (i.e. for gas).  I would prefer to have an array in BFD,  
> and the linked list in GAS.

well, it's a bit like a balloon - push in one place and it pops out in  
another - this version is lighter in BFD, but heavier in GAS.

>> If this is OK, then a TODO would be to populate the list when a  
>> mach-o file is read in -
>> - this is nicely symmetrical -
>> - and means that sections can be reordered/removed without having  
>> to cross-check the dysymtab.
>>
>> The only thing that is less satisfactory, is that there is no way  
>> (AFAICS) to mark a symbol as 'referenced by an indirect' which  
>> means if one deletes (or wishes to delete) symbols - then one  
>> should cross-check the usage from the indirect tables.
>
> Yes, this is an issue.  I need to investigate how to address that.

I guess we could request a bit in the symbol flags...
>
> Use bfd_assert instead.
>
> Note that we should take care of local and absolute symbols.

I think locals are now done.. I will investigate ABS as a follow-on...
>>
>>      case OBJ_MACH_O_SYM_PRIV_EXT:
>> 	s->n_type |= BFD_MACH_O_N_PEXT ;
>> +	s->n_desc &= ~LAZY; /* The native tool swithes this off too.  */
>> 	/* We follow the system tools in marking PEXT as also global.  */
>> 	/* Fall through.  */
>
> Unrelated chunk ?

no, symbol stubs and lazy pointer section symbols are, by default,  
lazy but this gets cancelled if they are defined (or local).

>>
>> +	  if (nactual != ncalc)
>> +	    as_bad (_("there %s %d indirect_symbol%sin section %s but"
>> +		      " %d %s expected"), (nactual == 1)?"is":"are", nactual,
>
> "is":"are" don't make sense with other languages !

... and in some languages plurals are indicated by additional words ...

Does this not get handled by the translation process?

I don't especially like grammatically incorrect error/warning messages..

If it is un-handled, what is the usual solution?

====

Naturally, the changelog needs some tweaking, but I trust that we can  
do this once we're agreed on the basic patch...

OK for this version?
Iain

  bfd/mach-o.c           |   64 +++++++++++-
  bfd/mach-o.h           |   34 ++++---
  gas/config/obj-macho.c |  257 +++++++++++++++++++++++++++++++++++++++ 
+++------
  gas/config/obj-macho.h |    3 +
  4 files changed, 310 insertions(+), 48 deletions(-)

diff --git a/bfd/mach-o.c b/bfd/mach-o.c
index 6913b1d..9511665 100644
--- a/bfd/mach-o.c
+++ b/bfd/mach-o.c
@@ -2079,6 +2079,33 @@ bfd_mach_o_build_seg_command (const char  
*segment,
    return TRUE;
  }

+/* Count the number of indirect symbols in the image.
+   Requires that the sections are in their final order.  */
+
+static unsigned int
+bfd_mach_o_count_indirect_symbols (bfd *abfd, bfd_mach_o_data_struct  
*mdata)
+{
+  unsigned int i;
+  unsigned int nisyms = 0;
+
+  for (i = 0; i < mdata->nsects; ++i)
+    {
+      bfd_mach_o_section *sec = mdata->sections[i];
+
+      switch (sec->flags & BFD_MACH_O_SECTION_TYPE_MASK)
+	{
+	  case BFD_MACH_O_S_NON_LAZY_SYMBOL_POINTERS:
+	  case BFD_MACH_O_S_LAZY_SYMBOL_POINTERS:
+	  case BFD_MACH_O_S_SYMBOL_STUBS:
+	    nisyms += bfd_mach_o_section_get_nbr_indirect (abfd, sec);
+	    break;
+	  default:
+	    break;
+	}
+    }
+  return nisyms;
+}
+
  static bfd_boolean
  bfd_mach_o_build_dysymtab_command (bfd *abfd,
  				   bfd_mach_o_data_struct *mdata,
@@ -2135,9 +2162,11 @@ bfd_mach_o_build_dysymtab_command (bfd *abfd,
        dsym->nundefsym = 0;
      }

+  dsym->nindirectsyms = bfd_mach_o_count_indirect_symbols (abfd,  
mdata);
    if (dsym->nindirectsyms > 0)
      {
        unsigned i;
+      unsigned n;

        mdata->filelen = FILE_ALIGN (mdata->filelen, 2);
        dsym->indirectsymoff = mdata->filelen;
@@ -2146,11 +2175,38 @@ bfd_mach_o_build_dysymtab_command (bfd *abfd,
        dsym->indirect_syms = bfd_zalloc (abfd, dsym->nindirectsyms *  
4);
        if (dsym->indirect_syms == NULL)
          return FALSE;
-
-      /* So fill in the indices.  */
-      for (i = 0; i < dsym->nindirectsyms; ++i)
+		
+      n = 0;
+      for (i = 0; i < mdata->nsects; ++i)
  	{
-	  /* TODO: fill in the table.  */
+	  bfd_mach_o_section *sec = mdata->sections[i];
+
+	  switch (sec->flags & BFD_MACH_O_SECTION_TYPE_MASK)
+	    {
+	      case BFD_MACH_O_S_NON_LAZY_SYMBOL_POINTERS:
+	      case BFD_MACH_O_S_LAZY_SYMBOL_POINTERS:
+	      case BFD_MACH_O_S_SYMBOL_STUBS:
+		{
+		  unsigned j, num;
+		  bfd_mach_o_asymbol **isyms = sec->indirect_syms;
+		
+		  num = bfd_mach_o_section_get_nbr_indirect (abfd, sec);
+		  if (isyms == NULL || num == 0)
+		    break;
+		  /* Record the starting index in the reserved1 field.  */
+		  sec->reserved1 = n;
+		  for (j = 0; j < num; j++, n++)
+		    {
+		      if (isyms[j] == NULL)
+		        dsym->indirect_syms[n] = 0x80000000;
+		      else
+		        dsym->indirect_syms[n] = isyms[j]->symbol.udata.i;
+		    }
+		}
+		break;
+	      default:
+		break;
+	    }
  	}
      }

diff --git a/bfd/mach-o.h b/bfd/mach-o.h
index ca810a0..7f54961 100644
--- a/bfd/mach-o.h
+++ b/bfd/mach-o.h
@@ -42,6 +42,18 @@ typedef struct bfd_mach_o_header
  }
  bfd_mach_o_header;

+typedef struct bfd_mach_o_asymbol
+{
+  /* The actual symbol which the rest of BFD works with.  */
+  asymbol symbol;
+
+  /* Mach-O symbol fields.  */
+  unsigned char n_type;
+  unsigned char n_sect;
+  unsigned short n_desc;
+}
+bfd_mach_o_asymbol;
+
  #define BFD_MACH_O_SEGNAME_SIZE 16
  #define BFD_MACH_O_SECTNAME_SIZE 16

@@ -64,6 +76,12 @@ typedef struct bfd_mach_o_section
    /* Corresponding bfd section.  */
    asection *bfdsection;

+  /* An array holding the indirect symbols for this section.
+     NULL values indicate local symbols.
+     The number of symbols is determined from the section size and  
type.  */
+
+  bfd_mach_o_asymbol **indirect_syms;
+
    /* Simply linked list.  */
    struct bfd_mach_o_section *next;
  }
@@ -105,26 +123,12 @@ typedef struct bfd_mach_o_reloc_info
  }
  bfd_mach_o_reloc_info;

-typedef struct bfd_mach_o_asymbol
-{
-  /* The actual symbol which the rest of BFD works with.  */
-  asymbol symbol;
-
-  /* Mach-O symbol fields.  */
-  unsigned char n_type;
-  unsigned char n_sect;
-  unsigned short n_desc;
-}
-bfd_mach_o_asymbol;
-
  /* The symbol table is sorted like this:
   (1) local.
  	(otherwise in order of generation)
   (2) external defined
  	(sorted by name)
- (3) external undefined
-	(sorted by name)
- (4) common
+ (3) external undefined / common
  	(sorted by name)
  */

diff --git a/gas/config/obj-macho.c b/gas/config/obj-macho.c
index 43f4fba..cf94de5 100644
--- a/gas/config/obj-macho.c
+++ b/gas/config/obj-macho.c
@@ -1032,6 +1032,7 @@ obj_mach_o_set_symbol_qualifier (symbolS *sym,  
int type)

        case OBJ_MACH_O_SYM_PRIV_EXT:
  	s->n_type |= BFD_MACH_O_N_PEXT ;
+	s->n_desc &= ~LAZY; /* The native tool swithes this off too.  */
  	/* We follow the system tools in marking PEXT as also global.  */
  	/* Fall through.  */

@@ -1131,13 +1132,79 @@ obj_mach_o_sym_qual (int ntype)
    demand_empty_rest_of_line ();
  }

-/* Dummy function to allow test-code to work while we are working
-   on things.  */
+typedef struct obj_mach_o_indirect_sym
+{
+  symbolS *sym;
+  segT sect;
+  struct obj_mach_o_indirect_sym *next;
+} obj_mach_o_indirect_sym;
+
+/* We store in order an maintain a pointer to the last one - to save  
reversing
+   later.  */
+obj_mach_o_indirect_sym *indirect_syms;
+obj_mach_o_indirect_sym *indirect_syms_tail;

  static void
-obj_mach_o_placeholder (int arg ATTRIBUTE_UNUSED)
+obj_mach_o_indirect_symbol (int arg ATTRIBUTE_UNUSED)
  {
-  ignore_rest_of_line ();
+  bfd_mach_o_section *sec = bfd_mach_o_get_mach_o_section (now_seg);
+
+#ifdef md_flush_pending_output
+  md_flush_pending_output ();
+#endif
+
+  if (obj_mach_o_is_static)
+    as_bad (_("use of .indirect_symbols requires `-dynamic'"));
+
+  switch (sec->flags & BFD_MACH_O_SECTION_TYPE_MASK)
+    {
+      case BFD_MACH_O_S_SYMBOL_STUBS:
+      case BFD_MACH_O_S_LAZY_SYMBOL_POINTERS:
+      case BFD_MACH_O_S_NON_LAZY_SYMBOL_POINTERS:
+        {
+          obj_mach_o_indirect_sym *isym;
+	  char *name = input_line_pointer;
+	  char c = get_symbol_end ();
+	  symbolS *sym = symbol_find_or_make (name);
+	  unsigned int elsize =
+			bfd_mach_o_section_get_entry_size (stdoutput, sec);
+
+	  if (elsize == 0)
+	    {
+	      as_bad (_("attempt to add an indirect_symbol to a stub or"
+			" reference section with a zero-sized element at %s"),
+			name);
+	      *input_line_pointer = c;
+	      ignore_rest_of_line ();
+	      return;
+	  }
+	  *input_line_pointer = c;
+
+	  isym = (obj_mach_o_indirect_sym *)
+			xmalloc (sizeof (obj_mach_o_indirect_sym));
+	  if (isym == NULL)
+	    abort ();
+
+	  /* Just record the data for now, we will validate it when we
+	     compute the output.  */
+	  isym->sym = sym;
+	  isym->sect = now_seg;
+	  isym->next = NULL;
+	  if (indirect_syms == NULL)
+	    indirect_syms = isym;
+	  else
+	    indirect_syms_tail->next = isym;
+	  indirect_syms_tail = isym;
+	}
+        break;
+
+      default:
+	as_bad (_("an .indirect_symbol must be in a symbol pointer"
+		  " or stub section."));
+	ignore_rest_of_line ();
+	return;
+    }
+  demand_empty_rest_of_line ();
  }

  const pseudo_typeS mach_o_pseudo_table[] =
@@ -1231,7 +1298,7 @@ const pseudo_typeS mach_o_pseudo_table[] =
    {"no_dead_strip",	obj_mach_o_sym_qual,  
OBJ_MACH_O_SYM_NO_DEAD_STRIP},
    {"weak",		obj_mach_o_sym_qual, OBJ_MACH_O_SYM_WEAK}, /* ext */

-  {"indirect_symbol",	obj_mach_o_placeholder, 0},
+  {"indirect_symbol",	obj_mach_o_indirect_symbol, 0},

    /* File flags.  */
    { "subsections_via_symbols", obj_mach_o_fileprop,
@@ -1270,15 +1337,25 @@ obj_mach_o_type_for_symbol (bfd_mach_o_asymbol  
*s)

  void obj_macho_frob_label (struct symbol *sp)
  {
-  bfd_mach_o_asymbol *s = (bfd_mach_o_asymbol *) symbol_get_bfdsym  
(sp);
-  /* This is the base symbol type, that we mask in.  */
-  unsigned base_type = obj_mach_o_type_for_symbol (s);
-  bfd_mach_o_section *sec = bfd_mach_o_get_mach_o_section (s- 
 >symbol.section);
+  bfd_mach_o_asymbol *s;
+  unsigned base_type;
+  bfd_mach_o_section *sec;
    int sectype = -1;

+  /* Leave local symbols alone.  */
+
+  if (S_IS_LOCAL (sp))
+    return;
+
+  s = (bfd_mach_o_asymbol *) symbol_get_bfdsym (sp);
+  /* Leave debug symbols alone.  */
    if ((s->n_type & BFD_MACH_O_N_STAB) != 0)
-    return; /* Leave alone.  */
-
+    return;
+
+  /* This is the base symbol type, that we mask in.  */
+  base_type = obj_mach_o_type_for_symbol (s);
+
+  sec = bfd_mach_o_get_mach_o_section (s->symbol.section);
    if (sec != NULL)
      sectype = sec->flags & BFD_MACH_O_SECTION_TYPE_MASK;

@@ -1307,34 +1384,41 @@ void obj_macho_frob_label (struct symbol *sp)
  int
  obj_macho_frob_symbol (struct symbol *sp)
  {
-  bfd_mach_o_asymbol *s = (bfd_mach_o_asymbol *) symbol_get_bfdsym  
(sp);
-  unsigned base_type = obj_mach_o_type_for_symbol (s);
-  bfd_mach_o_section *sec = bfd_mach_o_get_mach_o_section (s- 
 >symbol.section);
+  bfd_mach_o_asymbol *s;
+  unsigned base_type;
+  bfd_mach_o_section *sec;
    int sectype = -1;
-
+
+  /* Leave local symbols alone.  */
+  if (S_IS_LOCAL (sp))
+    return 0;
+
+  s = (bfd_mach_o_asymbol *) symbol_get_bfdsym (sp);
+  /* Leave debug symbols alone.  */
+  if ((s->n_type & BFD_MACH_O_N_STAB) != 0)
+    return 0;
+
+  base_type = obj_mach_o_type_for_symbol (s);
+  sec = bfd_mach_o_get_mach_o_section (s->symbol.section);
    if (sec != NULL)
      sectype = sec->flags & BFD_MACH_O_SECTION_TYPE_MASK;

-  if ((s->n_type & BFD_MACH_O_N_STAB) != 0)
-    return 0; /* Leave alone.  */
-  else if (s->symbol.section == bfd_und_section_ptr)
+  if (s->symbol.section == bfd_und_section_ptr)
      {
        /* ??? Do we really gain much from implementing this as well  
as the
  	 mach-o specific ones?  */
        if (s->symbol.flags & BSF_WEAK)
  	s->n_desc |= BFD_MACH_O_N_WEAK_REF;

-      /* Undefined references, become extern.  */
-      if (s->n_desc & REFE)
-	{
-	  s->n_desc &= ~REFE;
-	  s->n_type |= BFD_MACH_O_N_EXT;
-	}
-
-      /* So do undefined 'no_dead_strip's.  */
-      if (s->n_desc & BFD_MACH_O_N_NO_DEAD_STRIP)
-	s->n_type |= BFD_MACH_O_N_EXT;
-
+      /* Undefined syms, become extern.  */
+      s->n_type |= BFD_MACH_O_N_EXT;
+      S_SET_EXTERNAL (sp);
+    }
+  else if (s->symbol.section == bfd_com_section_ptr)
+    {
+      /* ... so to comm.  */
+      s->n_type |= BFD_MACH_O_N_EXT;
+      S_SET_EXTERNAL (sp);
      }
    else
      {
@@ -1353,6 +1437,7 @@ obj_macho_frob_symbol (struct symbol *sp)
      {
        /* Anything here that should be added that is non-standard.  */
        s->n_desc &= ~BFD_MACH_O_REFERENCE_MASK;
+      s->symbol.udata.i = SYM_MACHO_FIELDS_NOT_VALIDATED;
      }
    else if (s->symbol.udata.i == SYM_MACHO_FIELDS_NOT_VALIDATED)
      {
@@ -1388,6 +1473,120 @@ obj_macho_frob_symbol (struct symbol *sp)
    return 0;
  }

+static void
+obj_mach_o_set_indirect_symbols (bfd *abfd, asection *sec,
+				 void *xxx ATTRIBUTE_UNUSED)
+{
+  bfd_vma sect_size = bfd_section_size (abfd, sec);
+  bfd_mach_o_section *ms = bfd_mach_o_get_mach_o_section (sec);
+  unsigned lazy = 0;
+
+  /* See if we have any indirect syms to consider.  */
+  if (indirect_syms == NULL)
+    return;
+
+  /* Process indirect symbols to determine if we have errors there.  */
+
+  switch (ms->flags & BFD_MACH_O_SECTION_TYPE_MASK)
+    {
+      case BFD_MACH_O_S_SYMBOL_STUBS:
+      case BFD_MACH_O_S_LAZY_SYMBOL_POINTERS:
+	lazy = LAZY;
+	/* Fall through.  */
+      case BFD_MACH_O_S_NON_LAZY_SYMBOL_POINTERS:
+	{
+	  unsigned int nactual = 0;
+	  unsigned int ncalc;
+	  obj_mach_o_indirect_sym *isym;
+	  obj_mach_o_indirect_sym *list = NULL;
+	  obj_mach_o_indirect_sym *list_tail = NULL;
+	  unsigned long eltsiz =
+			bfd_mach_o_section_get_entry_size (abfd, ms);
+
+	  for (isym = indirect_syms; isym != NULL; isym = isym->next)
+	    {
+	      if (isym->sect == sec)
+		{
+		  nactual++;
+		  if (list == NULL)
+		    list = isym;
+		  else
+		    list_tail->next = isym;
+		  list_tail = isym;
+		}
+	    }
+
+	  /* If none are in this section, stop here.  */
+	  if (nactual == 0)
+	    break;
+
+	  /* If we somehow added indirect symbols to a section with a zero
+	     entry size, we're dead ... */
+	  gas_assert (eltsiz != 0);
+
+	  ncalc = (unsigned int) (sect_size / eltsiz);
+	  if (nactual != ncalc)
+	    as_bad (_("there %s %d indirect_symbol%sin section %s but"
+		      " %d %s expected"), (nactual == 1)?"is":"are", nactual,
+		      (nactual == 1)?" ":"s ", sec->name, ncalc,
+		      (ncalc == 1)?"is":"are");
+	  else
+	    {
+	      unsigned n;
+	      bfd_mach_o_asymbol *sym;
+	      ms->indirect_syms =
+			bfd_zalloc (abfd,
+				    nactual * sizeof (bfd_mach_o_asymbol *));
+
+	      gas_assert (ms->indirect_syms != NULL);
+	
+	      for (isym = list, n = 0; isym != NULL; isym = isym->next, n++)
+		{
+		  /* Array is init to NULL & NULL signals a local symbol
+		     If the section is lazy-bound, we need to keep the
+		     reference to the symbol, since dyld can override.  */
+		  if (S_IS_LOCAL (isym->sym) && ! lazy)
+		    ;
+		  else
+		    {
+		      sym = (bfd_mach_o_asymbol *)symbol_get_bfdsym (isym->sym);
+		      if (sym == NULL)
+		        ;
+		      /* If the symbols is external ...  */
+		      else if (S_IS_EXTERNAL (isym->sym)
+			       || (sym->n_type & BFD_MACH_O_N_EXT)
+			       || ! S_IS_DEFINED (isym->sym)
+			       || lazy)
+			{
+			  sym->n_desc &= ~LAZY;
+			  /* ... and can be lazy, if not defined or hidden.  */
+			  if ((sym->n_type & BFD_MACH_O_N_TYPE)
+			       == BFD_MACH_O_N_UNDF
+			      && ! (sym->n_type & BFD_MACH_O_N_PEXT)
+			      && (sym->n_type & BFD_MACH_O_N_EXT))
+			    sym->n_desc |= lazy;
+			  ms->indirect_syms[n] = sym;
+		        }
+		    }
+		}
+	    }
+	}
+	break;
+
+      default:
+	break;
+    }
+}
+
+/* The process of relocation could alter what's externally visible,  
thus we
+   leave setting the indirect symbols until last.  */
+
+void
+obj_mach_o_frob_file_after_relocs (void)
+{
+  bfd_map_over_sections (stdoutput, obj_mach_o_set_indirect_symbols,  
(char *) 0);
+}
+
  /* Support stabs for mach-o.  */

  void
diff --git a/gas/config/obj-macho.h b/gas/config/obj-macho.h
index cbc3a4f..9f1f3db 100644
--- a/gas/config/obj-macho.h
+++ b/gas/config/obj-macho.h
@@ -62,6 +62,9 @@ extern void obj_macho_frob_label (struct symbol *);
  #define obj_frob_symbol(s, punt) punt = obj_macho_frob_symbol(s)
  extern int obj_macho_frob_symbol (struct symbol *);

+#define obj_frob_file_after_relocs obj_mach_o_frob_file_after_relocs
+extern void obj_mach_o_frob_file_after_relocs (void);
+
  #define EMIT_SECTION_SYMBOLS		0

  #define OBJ_PROCESS_STAB(SEG,W,S,T,O,D)	 
obj_mach_o_process_stab(W,S,T,O,D)



More information about the Binutils mailing list