This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v2 2/8] Handle copy relocations


* Tom Tromey <tromey@adacore.com> [2019-08-01 11:04:06 -0600]:

> In ELF, if a data symbol is defined in a shared library and used by
> the main program, it will be subject to a "copy relocation".  In this
> scenario, the main program has a copy of the symbol in question, and a
> relocation that tells ld.so to copy the data from the shared library.
> Then the symbol in the main program is used to satisfy all references.
> 
> This patch changes gdb to handle this scenario.  Data symbols coming
> from ELF shared libraries get a special flag that indicates that the
> symbol's address may be subject to copy relocation.
> 
> I looked briefly into handling copy relocations by looking at the
> actual relocations in the main program, but this seemed difficult to
> do with BFD.
> 
> Note that no caching is done here.  Perhaps this could be changed if
> need be; I wanted to avoid possible problems with either objfile
> lifetimes and changes, or conflicts with the long-term (vapor-ware)
> objfile splitting project.
> 
> gdb/ChangeLog
> 2019-08-01  Tom Tromey  <tromey@adacore.com>
> 
> 	* symmisc.c (dump_msymbols): Don't use MSYMBOL_VALUE_ADDRESS.
> 	* ada-lang.c (lesseq_defined_than): Handle
> 	LOC_STATIC.
> 	* dwarf2read.c (dwarf2_per_objfile): Add can_copy
> 	parameter.
> 	(dwarf2_has_info): Likewise.
> 	(new_symbol): Set maybe_copied on symbol when
> 	appropriate.
> 	* dwarf2read.h (dwarf2_per_objfile): Add can_copy
> 	parameter.
> 	<can_copy>: New member.
> 	* elfread.c (record_minimal_symbol): Set maybe_copied
> 	on symbol when appropriate.
> 	(elf_symfile_read): Update call to dwarf2_has_info.
> 	* minsyms.c (lookup_minimal_symbol_linkage): New
> 	function.
> 	* minsyms.h (lookup_minimal_symbol_linkage): Declare.
> 	* symtab.c (get_symbol_address, get_msymbol_address):
> 	New functions.
> 	* symtab.h (get_symbol_address, get_msymbol_address):
> 	Declare.
> 	(SYMBOL_VALUE_ADDRESS, MSYMBOL_VALUE_ADDRESS): Handle
> 	maybe_copied.
> 	(struct symbol, struct minimal_symbol) <maybe_copied>:
> 	New member.
> ---
>  gdb/ChangeLog    | 28 ++++++++++++++++++++++++++++
>  gdb/ada-lang.c   |  9 +++++++++
>  gdb/dwarf2read.c | 44 ++++++++++++++++++++++++++------------------
>  gdb/dwarf2read.h | 10 ++++++++--
>  gdb/elfread.c    | 16 +++++++++++-----
>  gdb/minsyms.c    | 20 ++++++++++++++++++++
>  gdb/minsyms.h    |  7 +++++++
>  gdb/symfile.h    |  3 ++-
>  gdb/symmisc.c    | 10 +++++++---
>  gdb/symtab.c     | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  gdb/symtab.h     | 42 +++++++++++++++++++++++++++++++++++++++---
>  11 files changed, 201 insertions(+), 32 deletions(-)
> 
> diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
> index 7a5cc4272c6..4e1ee31887a 100644
> --- a/gdb/ada-lang.c
> +++ b/gdb/ada-lang.c
> @@ -4734,6 +4734,15 @@ lesseq_defined_than (struct symbol *sym0, struct symbol *sym1)
>      case LOC_CONST:
>        return SYMBOL_VALUE (sym0) == SYMBOL_VALUE (sym1)
>          && equiv_types (SYMBOL_TYPE (sym0), SYMBOL_TYPE (sym1));
> +
> +    case LOC_STATIC:
> +      {
> +        const char *name0 = SYMBOL_LINKAGE_NAME (sym0);
> +        const char *name1 = SYMBOL_LINKAGE_NAME (sym1);
> +        return (strcmp (name0, name1) == 0
> +                && SYMBOL_VALUE_ADDRESS (sym0) == SYMBOL_VALUE_ADDRESS (sym1));
> +      }
> +
>      default:
>        return 0;
>      }
> diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
> index d3b9d64f342..90dfde0f1e9 100644
> --- a/gdb/dwarf2read.c
> +++ b/gdb/dwarf2read.c
> @@ -2130,8 +2130,10 @@ attr_value_as_address (struct attribute *attr)
>  /* See declaration.  */
>  
>  dwarf2_per_objfile::dwarf2_per_objfile (struct objfile *objfile_,
> -					const dwarf2_debug_sections *names)
> -  : objfile (objfile_)
> +					const dwarf2_debug_sections *names,
> +					bool can_copy_)
> +  : objfile (objfile_),
> +    can_copy (can_copy_)
>  {
>    if (names == NULL)
>      names = &dwarf2_elf_names;
> @@ -2206,11 +2208,14 @@ private:
>  /* Try to locate the sections we need for DWARF 2 debugging
>     information and return true if we have enough to do something.
>     NAMES points to the dwarf2 section names, or is NULL if the standard
> -   ELF names are used.  */
> +   ELF names are used.  CAN_COPY is true for formats where symbol
> +   interposition is possible and so symbol values must follow copy
> +   relocation rules.  */
>  
>  int
>  dwarf2_has_info (struct objfile *objfile,
> -                 const struct dwarf2_debug_sections *names)
> +                 const struct dwarf2_debug_sections *names,
> +		 bool can_copy)
>  {
>    if (objfile->flags & OBJF_READNEVER)
>      return 0;
> @@ -2220,7 +2225,8 @@ dwarf2_has_info (struct objfile *objfile,
>  
>    if (dwarf2_per_objfile == NULL)
>      dwarf2_per_objfile = dwarf2_objfile_data_key.emplace (objfile, objfile,
> -							  names);
> +							  names,
> +							  can_copy);
>  
>    return (!dwarf2_per_objfile->info.is_virtual
>  	  && dwarf2_per_objfile->info.s.section != NULL
> @@ -21646,19 +21652,21 @@ new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
>  		}
>  	      else if (attr2 && (DW_UNSND (attr2) != 0))
>  		{
> -		  /* Workaround gfortran PR debug/40040 - it uses
> -		     DW_AT_location for variables in -fPIC libraries which may
> -		     get overriden by other libraries/executable and get
> -		     a different address.  Resolve it by the minimal symbol
> -		     which may come from inferior's executable using copy
> -		     relocation.  Make this workaround only for gfortran as for
> -		     other compilers GDB cannot guess the minimal symbol
> -		     Fortran mangling kind.  */
> -		  if (cu->language == language_fortran && die->parent
> -		      && die->parent->tag == DW_TAG_module
> -		      && cu->producer
> -		      && startswith (cu->producer, "GNU Fortran"))
> -		    SYMBOL_ACLASS_INDEX (sym) = LOC_UNRESOLVED;
> +		  if (SYMBOL_CLASS (sym) == LOC_STATIC
> +		      && (objfile->flags & OBJF_MAINLINE) == 0
> +		      && dwarf2_per_objfile->can_copy)
> +		    {
> +		      /* A global static variable might be subject to
> +			 copy relocation.  We first check for a local
> +			 minsym, though, because maybe the symbol was
> +			 marked hidden, in which case this would not
> +			 apply.  */
> +		      minimal_symbol *found
> +			= (lookup_minimal_symbol_linkage
> +			   (SYMBOL_LINKAGE_NAME (sym), objfile));
> +		      if (found != nullptr)
> +			sym->maybe_copied = 1;
> +		    }
>  
>  		  /* A variable with DW_AT_external is never static,
>  		     but it may be block-scoped.  */
> diff --git a/gdb/dwarf2read.h b/gdb/dwarf2read.h
> index 8939f97af53..94cfc652e3e 100644
> --- a/gdb/dwarf2read.h
> +++ b/gdb/dwarf2read.h
> @@ -104,9 +104,12 @@ struct dwarf2_per_objfile
>  {
>    /* Construct a dwarf2_per_objfile for OBJFILE.  NAMES points to the
>       dwarf2 section names, or is NULL if the standard ELF names are
> -     used.  */
> +     used.  CAN_COPY is true for formats where symbol
> +     interposition is possible and so symbol values must follow copy
> +     relocation rules.  */
>    dwarf2_per_objfile (struct objfile *objfile,
> -		      const dwarf2_debug_sections *names);
> +		      const dwarf2_debug_sections *names,
> +		      bool can_copy);
>  
>    ~dwarf2_per_objfile ();
>  
> @@ -206,6 +209,9 @@ public:
>       original data was compressed using 'dwz -m'.  */
>    std::unique_ptr<struct dwz_file> dwz_file;
>  
> +  /* Whether copy relocations are supported by this object format.  */
> +  bool can_copy;
> +
>    /* A flag indicating whether this objfile has a section loaded at a
>       VMA of 0.  */
>    bool has_section_at_zero = false;
> diff --git a/gdb/elfread.c b/gdb/elfread.c
> index 630550b80dc..b2fade4124b 100644
> --- a/gdb/elfread.c
> +++ b/gdb/elfread.c
> @@ -203,10 +203,16 @@ record_minimal_symbol (minimal_symbol_reader &reader,
>        || ms_type == mst_text_gnu_ifunc)
>      address = gdbarch_addr_bits_remove (gdbarch, address);
>  
> -  return reader.record_full (name, name_len, copy_name, address,
> -			     ms_type,
> -			     gdb_bfd_section_index (objfile->obfd,
> -						    bfd_section));
> +  struct minimal_symbol *result
> +    = reader.record_full (name, name_len, copy_name, address,
> +			  ms_type,
> +			  gdb_bfd_section_index (objfile->obfd,
> +						 bfd_section));
> +  if ((objfile->flags & OBJF_MAINLINE) == 0
> +      && (ms_type == mst_data || ms_type == mst_bss))
> +    result->maybe_copied = 1;
> +
> +  return result;
>  }
>  
>  /* Read the symbol table of an ELF file.
> @@ -1239,7 +1245,7 @@ elf_symfile_read (struct objfile *objfile, symfile_add_flags symfile_flags)
>  				bfd_section_size (abfd, str_sect));
>      }
>  
> -  if (dwarf2_has_info (objfile, NULL))
> +  if (dwarf2_has_info (objfile, NULL, true))
>      {
>        dw_index_kind index_kind;
>  
> diff --git a/gdb/minsyms.c b/gdb/minsyms.c
> index 0f734ebc761..6f4afa979f9 100644
> --- a/gdb/minsyms.c
> +++ b/gdb/minsyms.c
> @@ -519,6 +519,26 @@ iterate_over_minimal_symbols
>  
>  /* See minsyms.h.  */
>  
> +struct minimal_symbol *
> +lookup_minimal_symbol_linkage (const char *name, struct objfile *objf)
> +{
> +  unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
> +
> +  for (minimal_symbol *msymbol = objf->per_bfd->msymbol_hash[hash];
> +       msymbol != NULL;
> +       msymbol = msymbol->hash_next)
> +    {
> +      if (strcmp (MSYMBOL_LINKAGE_NAME (msymbol), name) == 0
> +	  && (MSYMBOL_TYPE (msymbol) == mst_data
> +	      || MSYMBOL_TYPE (msymbol) == mst_bss))
> +	return msymbol;
> +    }
> +
> +  return nullptr;
> +}

I found the name chosen for this function rather confusing.  When
comparing to say 'lookup_minimal_symbol_text' which does
sort-of-almost the same thing but for text type symbols, I might have
been tempted to go with 'lookup_minimal_symbol_data'.  Am I missing
something behind the choice of function name?

Thanks,
Andrew


> +
> +/* See minsyms.h.  */
> +
>  struct bound_minimal_symbol
>  lookup_minimal_symbol_text (const char *name, struct objfile *objf)
>  {
> diff --git a/gdb/minsyms.h b/gdb/minsyms.h
> index bb43165620d..fae6cd25496 100644
> --- a/gdb/minsyms.h
> +++ b/gdb/minsyms.h
> @@ -193,6 +193,13 @@ struct bound_minimal_symbol lookup_minimal_symbol (const char *,
>  
>  struct bound_minimal_symbol lookup_bound_minimal_symbol (const char *);
>  
> +/* Look through the minimal symbols in OBJF for a global (not
> +   file-local) minsym whose linkage name is NAME.  Returns a minimal
> +   symbol that matches, or nullptr otherwise.  */
> +
> +struct minimal_symbol *lookup_minimal_symbol_linkage
> +  (const char *name, struct objfile *objf);
> +
>  /* Look through all the current minimal symbol tables and find the
>     first minimal symbol that matches NAME and has text type.  If OBJF
>     is non-NULL, limit the search to that objfile.  Returns a bound
> diff --git a/gdb/symfile.h b/gdb/symfile.h
> index 741b085e0c4..170308f10d5 100644
> --- a/gdb/symfile.h
> +++ b/gdb/symfile.h
> @@ -585,7 +585,8 @@ struct dwarf2_debug_sections {
>  };
>  
>  extern int dwarf2_has_info (struct objfile *,
> -                            const struct dwarf2_debug_sections *);
> +                            const struct dwarf2_debug_sections *,
> +			    bool = false);
>  
>  /* Dwarf2 sections that can be accessed by dwarf2_get_section_info.  */
>  enum dwarf2_section_enum {
> diff --git a/gdb/symmisc.c b/gdb/symmisc.c
> index 7666de390cd..db93d716704 100644
> --- a/gdb/symmisc.c
> +++ b/gdb/symmisc.c
> @@ -236,9 +236,13 @@ dump_msymbols (struct objfile *objfile, struct ui_file *outfile)
>  	  break;
>  	}
>        fprintf_filtered (outfile, "[%2d] %c ", index, ms_type);
> -      fputs_filtered (paddress (gdbarch, MSYMBOL_VALUE_ADDRESS (objfile,
> -								msymbol)),
> -		      outfile);
> +
> +      /* Use the relocated address as shown in the symbol here -- do
> +	 not try to respect copy relocations.  */
> +      CORE_ADDR addr = (msymbol->value.address
> +			+ ANOFFSET (objfile->section_offsets,
> +				    msymbol->section));
> +      fputs_filtered (paddress (gdbarch, addr), outfile);
>        fprintf_filtered (outfile, " %s", MSYMBOL_LINKAGE_NAME (msymbol));
>        if (section)
>  	{
> diff --git a/gdb/symtab.c b/gdb/symtab.c
> index 87a0c8e4da6..0ff212e0d97 100644
> --- a/gdb/symtab.c
> +++ b/gdb/symtab.c
> @@ -6068,6 +6068,50 @@ symbol_set_symtab (struct symbol *symbol, struct symtab *symtab)
>    symbol->owner.symtab = symtab;
>  }
>  
> +/* See symtab.h.  */
> +
> +CORE_ADDR
> +get_symbol_address (const struct symbol *sym)
> +{
> +  gdb_assert (sym->maybe_copied);
> +  gdb_assert (SYMBOL_CLASS (sym) == LOC_STATIC);
> +
> +  const char *linkage_name = SYMBOL_LINKAGE_NAME (sym);
> +
> +  for (objfile *objfile : current_program_space->objfiles ())
> +    {
> +      minimal_symbol *minsym
> +	= lookup_minimal_symbol_linkage (linkage_name, objfile);
> +      if (minsym != nullptr)
> +	return MSYMBOL_VALUE_ADDRESS (objfile, minsym);
> +    }
> +  return sym->ginfo.value.address;
> +}
> +
> +/* See symtab.h.  */
> +
> +CORE_ADDR
> +get_msymbol_address (struct objfile *objf, const struct minimal_symbol *minsym)
> +{
> +  gdb_assert (minsym->maybe_copied);
> +  gdb_assert ((objf->flags & OBJF_MAINLINE) == 0);
> +
> +  const char *linkage_name = MSYMBOL_LINKAGE_NAME (minsym);
> +
> +  for (objfile *objfile : current_program_space->objfiles ())
> +    {
> +      if ((objfile->flags & OBJF_MAINLINE) != 0)
> +	{
> +	  minimal_symbol *found
> +	    = lookup_minimal_symbol_linkage (linkage_name, objfile);
> +	  if (found != nullptr)
> +	    return MSYMBOL_VALUE_ADDRESS (objfile, found);
> +	}
> +    }
> +  return (minsym->value.address
> +	  + ANOFFSET (objf->section_offsets, minsym->section));
> +}
> +
>  
>  
>  void
> diff --git a/gdb/symtab.h b/gdb/symtab.h
> index 9f1791ba63c..dd5a1299b9b 100644
> --- a/gdb/symtab.h
> +++ b/gdb/symtab.h
> @@ -454,6 +454,14 @@ extern const char *symbol_get_demangled_name
>  
>  extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);
>  
> +/* Return the address of SYM.  The MAYBE_COPIED flag must be set on
> +   SYM.  If SYM appears in the main program's minimal symbols, then
> +   that minsym's address is returned; otherwise, SYM's address is
> +   returned.  This should generally only be used via the
> +   SYMBOL_VALUE_ADDRESS macro.  */
> +
> +extern CORE_ADDR get_symbol_address (const struct symbol *sym);
> +
>  /* Note that all the following SYMBOL_* macros are used with the
>     SYMBOL argument being either a partial symbol or
>     a full symbol.  Both types have a ginfo field.  In particular
> @@ -463,7 +471,9 @@ extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);
>     field only, instead of the SYMBOL parameter.  */
>  
>  #define SYMBOL_VALUE(symbol)		(symbol)->ginfo.value.ivalue
> -#define SYMBOL_VALUE_ADDRESS(symbol)	((symbol)->ginfo.value.address + 0)
> +#define SYMBOL_VALUE_ADDRESS(symbol)			      \
> +  (((symbol)->maybe_copied) ? get_symbol_address (symbol)     \
> +   : ((symbol)->ginfo.value.address))
>  #define SET_SYMBOL_VALUE_ADDRESS(symbol, new_value)	\
>    ((symbol)->ginfo.value.address = (new_value))
>  #define SYMBOL_VALUE_BYTES(symbol)	(symbol)->ginfo.value.bytes
> @@ -671,6 +681,14 @@ struct minimal_symbol : public general_symbol_info
>       the object file format may not carry that piece of information.  */
>    unsigned int has_size : 1;
>  
> +  /* For data symbols only, if this is set, then the symbol might be
> +     subject to copy relocation.  In this case, a minimal symbol
> +     matching the symbol's linkage name is first looked for in the
> +     main objfile.  If found, then that address is used; otherwise the
> +     address in this symbol is used.  */
> +
> +  unsigned maybe_copied : 1;
> +
>    /* Minimal symbols with the same hash key are kept on a linked
>       list.  This is the link.  */
>  
> @@ -690,6 +708,15 @@ struct minimal_symbol : public general_symbol_info
>    bool text_p () const;
>  };
>  
> +/* Return the address of MINSYM, which comes from OBJF.  The
> +   MAYBE_COPIED flag must be set on MINSYM.  If MINSYM appears in the
> +   main program's minimal symbols, then that minsym's address is
> +   returned; otherwise, MINSYM's address is returned.  This should
> +   generally only be used via the MSYMBOL_VALUE_ADDRESS macro.  */
> +
> +extern CORE_ADDR get_msymbol_address (struct objfile *objf,
> +				      const struct minimal_symbol *minsym);
> +
>  #define MSYMBOL_TARGET_FLAG_1(msymbol)  (msymbol)->target_flag_1
>  #define MSYMBOL_TARGET_FLAG_2(msymbol)  (msymbol)->target_flag_2
>  #define MSYMBOL_SIZE(msymbol)		((msymbol)->size + 0)
> @@ -708,8 +735,9 @@ struct minimal_symbol : public general_symbol_info
>  /* The relocated address of the minimal symbol, using the section
>     offsets from OBJFILE.  */
>  #define MSYMBOL_VALUE_ADDRESS(objfile, symbol)				\
> -  ((symbol)->value.address					\
> -   + ANOFFSET ((objfile)->section_offsets, ((symbol)->section)))
> +  (((symbol)->maybe_copied) ? get_msymbol_address (objfile, symbol)	\
> +   : ((symbol)->value.address						\
> +      + ANOFFSET ((objfile)->section_offsets, ((symbol)->section))))
>  /* For a bound minsym, we can easily compute the address directly.  */
>  #define BMSYMBOL_VALUE_ADDRESS(symbol) \
>    MSYMBOL_VALUE_ADDRESS ((symbol).objfile, (symbol).minsym)
> @@ -1112,6 +1140,14 @@ struct symbol
>    /* Whether this is an inlined function (class LOC_BLOCK only).  */
>    unsigned is_inlined : 1;
>  
> +  /* For LOC_STATIC only, if this is set, then the symbol might be
> +     subject to copy relocation.  In this case, a minimal symbol
> +     matching the symbol's linkage name is first looked for in the
> +     main objfile.  If found, then that address is used; otherwise the
> +     address in this symbol is used.  */
> +
> +  unsigned maybe_copied : 1;
> +
>    /* The concrete type of this symbol.  */
>  
>    ENUM_BITFIELD (symbol_subclass_kind) subclass : 2;
> -- 
> 2.20.1
> 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]