[PATCH v2 2/8] Handle copy relocations
Andrew Burgess
andrew.burgess@embecosm.com
Wed Aug 21 15:35:00 GMT 2019
* Tom Tromey <tromey@adacore.com> [2019-08-01 11:04:06 -0600]:
> In ELF, if a data symbol is defined in a shared library and used by
> the main program, it will be subject to a "copy relocation". In this
> scenario, the main program has a copy of the symbol in question, and a
> relocation that tells ld.so to copy the data from the shared library.
> Then the symbol in the main program is used to satisfy all references.
>
> This patch changes gdb to handle this scenario. Data symbols coming
> from ELF shared libraries get a special flag that indicates that the
> symbol's address may be subject to copy relocation.
>
> I looked briefly into handling copy relocations by looking at the
> actual relocations in the main program, but this seemed difficult to
> do with BFD.
>
> Note that no caching is done here. Perhaps this could be changed if
> need be; I wanted to avoid possible problems with either objfile
> lifetimes and changes, or conflicts with the long-term (vapor-ware)
> objfile splitting project.
>
> gdb/ChangeLog
> 2019-08-01 Tom Tromey <tromey@adacore.com>
>
> * symmisc.c (dump_msymbols): Don't use MSYMBOL_VALUE_ADDRESS.
> * ada-lang.c (lesseq_defined_than): Handle
> LOC_STATIC.
> * dwarf2read.c (dwarf2_per_objfile): Add can_copy
> parameter.
> (dwarf2_has_info): Likewise.
> (new_symbol): Set maybe_copied on symbol when
> appropriate.
> * dwarf2read.h (dwarf2_per_objfile): Add can_copy
> parameter.
> <can_copy>: New member.
> * elfread.c (record_minimal_symbol): Set maybe_copied
> on symbol when appropriate.
> (elf_symfile_read): Update call to dwarf2_has_info.
> * minsyms.c (lookup_minimal_symbol_linkage): New
> function.
> * minsyms.h (lookup_minimal_symbol_linkage): Declare.
> * symtab.c (get_symbol_address, get_msymbol_address):
> New functions.
> * symtab.h (get_symbol_address, get_msymbol_address):
> Declare.
> (SYMBOL_VALUE_ADDRESS, MSYMBOL_VALUE_ADDRESS): Handle
> maybe_copied.
> (struct symbol, struct minimal_symbol) <maybe_copied>:
> New member.
> ---
> gdb/ChangeLog | 28 ++++++++++++++++++++++++++++
> gdb/ada-lang.c | 9 +++++++++
> gdb/dwarf2read.c | 44 ++++++++++++++++++++++++++------------------
> gdb/dwarf2read.h | 10 ++++++++--
> gdb/elfread.c | 16 +++++++++++-----
> gdb/minsyms.c | 20 ++++++++++++++++++++
> gdb/minsyms.h | 7 +++++++
> gdb/symfile.h | 3 ++-
> gdb/symmisc.c | 10 +++++++---
> gdb/symtab.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
> gdb/symtab.h | 42 +++++++++++++++++++++++++++++++++++++++---
> 11 files changed, 201 insertions(+), 32 deletions(-)
>
> diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
> index 7a5cc4272c6..4e1ee31887a 100644
> --- a/gdb/ada-lang.c
> +++ b/gdb/ada-lang.c
> @@ -4734,6 +4734,15 @@ lesseq_defined_than (struct symbol *sym0, struct symbol *sym1)
> case LOC_CONST:
> return SYMBOL_VALUE (sym0) == SYMBOL_VALUE (sym1)
> && equiv_types (SYMBOL_TYPE (sym0), SYMBOL_TYPE (sym1));
> +
> + case LOC_STATIC:
> + {
> + const char *name0 = SYMBOL_LINKAGE_NAME (sym0);
> + const char *name1 = SYMBOL_LINKAGE_NAME (sym1);
> + return (strcmp (name0, name1) == 0
> + && SYMBOL_VALUE_ADDRESS (sym0) == SYMBOL_VALUE_ADDRESS (sym1));
> + }
> +
> default:
> return 0;
> }
> diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
> index d3b9d64f342..90dfde0f1e9 100644
> --- a/gdb/dwarf2read.c
> +++ b/gdb/dwarf2read.c
> @@ -2130,8 +2130,10 @@ attr_value_as_address (struct attribute *attr)
> /* See declaration. */
>
> dwarf2_per_objfile::dwarf2_per_objfile (struct objfile *objfile_,
> - const dwarf2_debug_sections *names)
> - : objfile (objfile_)
> + const dwarf2_debug_sections *names,
> + bool can_copy_)
> + : objfile (objfile_),
> + can_copy (can_copy_)
> {
> if (names == NULL)
> names = &dwarf2_elf_names;
> @@ -2206,11 +2208,14 @@ private:
> /* Try to locate the sections we need for DWARF 2 debugging
> information and return true if we have enough to do something.
> NAMES points to the dwarf2 section names, or is NULL if the standard
> - ELF names are used. */
> + ELF names are used. CAN_COPY is true for formats where symbol
> + interposition is possible and so symbol values must follow copy
> + relocation rules. */
>
> int
> dwarf2_has_info (struct objfile *objfile,
> - const struct dwarf2_debug_sections *names)
> + const struct dwarf2_debug_sections *names,
> + bool can_copy)
> {
> if (objfile->flags & OBJF_READNEVER)
> return 0;
> @@ -2220,7 +2225,8 @@ dwarf2_has_info (struct objfile *objfile,
>
> if (dwarf2_per_objfile == NULL)
> dwarf2_per_objfile = dwarf2_objfile_data_key.emplace (objfile, objfile,
> - names);
> + names,
> + can_copy);
>
> return (!dwarf2_per_objfile->info.is_virtual
> && dwarf2_per_objfile->info.s.section != NULL
> @@ -21646,19 +21652,21 @@ new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
> }
> else if (attr2 && (DW_UNSND (attr2) != 0))
> {
> - /* Workaround gfortran PR debug/40040 - it uses
> - DW_AT_location for variables in -fPIC libraries which may
> - get overriden by other libraries/executable and get
> - a different address. Resolve it by the minimal symbol
> - which may come from inferior's executable using copy
> - relocation. Make this workaround only for gfortran as for
> - other compilers GDB cannot guess the minimal symbol
> - Fortran mangling kind. */
> - if (cu->language == language_fortran && die->parent
> - && die->parent->tag == DW_TAG_module
> - && cu->producer
> - && startswith (cu->producer, "GNU Fortran"))
> - SYMBOL_ACLASS_INDEX (sym) = LOC_UNRESOLVED;
> + if (SYMBOL_CLASS (sym) == LOC_STATIC
> + && (objfile->flags & OBJF_MAINLINE) == 0
> + && dwarf2_per_objfile->can_copy)
> + {
> + /* A global static variable might be subject to
> + copy relocation. We first check for a local
> + minsym, though, because maybe the symbol was
> + marked hidden, in which case this would not
> + apply. */
> + minimal_symbol *found
> + = (lookup_minimal_symbol_linkage
> + (SYMBOL_LINKAGE_NAME (sym), objfile));
> + if (found != nullptr)
> + sym->maybe_copied = 1;
> + }
>
> /* A variable with DW_AT_external is never static,
> but it may be block-scoped. */
> diff --git a/gdb/dwarf2read.h b/gdb/dwarf2read.h
> index 8939f97af53..94cfc652e3e 100644
> --- a/gdb/dwarf2read.h
> +++ b/gdb/dwarf2read.h
> @@ -104,9 +104,12 @@ struct dwarf2_per_objfile
> {
> /* Construct a dwarf2_per_objfile for OBJFILE. NAMES points to the
> dwarf2 section names, or is NULL if the standard ELF names are
> - used. */
> + used. CAN_COPY is true for formats where symbol
> + interposition is possible and so symbol values must follow copy
> + relocation rules. */
> dwarf2_per_objfile (struct objfile *objfile,
> - const dwarf2_debug_sections *names);
> + const dwarf2_debug_sections *names,
> + bool can_copy);
>
> ~dwarf2_per_objfile ();
>
> @@ -206,6 +209,9 @@ public:
> original data was compressed using 'dwz -m'. */
> std::unique_ptr<struct dwz_file> dwz_file;
>
> + /* Whether copy relocations are supported by this object format. */
> + bool can_copy;
> +
> /* A flag indicating whether this objfile has a section loaded at a
> VMA of 0. */
> bool has_section_at_zero = false;
> diff --git a/gdb/elfread.c b/gdb/elfread.c
> index 630550b80dc..b2fade4124b 100644
> --- a/gdb/elfread.c
> +++ b/gdb/elfread.c
> @@ -203,10 +203,16 @@ record_minimal_symbol (minimal_symbol_reader &reader,
> || ms_type == mst_text_gnu_ifunc)
> address = gdbarch_addr_bits_remove (gdbarch, address);
>
> - return reader.record_full (name, name_len, copy_name, address,
> - ms_type,
> - gdb_bfd_section_index (objfile->obfd,
> - bfd_section));
> + struct minimal_symbol *result
> + = reader.record_full (name, name_len, copy_name, address,
> + ms_type,
> + gdb_bfd_section_index (objfile->obfd,
> + bfd_section));
> + if ((objfile->flags & OBJF_MAINLINE) == 0
> + && (ms_type == mst_data || ms_type == mst_bss))
> + result->maybe_copied = 1;
> +
> + return result;
> }
>
> /* Read the symbol table of an ELF file.
> @@ -1239,7 +1245,7 @@ elf_symfile_read (struct objfile *objfile, symfile_add_flags symfile_flags)
> bfd_section_size (abfd, str_sect));
> }
>
> - if (dwarf2_has_info (objfile, NULL))
> + if (dwarf2_has_info (objfile, NULL, true))
> {
> dw_index_kind index_kind;
>
> diff --git a/gdb/minsyms.c b/gdb/minsyms.c
> index 0f734ebc761..6f4afa979f9 100644
> --- a/gdb/minsyms.c
> +++ b/gdb/minsyms.c
> @@ -519,6 +519,26 @@ iterate_over_minimal_symbols
>
> /* See minsyms.h. */
>
> +struct minimal_symbol *
> +lookup_minimal_symbol_linkage (const char *name, struct objfile *objf)
> +{
> + unsigned int hash = msymbol_hash (name) % MINIMAL_SYMBOL_HASH_SIZE;
> +
> + for (minimal_symbol *msymbol = objf->per_bfd->msymbol_hash[hash];
> + msymbol != NULL;
> + msymbol = msymbol->hash_next)
> + {
> + if (strcmp (MSYMBOL_LINKAGE_NAME (msymbol), name) == 0
> + && (MSYMBOL_TYPE (msymbol) == mst_data
> + || MSYMBOL_TYPE (msymbol) == mst_bss))
> + return msymbol;
> + }
> +
> + return nullptr;
> +}
I found the name chosen for this function rather confusing. When
comparing to say 'lookup_minimal_symbol_text' which does
sort-of-almost the same thing but for text type symbols, I might have
been tempted to go with 'lookup_minimal_symbol_data'. Am I missing
something behind the choice of function name?
Thanks,
Andrew
> +
> +/* See minsyms.h. */
> +
> struct bound_minimal_symbol
> lookup_minimal_symbol_text (const char *name, struct objfile *objf)
> {
> diff --git a/gdb/minsyms.h b/gdb/minsyms.h
> index bb43165620d..fae6cd25496 100644
> --- a/gdb/minsyms.h
> +++ b/gdb/minsyms.h
> @@ -193,6 +193,13 @@ struct bound_minimal_symbol lookup_minimal_symbol (const char *,
>
> struct bound_minimal_symbol lookup_bound_minimal_symbol (const char *);
>
> +/* Look through the minimal symbols in OBJF for a global (not
> + file-local) minsym whose linkage name is NAME. Returns a minimal
> + symbol that matches, or nullptr otherwise. */
> +
> +struct minimal_symbol *lookup_minimal_symbol_linkage
> + (const char *name, struct objfile *objf);
> +
> /* Look through all the current minimal symbol tables and find the
> first minimal symbol that matches NAME and has text type. If OBJF
> is non-NULL, limit the search to that objfile. Returns a bound
> diff --git a/gdb/symfile.h b/gdb/symfile.h
> index 741b085e0c4..170308f10d5 100644
> --- a/gdb/symfile.h
> +++ b/gdb/symfile.h
> @@ -585,7 +585,8 @@ struct dwarf2_debug_sections {
> };
>
> extern int dwarf2_has_info (struct objfile *,
> - const struct dwarf2_debug_sections *);
> + const struct dwarf2_debug_sections *,
> + bool = false);
>
> /* Dwarf2 sections that can be accessed by dwarf2_get_section_info. */
> enum dwarf2_section_enum {
> diff --git a/gdb/symmisc.c b/gdb/symmisc.c
> index 7666de390cd..db93d716704 100644
> --- a/gdb/symmisc.c
> +++ b/gdb/symmisc.c
> @@ -236,9 +236,13 @@ dump_msymbols (struct objfile *objfile, struct ui_file *outfile)
> break;
> }
> fprintf_filtered (outfile, "[%2d] %c ", index, ms_type);
> - fputs_filtered (paddress (gdbarch, MSYMBOL_VALUE_ADDRESS (objfile,
> - msymbol)),
> - outfile);
> +
> + /* Use the relocated address as shown in the symbol here -- do
> + not try to respect copy relocations. */
> + CORE_ADDR addr = (msymbol->value.address
> + + ANOFFSET (objfile->section_offsets,
> + msymbol->section));
> + fputs_filtered (paddress (gdbarch, addr), outfile);
> fprintf_filtered (outfile, " %s", MSYMBOL_LINKAGE_NAME (msymbol));
> if (section)
> {
> diff --git a/gdb/symtab.c b/gdb/symtab.c
> index 87a0c8e4da6..0ff212e0d97 100644
> --- a/gdb/symtab.c
> +++ b/gdb/symtab.c
> @@ -6068,6 +6068,50 @@ symbol_set_symtab (struct symbol *symbol, struct symtab *symtab)
> symbol->owner.symtab = symtab;
> }
>
> +/* See symtab.h. */
> +
> +CORE_ADDR
> +get_symbol_address (const struct symbol *sym)
> +{
> + gdb_assert (sym->maybe_copied);
> + gdb_assert (SYMBOL_CLASS (sym) == LOC_STATIC);
> +
> + const char *linkage_name = SYMBOL_LINKAGE_NAME (sym);
> +
> + for (objfile *objfile : current_program_space->objfiles ())
> + {
> + minimal_symbol *minsym
> + = lookup_minimal_symbol_linkage (linkage_name, objfile);
> + if (minsym != nullptr)
> + return MSYMBOL_VALUE_ADDRESS (objfile, minsym);
> + }
> + return sym->ginfo.value.address;
> +}
> +
> +/* See symtab.h. */
> +
> +CORE_ADDR
> +get_msymbol_address (struct objfile *objf, const struct minimal_symbol *minsym)
> +{
> + gdb_assert (minsym->maybe_copied);
> + gdb_assert ((objf->flags & OBJF_MAINLINE) == 0);
> +
> + const char *linkage_name = MSYMBOL_LINKAGE_NAME (minsym);
> +
> + for (objfile *objfile : current_program_space->objfiles ())
> + {
> + if ((objfile->flags & OBJF_MAINLINE) != 0)
> + {
> + minimal_symbol *found
> + = lookup_minimal_symbol_linkage (linkage_name, objfile);
> + if (found != nullptr)
> + return MSYMBOL_VALUE_ADDRESS (objfile, found);
> + }
> + }
> + return (minsym->value.address
> + + ANOFFSET (objf->section_offsets, minsym->section));
> +}
> +
>
>
> void
> diff --git a/gdb/symtab.h b/gdb/symtab.h
> index 9f1791ba63c..dd5a1299b9b 100644
> --- a/gdb/symtab.h
> +++ b/gdb/symtab.h
> @@ -454,6 +454,14 @@ extern const char *symbol_get_demangled_name
>
> extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);
>
> +/* Return the address of SYM. The MAYBE_COPIED flag must be set on
> + SYM. If SYM appears in the main program's minimal symbols, then
> + that minsym's address is returned; otherwise, SYM's address is
> + returned. This should generally only be used via the
> + SYMBOL_VALUE_ADDRESS macro. */
> +
> +extern CORE_ADDR get_symbol_address (const struct symbol *sym);
> +
> /* Note that all the following SYMBOL_* macros are used with the
> SYMBOL argument being either a partial symbol or
> a full symbol. Both types have a ginfo field. In particular
> @@ -463,7 +471,9 @@ extern CORE_ADDR symbol_overlayed_address (CORE_ADDR, struct obj_section *);
> field only, instead of the SYMBOL parameter. */
>
> #define SYMBOL_VALUE(symbol) (symbol)->ginfo.value.ivalue
> -#define SYMBOL_VALUE_ADDRESS(symbol) ((symbol)->ginfo.value.address + 0)
> +#define SYMBOL_VALUE_ADDRESS(symbol) \
> + (((symbol)->maybe_copied) ? get_symbol_address (symbol) \
> + : ((symbol)->ginfo.value.address))
> #define SET_SYMBOL_VALUE_ADDRESS(symbol, new_value) \
> ((symbol)->ginfo.value.address = (new_value))
> #define SYMBOL_VALUE_BYTES(symbol) (symbol)->ginfo.value.bytes
> @@ -671,6 +681,14 @@ struct minimal_symbol : public general_symbol_info
> the object file format may not carry that piece of information. */
> unsigned int has_size : 1;
>
> + /* For data symbols only, if this is set, then the symbol might be
> + subject to copy relocation. In this case, a minimal symbol
> + matching the symbol's linkage name is first looked for in the
> + main objfile. If found, then that address is used; otherwise the
> + address in this symbol is used. */
> +
> + unsigned maybe_copied : 1;
> +
> /* Minimal symbols with the same hash key are kept on a linked
> list. This is the link. */
>
> @@ -690,6 +708,15 @@ struct minimal_symbol : public general_symbol_info
> bool text_p () const;
> };
>
> +/* Return the address of MINSYM, which comes from OBJF. The
> + MAYBE_COPIED flag must be set on MINSYM. If MINSYM appears in the
> + main program's minimal symbols, then that minsym's address is
> + returned; otherwise, MINSYM's address is returned. This should
> + generally only be used via the MSYMBOL_VALUE_ADDRESS macro. */
> +
> +extern CORE_ADDR get_msymbol_address (struct objfile *objf,
> + const struct minimal_symbol *minsym);
> +
> #define MSYMBOL_TARGET_FLAG_1(msymbol) (msymbol)->target_flag_1
> #define MSYMBOL_TARGET_FLAG_2(msymbol) (msymbol)->target_flag_2
> #define MSYMBOL_SIZE(msymbol) ((msymbol)->size + 0)
> @@ -708,8 +735,9 @@ struct minimal_symbol : public general_symbol_info
> /* The relocated address of the minimal symbol, using the section
> offsets from OBJFILE. */
> #define MSYMBOL_VALUE_ADDRESS(objfile, symbol) \
> - ((symbol)->value.address \
> - + ANOFFSET ((objfile)->section_offsets, ((symbol)->section)))
> + (((symbol)->maybe_copied) ? get_msymbol_address (objfile, symbol) \
> + : ((symbol)->value.address \
> + + ANOFFSET ((objfile)->section_offsets, ((symbol)->section))))
> /* For a bound minsym, we can easily compute the address directly. */
> #define BMSYMBOL_VALUE_ADDRESS(symbol) \
> MSYMBOL_VALUE_ADDRESS ((symbol).objfile, (symbol).minsym)
> @@ -1112,6 +1140,14 @@ struct symbol
> /* Whether this is an inlined function (class LOC_BLOCK only). */
> unsigned is_inlined : 1;
>
> + /* For LOC_STATIC only, if this is set, then the symbol might be
> + subject to copy relocation. In this case, a minimal symbol
> + matching the symbol's linkage name is first looked for in the
> + main objfile. If found, then that address is used; otherwise the
> + address in this symbol is used. */
> +
> + unsigned maybe_copied : 1;
> +
> /* The concrete type of this symbol. */
>
> ENUM_BITFIELD (symbol_subclass_kind) subclass : 2;
> --
> 2.20.1
>
More information about the Gdb-patches
mailing list