[PATCH v2 1/2] readelf: Fix symbol display for RELR relocs
Fangrui Song
i@maskray.me
Thu May 30 07:28:20 GMT 2024
On Wed, May 29, 2024 at 7:44 AM Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
>
> Filter symbols before binary searching for the right symbol to display
> for a given address, such that only displayable symbols are present and
> at most one per address.
>
> The current logic does not handle multiple symbols for the same address
> well if some of them are empty, the selected symbol is not stable with
> respect to an unrelated symbol table change and on aarch64 often mapping
> symbols are displayed which is not useful.
>
> Filtering solves these problems at the cost of a linear scan of the
> sorted symbol table.
>
> The heuristic to select the best symbol likely could be improved, this
> patch aims to improve symbol display for RELR without complex logic
> such that the output is useful and stable for ld tests.
> ---
> v2:
> - new is_special_symbol_name.
> - fix is_aarch64_special_symbol_name.
> ---
> binutils/readelf.c | 142 +++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 125 insertions(+), 17 deletions(-)
>
> diff --git a/binutils/readelf.c b/binutils/readelf.c
> index 103720e0b0d..1c0f0254abc 100644
> --- a/binutils/readelf.c
> +++ b/binutils/readelf.c
> @@ -1539,7 +1539,6 @@ static const char *
> get_symbol_at (Elf_Internal_Sym * symtab,
> uint64_t nsyms,
> char * strtab,
> - uint64_t strtablen,
> uint64_t where,
> uint64_t * offset_return)
> {
> @@ -1548,10 +1547,6 @@ get_symbol_at (Elf_Internal_Sym * symtab,
> Elf_Internal_Sym * best = NULL;
> uint64_t dist = 0x100000;
>
> - /* Paranoia. */
> - if (symtab == NULL || nsyms == 0 || strtab == NULL || strtablen == 0)
> - return NULL;
> -
> /* FIXME: Since this function is likely to be called repeatedly with
> slightly increasing addresses each time, we could speed things up by
> caching the last returned value and starting our search from there. */
> @@ -1564,8 +1559,7 @@ get_symbol_at (Elf_Internal_Sym * symtab,
>
> value = sym->st_value;
>
> - if (sym->st_name != 0
> - && where >= value
> + if (where >= value
> && where - value < dist)
> {
> best = sym;
> @@ -1583,9 +1577,6 @@ get_symbol_at (Elf_Internal_Sym * symtab,
> if (best == NULL)
> return NULL;
>
> - if (best->st_name >= strtablen)
> - return NULL;
> -
> if (offset_return != NULL)
> * offset_return = dist;
>
> @@ -1596,7 +1587,6 @@ static void
> print_relr_addr_and_sym (Elf_Internal_Sym * symtab,
> uint64_t nsyms,
> char * strtab,
> - uint64_t strtablen,
> uint64_t where)
> {
> const char * symname = NULL;
> @@ -1605,7 +1595,7 @@ print_relr_addr_and_sym (Elf_Internal_Sym * symtab,
> print_vma (where, ZERO_HEX);
> printf (" ");
>
> - symname = get_symbol_at (symtab, nsyms, strtab, strtablen, where, & offset);
> + symname = get_symbol_at (symtab, nsyms, strtab, where, & offset);
>
> if (symname == NULL)
> printf ("<no sym>");
> @@ -1619,6 +1609,117 @@ print_relr_addr_and_sym (Elf_Internal_Sym * symtab,
> }
> }
>
> +/* See bfd_is_aarch64_special_symbol_name. */
> +
> +static bool
> +is_aarch64_special_symbol_name (const char *name)
> +{
> + if (!name || name[0] != '$')
> + return false;
> + if (name[1] == 'x' || name[1] == 'd')
> + /* Map. */;
> + else if (name[1] == 'm' || name[1] == 'f' || name[1] == 'p')
> + /* Tag. */;
> + else
> + return false;
> + return name[2] == 0 || name[2] == '.';
> +}
LGTM
> +static bool
> +is_special_symbol_name (Filedata * filedata, const char * s)
> +{
> + switch (filedata->file_header.e_machine)
> + {
> + case EM_AARCH64:
> + return is_aarch64_special_symbol_name (s);
> +
> + default:
> + return false;
> + }
> +}
> +
> +/* Allows selecting the best symbol from a set for displaying addresses.
> + BEST is the current best or NULL if there are no good symbols yet.
> + SYM is the next symbol to consider, if it is better than BEST then
> + return SYM else return BEST. */
> +
> +static Elf_Internal_Sym *
> +select_display_sym (Filedata * filedata,
> + char * strtab,
> + uint64_t strtablen,
> + Elf_Internal_Sym * best,
> + Elf_Internal_Sym * sym)
> +{
> + /* Ignore empty or invalid syms. */
> + if (sym->st_name == 0)
> + return best;
> + if (sym->st_name >= strtablen)
> + return best;
> + /* Ignore undefined or TLS syms. */
> + if (sym->st_shndx == SHN_UNDEF)
> + return best;
> + if (ELF_ST_TYPE (sym->st_info) == STT_TLS)
> + return best;
LGTM.
(My llvm-readelf impl should skip STT_TLS as well.)
> + char *s = strtab + sym->st_name;
> +
> + /* Don't display special symbols. */
> + if (is_special_symbol_name (filedata, s))
> + return best;
> +
> + /* Here SYM is good for display. */
> +
> + if (best == NULL)
> + return sym;
> +
> + char *sbest = strtab + best->st_name;
> +
> + /* Prefer non-local symbols. */
> + if (ELF_ST_BIND (sym->st_info) == STB_LOCAL
> + && ELF_ST_BIND (best->st_info) != STB_LOCAL)
> + return best;
> + if (ELF_ST_BIND (sym->st_info) != STB_LOCAL
> + && ELF_ST_BIND (best->st_info) == STB_LOCAL)
> + return sym;
> +
> + /* Select based on lexicographic order. */
> + return strcmp (s, sbest) < 0 ? sym : best;
> +}
> +
> +/* Filter the sorted SYMTAB symbol array in-place to select at most one
> + symbol for an address and drop symbols that are not good to display.
> + Returns the new array length. */
> +
> +static uint64_t
> +filter_display_syms (Filedata * filedata,
> + Elf_Internal_Sym * symtab,
> + uint64_t nsyms,
> + char * strtab,
> + uint64_t strtablen)
> +{
> + Elf_Internal_Sym *r = symtab;
> + Elf_Internal_Sym *w = symtab;
> + Elf_Internal_Sym *best = NULL;
> + Elf_Internal_Sym *end = symtab + nsyms;
> + while (r < end)
> + {
> + /* Select the best symbol for an address. */
> + while (r < end
> + && (best == NULL || best->st_value == r->st_value))
Does this need a limit in case there are too many symbols at the same
address (pathological cases)?
> + {
> + best = select_display_sym (filedata, strtab, strtablen, best, r);
> + r++;
> + }
> + if (best != NULL)
> + {
> + *w = *best;
> + w++;
> + best = NULL;
> + }
> + }
> + return w - symtab;
> +}
> +
> static /* signed */ int
> symcmp (const void *p, const void *q)
> {
> @@ -1730,13 +1831,20 @@ dump_relr_relocations (Filedata * filedata,
> if (relrs == NULL)
> return false;
>
> + /* Paranoia. */
> + if (strtab == NULL)
> + strtablen = 0;
> + if (symtab == NULL)
> + nsyms = 0;
> +
> if (symtab != NULL)
> {
> /* Symbol tables are not sorted on address, but we want a quick lookup
> for the symbol associated with each address computed below, so sort
> - the table now. FIXME: This assumes that the symbol table will not
> - be used later on for some other purpose. */
> + the table then filter out unwanted entries. FIXME: This assumes that
> + the symbol table will not be used later on for some other purpose. */
> qsort (symtab, nsyms, sizeof (Elf_Internal_Sym), symcmp);
> + nsyms = filter_display_syms (filedata, symtab, nsyms, strtab, strtablen);
LGTM. This probably should be changed to a separate allocation at some
point to avoid pitfalls..
> }
>
> if (relr_entsize == sizeof (Elf32_External_Relr))
> @@ -1761,7 +1869,7 @@ dump_relr_relocations (Filedata * filedata,
> if ((entry & 1) == 0)
> {
> where = entry;
> - print_relr_addr_and_sym (symtab, nsyms, strtab, strtablen, where);
> + print_relr_addr_and_sym (symtab, nsyms, strtab, where);
> printf ("\n");
> where += relr_entsize;
> }
> @@ -1785,13 +1893,13 @@ dump_relr_relocations (Filedata * filedata,
>
> if (first)
> {
> - print_relr_addr_and_sym (symtab, nsyms, strtab, strtablen, addr);
> + print_relr_addr_and_sym (symtab, nsyms, strtab, addr);
> first = false;
> }
> else
> {
> printf (_("\n%*s "), relr_entsize == 4 ? 15 : 23, " ");
> - print_relr_addr_and_sym (symtab, nsyms, strtab, strtablen, addr);
> + print_relr_addr_and_sym (symtab, nsyms, strtab, addr);
> }
> }
>
> --
> 2.25.1
LGTM.
I assume that we will have ld+readelf tests when the aarch64 RELR
support is ready :)
More information about the Binutils
mailing list