[PATCH] unstrip: Call adjust_relocs no more than once per section.
Mark Wielaard
mark@klomp.org
Tue Feb 6 15:36:11 GMT 2024
Hi Aaron,
On Mon, 2024-02-05 at 18:11 -0500, Aaron Merey wrote:
> During symtab merging, adjust_relocs might be called multiple times on
> some SHT_REL/SHT_RELA sections. In these cases it is possible for a
> relocation's symbol index to be correctly mapped from X to Y during the
> first call to adjust_relocs but then wrongly remapped from Y to Z during
> the second call.
>
> Fix this by adjusting relocation symbol indices just once per section.
>
> Also add stable sorting for symbols during symtab merging so that the
> symbol order in the output file's symtab does not depend on undefined
> behaviour in qsort.
>
> Note that adjust_relocs still might be called a second time on a section
> during add_new_section_symbols. However since add_new_section_symbols
> generates its own distinct symbol index map, this should not trigger the
> bug described above.
>
> https://sourceware.org/bugzilla/show_bug.cgi?id=31097
>
> Signed-off-by: Aaron Merey <amerey@redhat.com>
> ---
>
> I added some tests to run-unstrip-test.sh to verify that strip+unstrip
> does not alter the symbols referred to by relocations. I tried to use
> eu-elfcmp for this purpose. However for a pair of i386 ET_REL binaries
> I did some testing with, eu-elfcmp skips checking the SHT_REL/SHT_RELA
> sections because the sh_flags do not contain SHF_ALLOC (the flags only
> contain SHF_INFO_LINK in this case).
>
> I'm not sure if this eu-elfcmp behaviour is intentional or if eu-elfcmp
> should start comparing REL/RELA sections even if they aren't allocated.
So it looks like elfcmp explicitly checks ebl_section_strip_p and
doesn't compare sections that are strippable. Maybe we should add an
eu-elfcmp --all-sections flag?
We should probably also check that it handles the new SHT_RELR sections
correctly. I see it only checks for SHT_REL and SHT_RELA in the code.
Although RELR is really simple and doesn't have symbol references. So
it is probably fine.
> src/unstrip.c | 81 ++++++++++++++++----
> tests/.gitignore | 1 +
> tests/Makefile.am | 3 +-
> tests/elf-print-reloc-syms.c | 144 +++++++++++++++++++++++++++++++++++
> tests/run-unstrip-test.sh | 8 ++
> 5 files changed, 221 insertions(+), 16 deletions(-)
> create mode 100644 tests/elf-print-reloc-syms.c
>
> diff --git a/src/unstrip.c b/src/unstrip.c
> index d5bd1821..f37d6c58 100644
> --- a/src/unstrip.c
> +++ b/src/unstrip.c
> @@ -598,21 +598,30 @@ adjust_relocs (Elf_Scn *outscn, Elf_Scn *inscn, const GElf_Shdr *shdr,
> /* Adjust all the relocation sections in the file. */
> static void
> adjust_all_relocs (Elf *elf, Elf_Scn *symtab, const GElf_Shdr *symshdr,
> - size_t map[], size_t map_size)
> + size_t map[], size_t map_size, bool *scn_filter)
> {
Maybe bool scn_filter[] since it really is an array?
> size_t new_sh_link = elf_ndxscn (symtab);
> Elf_Scn *scn = NULL;
> while ((scn = elf_nextscn (elf, scn)) != NULL)
> if (scn != symtab)
> {
> + if (scn_filter != NULL)
> + {
> + size_t ndx = elf_ndxscn (scn);
> +
> + /* Skip relocations that were already mapped during adjust_relocs
> + for the stripped symtab. This is to avoid mapping a relocation's
> + symbol index from X to Y during the first adjust_relocs and then
> + wrongly remapping it from Y to Z during the second call. */
> + if (scn_filter[ndx])
> + continue;
> + }
> +
> GElf_Shdr shdr_mem;
> GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
> ELF_CHECK (shdr != NULL, _("cannot get section header: %s"));
> - /* Don't redo SHT_GROUP, groups are in both the stripped and debug,
> - it will already have been done by adjust_relocs for the
> - stripped_symtab. */
> - if (shdr->sh_type != SHT_NOBITS && shdr->sh_type != SHT_GROUP
> - && shdr->sh_link == new_sh_link)
> +
> + if (shdr->sh_type != SHT_NOBITS && shdr->sh_link == new_sh_link)
> adjust_relocs (scn, scn, shdr, map, map_size, symshdr);
> }
> }
OK.
> @@ -697,7 +706,7 @@ add_new_section_symbols (Elf_Scn *old_symscn, size_t old_shnum,
> }
>
> /* Adjust any relocations referring to the old symbol table. */
> - adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1);
> + adjust_all_relocs (elf, symscn, shdr, symndx_map, nsym - 1, NULL);
>
> return symdata;
> }
OK.
> @@ -874,6 +883,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, Elf_Scn *strscn,
> s->shndx = shndx;
> s->info.info = sym->st_info;
> s->info.other = sym->st_other;
> + s->duplicate = NULL;
>
> if (scnmap != NULL && shndx != SHN_UNDEF && shndx < SHN_LORESERVE)
> s->shndx = scnmap[shndx - 1];
> @@ -903,8 +913,7 @@ collect_symbols (Elf *outelf, bool rel, Elf_Scn *symscn, Elf_Scn *strscn,
> if (s1->value > s2->value) \
> return 1
>
> -/* Compare symbols with a consistent ordering,
> - but one only meaningful for equality. */
> +/* Symbol comparison used to sort symbols in preparation for deduplication. */
> static int
> compare_symbols (const void *a, const void *b)
> {
> @@ -915,6 +924,38 @@ compare_symbols (const void *a, const void *b)
> CMP (size);
> CMP (shndx);
>
> + int res = s1->compare - s2->compare;
> + if (res != 0)
> + return res;
> +
> + res = strcmp (s1->name, s2->name);
> + if (res != 0)
> + return res;
> +
> + /* Duplicates still have distinct positions in the symbol index map.
> + Compare map positions to ensure that duplicate symbols are ordered
> + consistently even if the sort function is unstable. */
> + CMP (map);
> + error_exit (0, _("found two identical index map positions."));
> +}
>
> +/* Symbol comparison used to deduplicate symbols found in both the stripped
> + and unstripped input files.
> +
> + Similar to compare_symbols, but does not differentiate symbols based
> + on their position in the symbol index map. Duplicates can't be found
> + by comparing index map postions because they always have distinct
> + positions in the map. */
> +static int
> +compare_symbols_duplicate (const void *a, const void *b)
> +{
> + const struct symbol *s1 = a;
> + const struct symbol *s2 = b;
> +
> + CMP (value);
> + CMP (size);
> + CMP (shndx);
> +
> return (s1->compare - s2->compare) ?: strcmp (s1->name, s2->name);
> }
>
Right, this new compare_symbols_duplicate is what compare_symbols did
before, which is not a stable sort.
> @@ -946,13 +987,13 @@ compare_symbols_output (const void *a, const void *b)
> /* binutils always puts section symbols in section index order. */
> CMP (shndx);
> else if (s1 != s2)
> - error_exit (0, "section symbols in unexpected order");
> + error_exit (0, _("section symbols in unexpected order"));
> }
>
Yes, this is a translatable user visible string.
>
> /* Nothing really matters, so preserve the original order. */
> CMP (map);
> else if (s1 != s2)
> - error_exit (0, "found two identical symbols");
> + error_exit (0, _("found two identical symbols"));
> }
>
> return cmp;
Likewise.
> @@ -1855,7 +1896,8 @@ more sections in stripped file than debug file -- arguments reversed?"));
> }
>
> struct symbol *n = s;
> - while (n + 1 < &symbols[total_syms] && !compare_symbols (s, n + 1))
> + while (n + 1 < &symbols[total_syms]
> + && !compare_symbols_duplicate (s, n + 1))
> ++n;
Right, because compare_symbols_duplicate does what compare_symbols use
to do.
>
> while (s < n)
> @@ -1992,6 +2034,11 @@ more sections in stripped file than debug file -- arguments reversed?"));
> elf_flagdata (symdata, ELF_C_SET, ELF_F_DIRTY);
> update_shdr (unstripped_symtab, shdr);
>
> + /* Track which sections are adjusted during the first round
> + of calls to adjust_relocs. */
> + bool scn_adjusted[unstripped_shnum];
> + memset (scn_adjusted, 0, sizeof scn_adjusted);
> +
> if (stripped_symtab != NULL)
> {
> /* Adjust any relocations referring to the old symbol table. */
> @@ -2000,14 +2047,18 @@ more sections in stripped file than debug file -- arguments reversed?"));
> sec < §ions[stripped_shnum - 1];
> ++sec)
> if (sec->outscn != NULL && sec->shdr.sh_link == old_sh_link)
> - adjust_relocs (sec->outscn, sec->scn, &sec->shdr,
> - symndx_map, total_syms, shdr);
> + {
> + adjust_relocs (sec->outscn, sec->scn, &sec->shdr,
> + symndx_map, total_syms, shdr);
> + scn_adjusted[elf_ndxscn (sec->outscn)] = true;
> + }
> }
>
OK.
> /* Also adjust references to the other old symbol table. */
> adjust_all_relocs (unstripped, unstripped_symtab, shdr,
> &symndx_map[stripped_nsym - 1],
> - total_syms - (stripped_nsym - 1));
> + total_syms - (stripped_nsym - 1),
> + scn_adjusted);
>
> free (symbols);
> free (symndx_map);
OK.
> diff --git a/tests/.gitignore b/tests/.gitignore
> index 5bebb2c4..d00a883e 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -63,6 +63,7 @@
> /elfshphehdr
> /elfstrmerge
> /elfstrtab
> +/elf-print-reloc-syms
> /emptyfile
> /fillfile
> /find-prologues
OK.
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 2373c980..13bd9d56 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -62,7 +62,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \
> dwelf_elf_e_machine_string \
> getphdrnum leb128 read_unaligned \
> msg_tst system-elf-libelf-test system-elf-gelf-test \
> - nvidia_extended_linemap_libdw \
> + nvidia_extended_linemap_libdw elf-print-reloc-syms \
> $(asm_TESTS)
>
OK.
>
> asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
> @@ -810,6 +810,7 @@ getphdrnum_LDADD = $(libelf) $(libdw)
> leb128_LDADD = $(libelf) $(libdw)
> read_unaligned_LDADD = $(libelf) $(libdw)
> nvidia_extended_linemap_libdw_LDADD = $(libelf) $(libdw)
> +elf_print_reloc_syms_LDADD = $(libelf)
>
> # We want to test the libelf headers against the system elf.h header.
> # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
OK.
> diff --git a/tests/elf-print-reloc-syms.c b/tests/elf-print-reloc-syms.c
> new file mode 100644
> index 00000000..d6b7867d
> --- /dev/null
> +++ b/tests/elf-print-reloc-syms.c
> @@ -0,0 +1,144 @@
> +/* Copyright (C) 2024 Red Hat, Inc.
> + This file is part of elfutils.
> +
> + This file is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3 of the License, or
> + (at your option) any later version.
> +
> + elfutils is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program. If not, see <http://www.gnu.org/licenses/>. */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <libelf.h>
> +#include <gelf.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <assert.h>
> +
> +static void
> +print_reloc_symnames (Elf *elf, Elf_Scn *scn, GElf_Shdr *shdr, size_t sh_entsize)
> +{
> + int nentries = shdr->sh_size / sh_entsize;
> +
> + /* Get the data of the section. */
> + Elf_Data *data = elf_getdata (scn, NULL);
> + assert (data != NULL);
> +
> + /* Get the symbol table information. */
> + Elf_Scn *symscn = elf_getscn (elf, shdr->sh_link);
> + GElf_Shdr symshdr_mem;
> + GElf_Shdr *symshdr = gelf_getshdr (symscn, &symshdr_mem);
> + Elf_Data *symdata = elf_getdata (symscn, NULL);
> + assert (symshdr != NULL);
> + assert (symdata != NULL);
> +
> + /* Search for the optional extended section index table. */
> + Elf_Data *xndxdata = NULL;
> + int xndxscnidx = elf_scnshndx (scn);
> + if (xndxscnidx)
> + xndxdata = elf_getdata (elf_getscn (elf, xndxscnidx), NULL);
> +
> + /* Get the section header string table index. */
> + size_t shstrndx;
> + assert (elf_getshdrstrndx (elf, &shstrndx) >= 0);
> +
> + printf("Section: %s\n", elf_strptr (elf, shstrndx, shdr->sh_name));
> + for (int cnt = 0; cnt < nentries; ++cnt)
> + {
> + GElf_Rel relmem;
> + GElf_Rel *rel = gelf_getrel (data, cnt, &relmem);
> +
> +
> + if (likely (rel != NULL))
> + {
> + GElf_Sym symmem;
> + Elf32_Word xndx;
> + GElf_Sym *sym = gelf_getsymshndx (symdata, xndxdata,
> + GELF_R_SYM (rel->r_info),
> + &symmem, &xndx);
> +
> + if (sym == NULL)
> + {
> + printf ("<SYM NOT FOUND>\n");
> + continue;
> + }
> +
> + if (GELF_ST_TYPE (sym->st_info) != STT_SECTION)
> + printf ("%s\n", elf_strptr (elf, symshdr->sh_link, sym->st_name));
> + else
> + {
> + /* This is a relocation against a STT_SECTION symbol. */
> + GElf_Shdr secshdr_mem;
> + GElf_Shdr *secshdr;
> + secshdr = gelf_getshdr (elf_getscn (elf,
> + sym->st_shndx == SHN_XINDEX
> + ? xndx : sym->st_shndx),
> + &secshdr_mem);
> +
> + if (secshdr == NULL)
> + printf("<SECTION NOT FOUND>\n");
> + else
> + printf ("%s\n",
> + elf_strptr (elf, shstrndx, secshdr->sh_name));
> + }
> + }
> + }
> +}
> +
> +int
> +main (int argc, char *argv[])
> +{
> + if (argc != 2)
> + {
> + printf ("Usage: elf_print_reloc_syms FILE\n");
> + return -1;
> + }
> +
> + elf_version (EV_CURRENT);
> +
> + int fd = open(argv[1], O_RDONLY);
> + assert (fd != -1);
> +
> + Elf *elf = elf_begin (fd, ELF_C_READ, NULL);
> + assert (elf != NULL);
> +
> + size_t shnums;
> + assert (elf_getshdrnum (elf, &shnums) >= 0);
> +
> + Elf_Scn *scn = NULL;
> + while ((scn = elf_nextscn (elf, scn)) != NULL)
> + {
> + GElf_Shdr shdr_mem;
> + GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
> +
> + if (shdr != NULL)
> + {
> + /* Print the names of symbols referred to by relocations. */
> + if (shdr->sh_type == SHT_REL)
> + {
> + size_t sh_entsize = gelf_fsize (elf, ELF_T_REL, 1, EV_CURRENT);
> + print_reloc_symnames (elf, scn, shdr, sh_entsize);
> + }
> + else if (shdr->sh_type == SHT_RELA)
> + {
> + size_t sh_entsize = gelf_fsize (elf, ELF_T_RELA, 1, EV_CURRENT);
> + print_reloc_symnames (elf, scn, shdr, sh_entsize);
> + }
> + }
> + }
> +}
It is just a testcase, but cleaning up would be nice.
elf_end(elf);
close (fd);
(See also below for runtest support)
> diff --git a/tests/run-unstrip-test.sh b/tests/run-unstrip-test.sh
> index dc7d3a42..03373b3f 100755
> --- a/tests/run-unstrip-test.sh
> +++ b/tests/run-unstrip-test.sh
> @@ -33,6 +33,14 @@ testrun ${abs_top_builddir}/src/unstrip -o testfile.unstrip $stripped $debugfile
>
> testrun ${abs_top_builddir}/src/elfcmp --hash-inexact $original testfile.unstrip
>
> +tempfiles syms-orig syms-testfile
> +
> +# Check whether relocated symbols changed.
> +${abs_top_builddir}/tests/elf-print-reloc-syms $original > syms-orig
> +${abs_top_builddir}/tests/elf-print-reloc-syms testfile.unstrip > syms-testfile
>
These really should run under "testrun" so they will use the just build
libelf (and run under valgrind with --enable-valgrind, which would have
caught that the testcase doesn't clean up all resources it allocates).
> +testrun diff syms-orig syms-testfile
> +
> # Also test modifying the file in place.
>
> rm -f testfile.inplace
Looks good in general. Just clean up in the new testcase and run it
with testrun.
Thanks,
Mark
More information about the Elfutils-devel
mailing list