[PATCH 2/3] gdb: Add soname to build-id mapping for corefiles
Lancelot SIX
lsix@lancelotsix.com
Sun Aug 15 14:51:47 GMT 2021
Hi,
I have a few comments, which I have placed below.
On Thu, Aug 12, 2021 at 12:24:05AM -0400, Aaron Merey via Gdb-patches wrote:
> Since commit aa2d5a422 gdb has been able to read executable and shared
> library build-ids within core files.
>
> Expand this functionality so that each program_space maintains a map of
> sonames to build-ids for each shared library referenced in the program_space's
> core file.
>
> This feature may be used to verify that gdb has found the correct shared
> libraries for core files and to facilitate downloading shared libaries via
> debuginfod.
> ---
> gdb/arch-utils.c | 21 +++++++++----------
> gdb/arch-utils.h | 21 +++++++++----------
> gdb/build-id.h | 2 ++
> gdb/corelow.c | 13 +++++++++++-
> gdb/gdbarch.c | 2 +-
> gdb/gdbarch.h | 4 ++--
> gdb/gdbarch.sh | 2 +-
> gdb/linux-tdep.c | 52 +++++++++++++++++++++++++++++++++++++-----------
> gdb/progspace.c | 36 +++++++++++++++++++++++++++++++++
> gdb/progspace.h | 17 ++++++++++++++++
> gdb/solib.c | 35 ++++++++++++++++++++++++++++++++
> gdb/solib.h | 5 +++++
> 12 files changed, 173 insertions(+), 37 deletions(-)
>
> diff --git a/gdb/arch-utils.c b/gdb/arch-utils.c
> index 4290d637ce1..4c7497e6b4c 100644
> --- a/gdb/arch-utils.c
> +++ b/gdb/arch-utils.c
> @@ -1072,16 +1072,17 @@ default_get_pc_address_flags (frame_info *frame, CORE_ADDR pc)
>
> /* See arch-utils.h. */
> void
> -default_read_core_file_mappings (struct gdbarch *gdbarch,
> - struct bfd *cbfd,
> - gdb::function_view<void (ULONGEST count)>
> - pre_loop_cb,
> - gdb::function_view<void (int num,
> - ULONGEST start,
> - ULONGEST end,
> - ULONGEST file_ofs,
> - const char *filename)>
> - loop_cb)
> +default_read_core_file_mappings
> + (struct gdbarch *gdbarch,
> + struct bfd *cbfd,
> + gdb::function_view<void (ULONGEST count)> pre_loop_cb,
> + gdb::function_view<void (int num,
> + ULONGEST start,
> + ULONGEST end,
> + ULONGEST file_ofs,
> + const char *filename,
> + const bfd_build_id *build_id)>
> + loop_cb)
It looks like 'loop_cb' could go on the previous line.
If the types of the function callbacks are too long, I guess it would be
possible to give them a name before declaring the function. Something
like:
using loop_cb_ftype = gdb::function_view<void (...)>;
> {
> }
>
> diff --git a/gdb/arch-utils.h b/gdb/arch-utils.h
> index 03e9082f6d7..9139438c5fd 100644
> --- a/gdb/arch-utils.h
> +++ b/gdb/arch-utils.h
> @@ -295,14 +295,15 @@ extern std::string default_get_pc_address_flags (frame_info *frame,
> CORE_ADDR pc);
>
> /* Default implementation of gdbarch read_core_file_mappings method. */
> -extern void default_read_core_file_mappings (struct gdbarch *gdbarch,
> - struct bfd *cbfd,
> - gdb::function_view<void (ULONGEST count)>
> - pre_loop_cb,
> - gdb::function_view<void (int num,
> - ULONGEST start,
> - ULONGEST end,
> - ULONGEST file_ofs,
> - const char *filename)>
> - loop_cb);
> +extern void default_read_core_file_mappings
> + (struct gdbarch *gdbarch,
> + struct bfd *cbfd,
> + gdb::function_view<void (ULONGEST count)> pre_loop_cb,
> + gdb::function_view<void (int num,
> + ULONGEST start,
> + ULONGEST end,
> + ULONGEST file_ofs,
> + const char *filename,
> + const bfd_build_id *build_id)>
> + loop_cb);
loop_cb could also go up one line here.
> #endif /* ARCH_UTILS_H */
> diff --git a/gdb/build-id.h b/gdb/build-id.h
> index 42f8d57ede1..3c9402ee71b 100644
> --- a/gdb/build-id.h
> +++ b/gdb/build-id.h
> @@ -20,8 +20,10 @@
> #ifndef BUILD_ID_H
> #define BUILD_ID_H
>
> +#include "defs.h"
> #include "gdb_bfd.h"
> #include "gdbsupport/rsp-low.h"
> +#include <string>
>
> /* Locate NT_GNU_BUILD_ID from ABFD and return its content. */
>
> diff --git a/gdb/corelow.c b/gdb/corelow.c
> index eb785a08633..97eadceed84 100644
> --- a/gdb/corelow.c
> +++ b/gdb/corelow.c
> @@ -214,7 +214,7 @@ core_target::build_file_mappings ()
> /* read_core_file_mappings will invoke this lambda for each mapping
> that it finds. */
> [&] (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs,
> - const char *filename)
> + const char *filename, const bfd_build_id *build_id)
> {
> /* Architecture-specific read_core_mapping methods are expected to
> weed out non-file-backed mappings. */
> @@ -282,6 +282,16 @@ core_target::build_file_mappings ()
>
> /* Set target_section fields. */
> m_core_file_mappings.emplace_back (start, end, sec);
> +
> + /* If this is a bfd of a shared library, record its soname
> + and build id. */
> + if (build_id != nullptr)
> + {
> + gdb::optional<std::string> soname = gdb_bfd_read_elf_soname (bfd);
> + if (soname)
> + current_program_space->set_cbfd_soname_build_id (soname->data (),
> + build_id);
Here, since set_cbfd_soname_build_id's first argument is a std::string,
you could just use '*soname' instead of 'soname->data ()'.
> + }
> });
>
> normalize_mem_ranges (&m_core_unavailable_mappings);
> @@ -305,6 +315,7 @@ core_target::close ()
> comments in clear_solib in solib.c. */
> clear_solib ();
>
> + current_program_space->clear_cbfd_soname_build_ids ();
> current_program_space->cbfd.reset (nullptr);
> }
>
> diff --git a/gdb/gdbarch.c b/gdb/gdbarch.c
> index 830a86df89f..b6472bb36d5 100644
> --- a/gdb/gdbarch.c
> +++ b/gdb/gdbarch.c
> @@ -5411,7 +5411,7 @@ set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch,
> }
>
> void
> -gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename)> loop_cb)
> +gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename, const bfd_build_id *build_id)> loop_cb)
> {
> gdb_assert (gdbarch != NULL);
> gdb_assert (gdbarch->read_core_file_mappings != NULL);
> diff --git a/gdb/gdbarch.h b/gdb/gdbarch.h
> index 7db3e36d76a..dbd1fa0afc7 100644
> --- a/gdb/gdbarch.h
> +++ b/gdb/gdbarch.h
> @@ -1710,8 +1710,8 @@ extern void set_gdbarch_get_pc_address_flags (struct gdbarch *gdbarch, gdbarch_g
>
> /* Read core file mappings */
>
> -typedef void (gdbarch_read_core_file_mappings_ftype) (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename)> loop_cb);
> -extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename)> loop_cb);
> +typedef void (gdbarch_read_core_file_mappings_ftype) (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename, const bfd_build_id *build_id)> loop_cb);
> +extern void gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename, const bfd_build_id *build_id)> loop_cb);
> extern void set_gdbarch_read_core_file_mappings (struct gdbarch *gdbarch, gdbarch_read_core_file_mappings_ftype *read_core_file_mappings);
>
> extern struct gdbarch_tdep *gdbarch_tdep (struct gdbarch *gdbarch);
> diff --git a/gdb/gdbarch.sh b/gdb/gdbarch.sh
> index 9bc9de91c30..56679b8fee6 100755
> --- a/gdb/gdbarch.sh
> +++ b/gdb/gdbarch.sh
> @@ -1210,7 +1210,7 @@ m;ULONGEST;type_align;struct type *type;type;;default_type_align;;0
> f;std::string;get_pc_address_flags;frame_info *frame, CORE_ADDR pc;frame, pc;;default_get_pc_address_flags;;0
>
> # Read core file mappings
> -m;void;read_core_file_mappings;struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename)> loop_cb;cbfd, pre_loop_cb, loop_cb;;default_read_core_file_mappings;;0
> +m;void;read_core_file_mappings;struct bfd *cbfd, gdb::function_view<void (ULONGEST count)> pre_loop_cb, gdb::function_view<void (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs, const char *filename, const bfd_build_id *build_id)> loop_cb;cbfd, pre_loop_cb, loop_cb;;default_read_core_file_mappings;;0
>
> EOF
> }
> diff --git a/gdb/linux-tdep.c b/gdb/linux-tdep.c
> index 637d3d36a0b..eb35a2b5297 100644
> --- a/gdb/linux-tdep.c
> +++ b/gdb/linux-tdep.c
> @@ -43,6 +43,7 @@
> #include "gcore-elf.h"
>
> #include <ctype.h>
> +#include <unordered_map>
>
> /* This enum represents the values that the user can choose when
> informing the Linux kernel about which memory mappings will be
> @@ -1096,16 +1097,17 @@ linux_info_proc (struct gdbarch *gdbarch, const char *args,
> for each mapping. */
>
> static void
> -linux_read_core_file_mappings (struct gdbarch *gdbarch,
> - struct bfd *cbfd,
> - gdb::function_view<void (ULONGEST count)>
> - pre_loop_cb,
> - gdb::function_view<void (int num,
> - ULONGEST start,
> - ULONGEST end,
> - ULONGEST file_ofs,
> - const char *filename)>
> - loop_cb)
> +linux_read_core_file_mappings
> + (struct gdbarch *gdbarch,
> + struct bfd *cbfd,
> + gdb::function_view<void (ULONGEST count)> pre_loop_cb,
> + gdb::function_view<void (int num,
> + ULONGEST start,
> + ULONGEST end,
> + ULONGEST file_ofs,
> + const char *filename,
> + const bfd_build_id *build_id)>
> + loop_cb)
'loop_cb' could be on the line above.
> {
> /* Ensure that ULONGEST is big enough for reading 64-bit core files. */
> gdb_static_assert (sizeof (ULONGEST) >= 8);
> @@ -1174,6 +1176,23 @@ linux_read_core_file_mappings (struct gdbarch *gdbarch,
> if (f != descend)
> warning (_("malformed note - filename area is too big"));
>
> + const bfd_build_id *orig_build_id = cbfd->build_id;
> + std::unordered_map<ULONGEST, const bfd_build_id *> vma_map;
> + std::unordered_map<char *, const bfd_build_id *> filename_map;
> +
> + /* Search for solib build-ids in the core file. Each time one is found,
> + map the start vma of the corresponding elf header to the build-id. */
> + for (bfd_section *sec = cbfd->sections; sec != nullptr; sec = sec->next)
> + {
> + cbfd->build_id = nullptr;
> +
> + if (sec->flags & SEC_LOAD
> + && get_elf_backend_data (cbfd)->elf_backend_core_find_build_id
> + (cbfd, (bfd_vma) sec->filepos))
> + vma_map[sec->vma] = cbfd->build_id;
> + }
> +
> + cbfd->build_id = orig_build_id;
> pre_loop_cb (count);
>
> for (int i = 0; i < count; i++)
> @@ -1187,8 +1206,17 @@ linux_read_core_file_mappings (struct gdbarch *gdbarch,
> descdata += addr_size;
> char * filename = filenames;
> filenames += strlen ((char *) filenames) + 1;
> + const bfd_build_id *build_id = vma_map[start];
> +
> + /* Map filename to the build-id associated with this start vma,
> + if such a build-id was found. Otherwise use the build-id
> + already associated with this filename if it exists. */
> + if (build_id != nullptr)
> + filename_map[filename] = build_id;
> + else
> + build_id = filename_map[filename];
>
> - loop_cb (i, start, end, file_ofs, filename);
> + loop_cb (i, start, end, file_ofs, filename, build_id);
> }
> }
>
> @@ -1217,7 +1245,7 @@ linux_core_info_proc_mappings (struct gdbarch *gdbarch, const char *args)
> }
> },
> [=] (int num, ULONGEST start, ULONGEST end, ULONGEST file_ofs,
> - const char *filename)
> + const char *filename, const bfd_build_id *build_id)
> {
> if (gdbarch_addr_bit (gdbarch) == 32)
> printf_filtered ("\t%10s %10s %10s %10s %s\n",
> diff --git a/gdb/progspace.c b/gdb/progspace.c
> index 7080bf8ee27..d39bd45fcf4 100644
> --- a/gdb/progspace.c
> +++ b/gdb/progspace.c
> @@ -17,6 +17,7 @@
> You should have received a copy of the GNU General Public License
> along with this program. If not, see <http://www.gnu.org/licenses/>. */
>
> +#include "build-id.h"
> #include "defs.h"
> #include "gdbcmd.h"
> #include "objfiles.h"
> @@ -358,6 +359,41 @@ print_program_space (struct ui_out *uiout, int requested)
> }
> }
>
> +/* See progspace.h. */
> +
> +void
> +program_space::set_cbfd_soname_build_id (std::string soname,
This parameter could be 'std::string const &' or...
> + const bfd_build_id *build_id)
> +{
> + std::string build_id_hex = build_id_to_string (build_id);
> + cbfd_soname_to_build_id[soname] = build_id_hex;
... use 'std::move (soname)' here.
I guess the more 'usual' approach would be to have the argument as a
const reference (but to be honest, the cost of calling one more
constructor and copying the soname is negligible).
> +
> + return;
I am not sure if the GNU coding standard says something about this, but
'return;' as the last statement of a void function is redundant.
> +}
> +
> +/* See progspace.h. */
> +
> +const char *
> +program_space::get_cbfd_soname_build_id (const char *soname)
With set_cbfd_soname_build_id using a std::string, I would find it more
consistent to use std::string here as well. Is there a reason not to use
it that I missed?
You could use 'basename (soname.c_str ())' below.
The return type could also be 'const std::string *' (the map stores
std::string internally), but keeping a const char * is pretty similar.
> +{
> + gdb_assert (soname);
> +
> + auto it = cbfd_soname_to_build_id.find (basename (soname));
> + if (it == cbfd_soname_to_build_id.end ())
> + return nullptr;
> +
> + return it->second.c_str ();
> +}
> +
> +/* See progspace.h. */
> +
> +void
> +program_space::clear_cbfd_soname_build_ids ()
> +{
> + cbfd_soname_to_build_id.clear ();
> + return;
Same here, I guess 'return;' could be removed.
> +}
> +
> /* Boolean test for an already-known program space id. */
>
> static int
> diff --git a/gdb/progspace.h b/gdb/progspace.h
> index fb348ca7539..b42b3ffc4f1 100644
> --- a/gdb/progspace.h
> +++ b/gdb/progspace.h
> @@ -30,6 +30,7 @@
> #include "gdbsupport/safe-iterator.h"
> #include <list>
> #include <vector>
> +#include <unordered_map>
>
> struct target_ops;
> struct bfd;
> @@ -324,6 +325,19 @@ struct program_space
> /* Binary file diddling handle for the core file. */
> gdb_bfd_ref_ptr cbfd;
>
> + /* Associate a core file SONAME with BUILD_ID so that it can be retrieved
> + with get_cbfd_soname_build_id. */
> + void set_cbfd_soname_build_id (std::string soname,
> + const bfd_build_id *build_id);
> +
> + /* If a core file SONAME had a build-id associated with it by a previous
> + call to set_cbfd_soname_build_id then return the build-id as a
> + NULL-terminated hex string. */
> + const char *get_cbfd_soname_build_id (const char *soname);
> +
> + /* Clear all core file soname to build-id mappings. */
> + void clear_cbfd_soname_build_ids ();
> +
> /* The address space attached to this program space. More than one
> program space may be bound to the same address space. In the
> traditional unix-like debugging scenario, this will usually
> @@ -378,6 +392,9 @@ struct program_space
> /* The set of target sections matching the sections mapped into
> this program space. Managed by both exec_ops and solib.c. */
> target_section_table m_target_sections;
> +
> + /* Mapping of a core file's library sonames to their respective build-ids. */
> + std::unordered_map<std::string, std::string> cbfd_soname_to_build_id;
> };
>
> /* An address space. It is used for comparing if
> diff --git a/gdb/solib.c b/gdb/solib.c
> index e30affbb7e7..8b92cf7db53 100644
> --- a/gdb/solib.c
> +++ b/gdb/solib.c
> @@ -23,6 +23,7 @@
> #include <fcntl.h>
> #include "symtab.h"
> #include "bfd.h"
> +#include "build-id.h"
> #include "symfile.h"
> #include "objfiles.h"
> #include "gdbcore.h"
> @@ -1585,6 +1586,40 @@ gdb_bfd_scan_elf_dyntag (const int desired_dyntag, bfd *abfd, CORE_ADDR *ptr,
> return 0;
> }
>
> +/* See solib.h. */
> +
> +gdb::optional<std::string>
> +gdb_bfd_read_elf_soname (struct bfd *bfd)
> +{
> + gdb_assert (bfd != nullptr);
> +
> + gdb_bfd_ref_ptr abfd = gdb_bfd_open (bfd->filename, gnutarget);
> +
> + if (abfd == nullptr)
> + return gdb::optional<std::string> ();
> +
> + /* Check that bfd is an ET_DYN ELF file. */
> + bfd_check_format (abfd.get (), bfd_object);
> + if (!(bfd_get_file_flags (abfd.get ()) & DYNAMIC))
> + return gdb::optional<std::string> ();
> +
> + /* Determine soname of shared library. If found map soname to build-id. */
> + CORE_ADDR idx;
> + if (!gdb_bfd_scan_elf_dyntag (DT_SONAME, abfd.get (), &idx, nullptr))
> + return gdb::optional<std::string> ();
> +
> + struct bfd_section *dynstr = bfd_get_section_by_name (abfd.get (), ".dynstr");
> + if (dynstr == nullptr)
> + return gdb::optional<std::string> ();
> +
> + /* Read the soname from the string table. */
> + gdb::byte_vector dynstr_buf;
> + if (!gdb_bfd_get_full_section_contents (abfd.get (), dynstr, &dynstr_buf))
> + return gdb::optional<std::string> ();
> +
> + return gdb::optional<std::string> ((char *)dynstr_buf.data () + idx);
This will not change much, but you could cast to 'const char *' (this
is the type the std::string constructor expects).
> +}
> +
> /* Lookup the value for a specific symbol from symbol table. Look up symbol
> from ABFD. MATCH_SYM is a callback function to determine whether to pick
> up a symbol. DATA is the input of this callback function. Return NULL
> diff --git a/gdb/solib.h b/gdb/solib.h
> index c50f74e06bf..51cc047463f 100644
> --- a/gdb/solib.h
> +++ b/gdb/solib.h
> @@ -118,6 +118,11 @@ extern CORE_ADDR gdb_bfd_lookup_symbol_from_symtab (bfd *abfd,
> extern int gdb_bfd_scan_elf_dyntag (const int desired_dyntag, bfd *abfd,
> CORE_ADDR *ptr, CORE_ADDR *ptr_addr);
>
> +/* If BFD is an ELF shared object then attempt to return the string
> + referred to by its DT_SONAME tag. */
> +
> +extern gdb::optional<std::string> gdb_bfd_read_elf_soname (struct bfd *bfd);
> +
> /* Enable or disable optional solib event breakpoints as appropriate. */
>
> extern void update_solib_breakpoints (void);
> --
> 2.31.1
>
I hope the comments are helpful.
Best,
Lancelot.
More information about the Gdb-patches
mailing list