[PATCH v4 5/6] elf: Use mmap to map in symbol and relocation tables

H.J. Lu hjl.tools@gmail.com
Fri Mar 8 15:36:13 GMT 2024


On Thu, Mar 7, 2024 at 4:42 PM Alan Modra <amodra@gmail.com> wrote:
>
> On Wed, Mar 06, 2024 at 03:24:00PM -0800, H.J. Lu wrote:
> > Add _bfd_mmap_read_untracked to mmap in symbol tables and relocations
> > whose sizes >= 4 * page size.  Don't cache external relocations when
> > mmap is used.
> >
> > When mmap is used to map in all ELF sections, data to link the 3.5GB
> > clang executable in LLVM 17 debug build on Linux/x86-64 with 32GB RAM
> > is:
> >
> >               stdio           mmap            improvement
> > user          84.28           85.04           -0.9%
> > system                12.46           10.16           14%
> > total         96              95.35           0.7%
> > page faults   4837944         4047667         16%
> >
> > and data to link the 275M cc1plus executable in GCC 14 stage 1 build
> > is:
> >
> > user          5.22            5.27            -1%
> > system                0.94            0.84            11%
> > total         6.20            6.13            0.7%
> > page faults   361272          323377          10%
> >
> >       * elf.c (bfd_elf_get_elf_syms): Replace bfd_read with
> >       _bfd_mmap_read_untracked.
> >       * elflink.c (elf_link_read_relocs_from_section): Add 2 arguments
> >       to return mmap memory address and size.
> >       (_bfd_elf_link_info_read_relocs); Replace bfd_read with
> >       _bfd_mmap_read_untracked.
> >       (bfd_elf_final_link): Don't cache external relocations when mmap
> >       is used.
> >       * libbfd.c (_bfd_mmap_read_untracked ): New.
> >       * libbfd-in.h (_bfd_mmap_read_untracked): Likewise.
> >       * libbfd.h: Regenerated.
> > ---
> >  bfd/elf.c       | 47 +++++++++++++++++++++++++++++++++++------------
> >  bfd/elflink.c   | 44 ++++++++++++++++++++++++++++++++------------
> >  bfd/libbfd-in.h |  3 +++
> >  bfd/libbfd.c    | 33 +++++++++++++++++++++++++++++++++
> >  bfd/libbfd.h    |  3 +++
> >  5 files changed, 106 insertions(+), 24 deletions(-)
> >
> > diff --git a/bfd/elf.c b/bfd/elf.c
> > index e2e31a93950..7427dca2ba0 100644
> > --- a/bfd/elf.c
> > +++ b/bfd/elf.c
> > @@ -464,19 +464,30 @@ bfd_elf_get_elf_syms (bfd *ibfd,
> >        goto out;
> >      }
> >    pos = symtab_hdr->sh_offset + symoffset * extsym_size;
> > +  size_t alloc_ext_size = amt;
> >    if (extsym_buf == NULL)
> >      {
> > -      alloc_ext = bfd_malloc (amt);
> > -      extsym_buf = alloc_ext;
> > +#ifdef USE_MMAP
> > +      if ((ibfd->flags & BFD_PLUGIN) != 0
> > +       || amt < _bfd_minimum_mmap_size)
> > +     {
> > +#endif
> > +       alloc_ext = bfd_malloc (amt);
> > +       extsym_buf = alloc_ext;
> > +#ifdef USE_MMAP
> > +     }
> > +#endif
> >      }
> > -  if (extsym_buf == NULL
> > -      || bfd_seek (ibfd, pos, SEEK_SET) != 0
> > -      || bfd_read (extsym_buf, amt, ibfd) != amt)
> > +
> > +  if (bfd_seek (ibfd, pos, SEEK_SET) != 0
> > +      || !_bfd_mmap_read_untracked (&extsym_buf, &alloc_ext_size,
> > +                                 &alloc_ext, ibfd))
> >      {
> >        intsym_buf = NULL;
> >        goto out;
> >      }
> >
> > +  size_t alloc_extshndx_size = 0;
> >    if (shndx_hdr == NULL || shndx_hdr->sh_size == 0)
> >      extshndx_buf = NULL;
> >    else
> > @@ -487,15 +498,27 @@ bfd_elf_get_elf_syms (bfd *ibfd,
> >         intsym_buf = NULL;
> >         goto out;
> >       }
> > +      alloc_extshndx_size = amt;
> >        pos = shndx_hdr->sh_offset + symoffset * sizeof (Elf_External_Sym_Shndx);
> >        if (extshndx_buf == NULL)
> >       {
> > -       alloc_extshndx = (Elf_External_Sym_Shndx *) bfd_malloc (amt);
> > -       extshndx_buf = alloc_extshndx;
> > +#ifdef USE_MMAP
> > +       if ((ibfd->flags & BFD_PLUGIN) != 0
> > +           || amt < _bfd_minimum_mmap_size)
> > +         {
> > +#endif
> > +           alloc_extshndx
> > +             = (Elf_External_Sym_Shndx *) bfd_malloc (amt);
> > +           extshndx_buf = alloc_extshndx;
> > +#ifdef USE_MMAP
> > +         }
> > +#endif
> >       }
> > -      if (extshndx_buf == NULL
> > -       || bfd_seek (ibfd, pos, SEEK_SET) != 0
> > -       || bfd_read (extshndx_buf, amt, ibfd) != amt)
> > +      if (bfd_seek (ibfd, pos, SEEK_SET) != 0
> > +       || !_bfd_mmap_read_untracked ((void **) &extshndx_buf,
> > +                                     &alloc_extshndx_size,
> > +                                     (void **) &alloc_extshndx,
> > +                                     ibfd))
> >       {
> >         intsym_buf = NULL;
> >         goto out;
> > @@ -534,8 +557,8 @@ bfd_elf_get_elf_syms (bfd *ibfd,
> >        }
> >
> >   out:
> > -  free (alloc_ext);
> > -  free (alloc_extshndx);
> > +  _bfd_munmap_readonly_untracked (alloc_ext, alloc_ext_size);
> > +  _bfd_munmap_readonly_untracked (alloc_extshndx, alloc_extshndx_size);
> >
> >    return intsym_buf;
> >  }
> > diff --git a/bfd/elflink.c b/bfd/elflink.c
> > index 47fb890f94f..4602fb3d10f 100644
> > --- a/bfd/elflink.c
> > +++ b/bfd/elflink.c
> > @@ -2644,8 +2644,11 @@ _bfd_elf_link_assign_sym_version (struct elf_link_hash_entry *h, void *data)
> >     may be either a REL or a RELA section.  The relocations are
> >     translated into RELA relocations and stored in INTERNAL_RELOCS,
> >     which should have already been allocated to contain enough space.
> > -   The EXTERNAL_RELOCS are a buffer where the external form of the
> > -   relocations should be stored.
> > +   The *EXTERNAL_RELOCS_P are a buffer where the external form of the
> > +   relocations should be stored.  If *EXTERNAL_RELOCS_P is NULL,
> > +   *EXTERNAL_RELOCS_P and *EXTERNAL_RELOCS_SIZE_P returns the mmap
> > +   memory address and size.  Otherwise, *EXTERNAL_RELOCS_SIZE_P is
> > +   unchanged and EXTERNAL_RELOCS_SIZE_P returns 0.
>
> Comment doesn't match function params.

Fixed.

> >     Returns FALSE if something goes wrong.  */
> >
> > @@ -2653,7 +2656,8 @@ static bool
> >  elf_link_read_relocs_from_section (bfd *abfd,
> >                                  asection *sec,
> >                                  Elf_Internal_Shdr *shdr,
> > -                                void *external_relocs,
> > +                                void **external_relocs_addr,
> > +                                size_t *external_relocs_size_addr,
>
> I think external_relocs_size is a better name.  Why the _addr suffix?

Fixed.

> >                                  Elf_Internal_Rela *internal_relocs)
> >  {
> >    const struct elf_backend_data *bed;
> > @@ -2663,13 +2667,17 @@ elf_link_read_relocs_from_section (bfd *abfd,
> >    Elf_Internal_Rela *irela;
> >    Elf_Internal_Shdr *symtab_hdr;
> >    size_t nsyms;
> > +  void *external_relocs = *external_relocs_addr;
> >
> >    /* Position ourselves at the start of the section.  */
> >    if (bfd_seek (abfd, shdr->sh_offset, SEEK_SET) != 0)
> >      return false;
> >
> >    /* Read the relocations.  */
> > -  if (bfd_read (external_relocs, shdr->sh_size, abfd) != shdr->sh_size)
> > +  *external_relocs_size_addr = shdr->sh_size;
> > +  if (!_bfd_mmap_read_untracked (&external_relocs,
> > +                              external_relocs_size_addr,
> > +                              external_relocs_addr, abfd))
> >      return false;
> >
> >    symtab_hdr = &elf_tdata (abfd)->symtab_hdr;
> > @@ -2754,6 +2762,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd,
> >                               bool keep_memory)
> >  {
> >    void *alloc1 = NULL;
> > +  size_t alloc1_size;
> >    Elf_Internal_Rela *alloc2 = NULL;
> >    const struct elf_backend_data *bed = get_elf_backend_data (abfd);
> >    struct bfd_elf_section_data *esdo = elf_section_data (o);
> > @@ -2791,17 +2800,26 @@ _bfd_elf_link_info_read_relocs (bfd *abfd,
> >        if (esdo->rela.hdr)
> >       size += esdo->rela.hdr->sh_size;
> >
> > -      alloc1 = bfd_malloc (size);
> > -      if (alloc1 == NULL)
> > -     goto error_return;
> > -      external_relocs = alloc1;
> > +#ifdef USE_MMAP
> > +      if (size < _bfd_minimum_mmap_size)
> > +     {
> > +#endif
> > +       alloc1 = bfd_malloc (size);
> > +       if (alloc1 == NULL)
> > +         goto error_return;
> > +       external_relocs = alloc1;
> > +#ifdef USE_MMAP
> > +     }
> > +#endif
> >      }
> > +  else
> > +    alloc1 = external_relocs;
> >
> >    internal_rela_relocs = internal_relocs;
> >    if (esdo->rel.hdr)
> >      {
> >        if (!elf_link_read_relocs_from_section (abfd, o, esdo->rel.hdr,
> > -                                           external_relocs,
> > +                                           &alloc1, &alloc1_size,
> >                                             internal_relocs))
> >       goto error_return;
> >        external_relocs = (((bfd_byte *) external_relocs)
> > @@ -2812,7 +2830,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd,
> >
> >    if (esdo->rela.hdr
> >        && (!elf_link_read_relocs_from_section (abfd, o, esdo->rela.hdr,
> > -                                           external_relocs,
> > +                                           &alloc1, &alloc1_size,
> >                                             internal_rela_relocs)))
> >      goto error_return;
> >
> > @@ -2820,7 +2838,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd,
> >    if (keep_memory)
> >      esdo->relocs = internal_relocs;
> >
> > -  free (alloc1);
> > +  _bfd_munmap_readonly_untracked (alloc1, alloc1_size);
> >
> >    /* Don't free alloc2, since if it was allocated we are passing it
> >       back (under the name of internal_relocs).  */
> > @@ -2828,7 +2846,7 @@ _bfd_elf_link_info_read_relocs (bfd *abfd,
> >    return internal_relocs;
> >
> >   error_return:
> > -  free (alloc1);
> > +  _bfd_munmap_readonly_untracked (alloc1, alloc1_size);
> >    if (alloc2 != NULL)
> >      {
> >        if (keep_memory)
> > @@ -12741,12 +12759,14 @@ bfd_elf_final_link (bfd *abfd, struct bfd_link_info *info)
> >       goto error_return;
> >      }
> >
> > +#ifndef USE_MMAP
> >    if (max_external_reloc_size != 0)
> >      {
> >        flinfo.external_relocs = bfd_malloc (max_external_reloc_size);
> >        if (flinfo.external_relocs == NULL)
> >       goto error_return;
> >      }
> > +#endif
> >
> >    if (max_internal_reloc_count != 0)
> >      {
> > diff --git a/bfd/libbfd-in.h b/bfd/libbfd-in.h
> > index 7887fad9c92..effe1b86b53 100644
> > --- a/bfd/libbfd-in.h
> > +++ b/bfd/libbfd-in.h
> > @@ -905,6 +905,9 @@ extern void _bfd_munmap_readonly_untracked
> >  #define _bfd_munmap_readonly_untracked(ptr, rsize) free (ptr)
> >  #endif
> >
> > +extern bool _bfd_mmap_read_untracked
> > +  (void **, size_t *, void **, bfd *) ATTRIBUTE_HIDDEN;
> > +
> >  static inline void *
> >  _bfd_malloc_and_read (bfd *abfd, bfd_size_type asize, bfd_size_type rsize)
> >  {
> > diff --git a/bfd/libbfd.c b/bfd/libbfd.c
> > index c847c4f0180..0a31f113999 100644
> > --- a/bfd/libbfd.c
> > +++ b/bfd/libbfd.c
> > @@ -1180,6 +1180,39 @@ _bfd_mmap_readonly_tracked (bfd *abfd, size_t rsize)
> >  }
> >  #endif
> >
> > +/* Attempt to read *SIZE_ADDR bytes from ABFD's iostream to *PTR_P.
> > +   Return true if the full the amount has been read.  If *PTR_P is
> > +   NULL, mmap should be used, return the memory address at the
> > +   current offset in *PTR_P as well as return mmap address and size
> > +   in *PTR_ADDR and *SIZE_ADDR.  Otherwise, return NULL in *PTR_ADDR
> > +   and 0 in *SIZE_ADDR.  */
> > +
> > +bool
> > +_bfd_mmap_read_untracked (void **ptr_p, size_t *size_addr,
> > +                       void **ptr_addr, bfd *abfd)
>
> I find these parameter names confusing.  Perhaps
> void **data, size_t *size, void **mmap_base, bfd *abfd

Fixed.

> > +{
> > +  void *ptr = *ptr_p;
> > +  size_t size = *size_addr;
> > +
> > +#ifdef USE_MMAP
> > +  if (ptr == NULL)
> > +    {
> > +      ptr = _bfd_mmap_readonly_untracked (abfd, size, ptr_addr,
> > +                                       size_addr);
> > +      if (ptr == NULL || ptr == (void *) -1)
> > +     abort ();
> > +      *ptr_p = ptr;
> > +      return true;
> > +    }
> > +  else
> > +#endif
> > +    {
> > +      *ptr_addr = NULL;
> > +      *size_addr = 0;
> > +      return bfd_read (ptr, size, abfd) == size;
> > +    }
> > +}
> > +
> >  /* Default implementation */
> >
> >  bool
> > diff --git a/bfd/libbfd.h b/bfd/libbfd.h
> > index 1515a03b093..c80f5a86ed1 100644
> > --- a/bfd/libbfd.h
> > +++ b/bfd/libbfd.h
> > @@ -911,6 +911,9 @@ extern void _bfd_munmap_readonly_untracked
> >  #define _bfd_munmap_readonly_untracked(ptr, rsize) free (ptr)
> >  #endif
> >
> > +extern bool _bfd_mmap_read_untracked
> > +  (void **, size_t *, void **, bfd *) ATTRIBUTE_HIDDEN;
> > +
> >  static inline void *
> >  _bfd_malloc_and_read (bfd *abfd, bfd_size_type asize, bfd_size_type rsize)
> >  {
> > --
> > 2.44.0
>
> OK with these fixed.

Here is the v5 patch:

https://patchwork.sourceware.org/project/binutils/list/?series=31726

I will check it in tomorrow.

Thanks.

-- 
H.J.


More information about the Binutils mailing list