[PATCH] libdw: add thread-safety to dwarf_getabbrev()

Mark Wielaard mark@klomp.org
Sat Aug 24 23:24:00 GMT 2019


Hi,

On Fri, Aug 16, 2019 at 02:24:28PM -0500, Jonathon Anderson wrote:
> 2. Adding a lock & array structure to the memory manager (pseudo-TLS)
>   (libdwP.h, libdw_alloc.c)

Specifically concentrating on this part.

> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index bf1f4857..87abf7a7 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,12 @@
> +2019-08-15  Jonathon Anderson  <jma14@rice.edu>
> +
> +	* libdw_alloc.c (__libdw_allocate): Added thread-safe stack allocator.
> +	* libdwP.h (Dwarf): Likewise.
> +	* dwarf_begin_elf.c (dwarf_begin_elf): Support for above.
> +	* dwarf_end.c (dwarf_end): Likewise.
> [...]
> +	* Makefile.am: Link -lpthread to provide rwlocks.

> diff --git a/libdw/Makefile.am b/libdw/Makefile.am
> index 7a3d5322..6d0a0187 100644
> --- a/libdw/Makefile.am
> +++ b/libdw/Makefile.am
> @@ -108,7 +108,7 @@ am_libdw_pic_a_OBJECTS = $(libdw_a_SOURCES:.c=.os)
> libdw_so_LIBS = libdw_pic.a ../libdwelf/libdwelf_pic.a \
> 	  ../libdwfl/libdwfl_pic.a ../libebl/libebl.a
> libdw_so_DEPS = ../lib/libeu.a ../libelf/libelf.so
> -libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS)
> +libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS)
> -lpthread
> libdw_so_SOURCES =
> libdw.so$(EXEEXT): $(srcdir)/libdw.map $(libdw_so_LIBS) $(libdw_so_DEPS)
> # The rpath is necessary for libebl because its $ORIGIN use will

Do we also need -pthread for CFLAGS?

> diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
> index 38c8f5c6..b3885bb5 100644
> --- a/libdw/dwarf_begin_elf.c
> +++ b/libdw/dwarf_begin_elf.c
> @@ -417,11 +417,14 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn
> *scngrp)
>   /* Initialize the memory handling.  */
>   result->mem_default_size = mem_default_size;
>   result->oom_handler = __libdw_oom;
> -  result->mem_tail = (struct libdw_memblock *) (result + 1);
> -  result->mem_tail->size = (result->mem_default_size
> -			    - offsetof (struct libdw_memblock, mem));
> -  result->mem_tail->remaining = result->mem_tail->size;
> -  result->mem_tail->prev = NULL;
> +  pthread_rwlock_init(&result->mem_rwl, NULL);
> +  result->mem_stacks = 1;
> +  result->mem_tails = malloc (sizeof (struct libdw_memblock *));
> +  result->mem_tails[0] = (struct libdw_memblock *) (result + 1);
> +  result->mem_tails[0]->size = (result->mem_default_size
> +			       - offsetof (struct libdw_memblock, mem));
> +  result->mem_tails[0]->remaining = result->mem_tails[0]->size;
> +  result->mem_tails[0]->prev = NULL;

Can't we just set mem_tails[0] = NULL?
Wouldn't __libdw_alloc_tail () then initialize it?

> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
> index 29795c10..6317bcda 100644
> --- a/libdw/dwarf_end.c
> +++ b/libdw/dwarf_end.c
> @@ -94,14 +94,22 @@ dwarf_end (Dwarf *dwarf)
>       /* And the split Dwarf.  */
>       tdestroy (dwarf->split_tree, noop_free);
> 
> -      struct libdw_memblock *memp = dwarf->mem_tail;
> -      /* The first block is allocated together with the Dwarf object.  */
> -      while (memp->prev != NULL)
> -	{
> -	  struct libdw_memblock *prevp = memp->prev;
> -	  free (memp);
> -	  memp = prevp;
> -	}
> +      for (size_t i = 0; i < dwarf->mem_stacks; i++)
> +        {
> +          struct libdw_memblock *memp = dwarf->mem_tails[i];
> +          /* The first block is allocated together with the Dwarf object.
> */

Then this wouldn't be true again.

> +          while (memp != NULL && memp->prev != NULL)
> +	    {
> +	      struct libdw_memblock *prevp = memp->prev;
> +	      free (memp);
> +	      memp = prevp;
> +	    }
> +          /* Only for stack 0 though, the others are allocated
> individually.  */
> +          if (memp != NULL && i > 0)
> +            free (memp);
> +        }

And then you don't need this special case.

> +      free (dwarf->mem_tails);
> +      pthread_rwlock_destroy (&dwarf->mem_rwl);
> 
>       /* Free the pubnames helper structure.  */
>       free (dwarf->pubnames_sets);
> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index eebb7d12..442d493d 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -31,6 +31,7 @@
> 
> #include <libintl.h>
> #include <stdbool.h>
> +#include <pthread.h>
> 
> #include <libdw.h>
> #include <dwarf.h>
> @@ -218,16 +219,18 @@ struct Dwarf
>   /* Similar for addrx/constx, which will come from .debug_addr section.  */
>   struct Dwarf_CU *fake_addr_cu;
> 
> -  /* Internal memory handling.  This is basically a simplified
> +  /* Internal memory handling.  This is basically a simplified thread-local
>      reimplementation of obstacks.  Unfortunately the standard obstack
>      implementation is not usable in libraries.  */
> +  pthread_rwlock_t mem_rwl;
> +  size_t mem_stacks;
>   struct libdw_memblock
>   {
>     size_t size;
>     size_t remaining;
>     struct libdw_memblock *prev;
>     char mem[0];
> -  } *mem_tail;
> +  } **mem_tails;

Please document what/how exactly mem_rwl protects which other fields.

>   /* Default size of allocated memory blocks.  */
>   size_t mem_default_size;
> @@ -572,7 +575,7 @@ extern void __libdw_seterrno (int value)
> internal_function;
> 
> /* Memory handling, the easy parts.  This macro does not do any locking.  */

This comment isn't true anymore. __libdw_alloc_tail does locking.
Also I think the locking should be extended beyond just that
functions, see below.

> #define libdw_alloc(dbg, type, tsize, cnt) \
> -  ({ struct libdw_memblock *_tail = (dbg)->mem_tail;			      \
> +  ({ struct libdw_memblock *_tail = __libdw_alloc_tail(dbg);		      \
>      size_t _required = (tsize) * (cnt);				      \
>      type *_result = (type *) (_tail->mem + (_tail->size -
> _tail->remaining));\
>      size_t _padding = ((__alignof (type)				      \
> @@ -591,6 +594,10 @@ extern void __libdw_seterrno (int value)
> internal_function;
> #define libdw_typed_alloc(dbg, type) \
>   libdw_alloc (dbg, type, sizeof (type), 1)
> 
> +/* Callback to choose a thread-local memory allocation stack.  */
> +extern struct libdw_memblock *__libdw_alloc_tail (Dwarf* dbg)
> +     __nonnull_attribute__ (1);
> +
> /* Callback to allocate more.  */
> extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
>      __attribute__ ((__malloc__)) __nonnull_attribute__ (1);
> diff --git a/libdw/libdw_alloc.c b/libdw/libdw_alloc.c
> index f1e08714..c3c5e8a7 100644
> --- a/libdw/libdw_alloc.c
> +++ b/libdw/libdw_alloc.c
> @@ -33,9 +33,73 @@
> 
> #include <errno.h>
> #include <stdlib.h>
> +#include <assert.h>
> #include "libdwP.h"
> #include "system.h"
> +#include "stdatomic.h"
> +#if USE_VG_ANNOTATIONS == 1
> +#include <helgrind.h>
> +#include <drd.h>
> +#else
> +#define ANNOTATE_HAPPENS_BEFORE(X)
> +#define ANNOTATE_HAPPENS_AFTER(X)
> +#endif
> +
> +
> +#define thread_local __thread
> 
> +static thread_local int initialized = 0;
> +static thread_local size_t thread_id;
> +static atomic_size_t next_id = ATOMIC_VAR_INIT(0);

Could you initialize thread_id to (size_t) -1?
Then you don't need another thread_local to indicate not initialized?

> +struct libdw_memblock *
> +__libdw_alloc_tail (Dwarf *dbg)
> +{
> +  if (!initialized)

And change this to thead_id == (size_t) -1?

> +    {
> +      thread_id = atomic_fetch_add (&next_id, 1);
> +      initialized = 1;
> +    }
> +
> +  pthread_rwlock_rdlock (&dbg->mem_rwl);
> +  if (thread_id >= dbg->mem_stacks)
> +    {
> +      pthread_rwlock_unlock (&dbg->mem_rwl);
> +      pthread_rwlock_wrlock (&dbg->mem_rwl);
> +
> +      /* Another thread may have already reallocated. In theory using an
> +         atomic would be faster, but given that this only happens once per
> +         thread per Dwarf, some minor slowdown should be fine.  */
> +      if (thread_id >= dbg->mem_stacks)
> +        {
> +          dbg->mem_tails = realloc (dbg->mem_tails, (thread_id+1)
> +                                    * sizeof (struct libdw_memblock *));
> +          assert(dbg->mem_tails);

Don't use assert here, use if (dbg->mem_tails == NULL) dbg->oom_handler ();

But what if realloc moves the block?
Then all dbg->mem_tails[thread_id] pointers become invalid.
And this function drops the lock before returning a libdw_memblock*.

So I think the lock needs to extend beyond this function somehow.  Or
we need to prevent another thread reallocing while we are dealing with
the assigned memblock.

> +          for (size_t i = dbg->mem_stacks; i <= thread_id; i++)
> +            dbg->mem_tails[i] = NULL;
> +          dbg->mem_stacks = thread_id + 1;
> +          ANNOTATE_HAPPENS_BEFORE (&dbg->mem_tails);
> +        }
> +
> +      pthread_rwlock_unlock (&dbg->mem_rwl);
> +      pthread_rwlock_rdlock (&dbg->mem_rwl);
> +    }
> +
> +  /* At this point, we have an entry in the tail array.  */
> +  ANNOTATE_HAPPENS_AFTER (&dbg->mem_tails);
> +  struct libdw_memblock *result = dbg->mem_tails[thread_id];
> +  if (result == NULL)
> +    {
> +      result = malloc (dbg->mem_default_size);
> +      result->size = dbg->mem_default_size
> +                     - offsetof (struct libdw_memblock, mem);
> +      result->remaining = result->size;
> +      result->prev = NULL;
> +      dbg->mem_tails[thread_id] = result;
> +    }
> +  pthread_rwlock_unlock (&dbg->mem_rwl);
> +  return result;

Here the lock is dropped and result points into dbg->mem_tails
which means it cannot be used because the realloc above might
make it invalid.

> +}
> 
> void *
> __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
> @@ -52,8 +116,10 @@ __libdw_allocate (Dwarf *dbg, size_t minsize, size_t
> align)
>   newp->size = size - offsetof (struct libdw_memblock, mem);
>   newp->remaining = (uintptr_t) newp + size - (result + minsize);
> 
> -  newp->prev = dbg->mem_tail;
> -  dbg->mem_tail = newp;
> +  pthread_rwlock_rdlock (&dbg->mem_rwl);
> +  newp->prev = dbg->mem_tails[thread_id];
> +  dbg->mem_tails[thread_id] = newp;
> +  pthread_rwlock_unlock (&dbg->mem_rwl);
> 
>   return (void *) result;
> }

Cheers,

Mark



More information about the Elfutils-devel mailing list