From 9077c0df713e9adfdee7fe1f66005453316842de Mon Sep 17 00:00:00 2001
From: Jonathon Anderson
Date: Thu, 8 Aug 2019 09:01:56 -0500
Subject: [PATCH] libdw: add thread-safety to dwarf_getabbrev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For applications that need the information in the DIEs, the Dwarf_Abbrev
hash table et al. become a massive data race. This fixes that by:

1. Adding atomics & locks to the hash table to manage concurrency
   (lib/dynamicsizehash_concurrent.{c,h})
2. Adding a lock & array structure to the memory manager (pseudo-TLS)
   (libdwP.h, libdw_alloc.c)
3. Adding extra configure options for Helgrind/DRD annotations
   (configure.ac)
4. Including "stdatomic.h" from FreeBSD, to support C11-style atomics.
   (lib/stdatomic.h)

Signed-off-by: Jonathon Anderson
Signed-off-by: Srđan Milaković
---
 ChangeLog                        |   5 +
 configure.ac                     |  30 ++
 lib/ChangeLog                    |   6 +
 lib/Makefile.am                  |   5 +-
 lib/dynamicsizehash_concurrent.c | 522 +++++++++++++++++++++++++++++++
 lib/dynamicsizehash_concurrent.h | 118 +++++++
 lib/stdatomic.h                  | 442 ++++++++++++++++++++++++++
 libdw/ChangeLog                  |   9 +
 libdw/Makefile.am                |   2 +-
 libdw/dwarf_abbrev_hash.c        |   2 +-
 libdw/dwarf_abbrev_hash.h        |   2 +-
 libdw/dwarf_begin_elf.c          |  13 +-
 libdw/dwarf_end.c                |  24 +-
 libdw/libdwP.h                   |  13 +-
 libdw/libdw_alloc.c              |  70 ++++-
 15 files changed, 1240 insertions(+), 23 deletions(-)
 create mode 100644 lib/dynamicsizehash_concurrent.c
 create mode 100644 lib/dynamicsizehash_concurrent.h
 create mode 100644 lib/stdatomic.h

diff --git a/ChangeLog b/ChangeLog
index bed3999f..93907ddd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2019-08-15  Jonathon Anderson
+
+	* configure.ac: Add new --enable-valgrind-annotations
+	* configure.ac: Add new --with-valgrind (headers only)
+
 2019-08-13  Mark Wielaard
 
 	* configure.ac: Set version to 0.177.
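To make item 2 of the commit message concrete, here is a minimal standalone sketch (not part of the patch) of the pseudo-TLS scheme that libdw_alloc.c adopts further down: each thread claims a small integer id once from a global atomic counter, and that id indexes a per-object array of allocation tails that is read-locked on the fast path and write-locked only when it has to grow. All identifiers here (struct obj, get_tail, and so on) are illustrative stand-ins, not the real libdw names; it uses GCC's __thread, as the patch itself does.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static atomic_size_t next_id;                 /* global id dispenser */
static __thread size_t my_id = (size_t) -1;   /* this thread's slot, assigned lazily */

struct obj                                    /* stand-in for struct Dwarf */
{
  pthread_rwlock_t lock;                      /* guards only the array layout */
  size_t nslots;
  void **tails;                               /* one allocation tail per thread id */
};

static void *
get_tail (struct obj *o)
{
  if (my_id == (size_t) -1)
    my_id = atomic_fetch_add (&next_id, 1);   /* claim an id exactly once per thread */

  pthread_rwlock_rdlock (&o->lock);
  if (my_id >= o->nslots)
    {
      /* Slow path, once per thread per object: grow the array under the
         write lock, rechecking because another thread may have grown it.  */
      pthread_rwlock_unlock (&o->lock);
      pthread_rwlock_wrlock (&o->lock);
      if (my_id >= o->nslots)
        {
          void **t = realloc (o->tails, (my_id + 1) * sizeof *t);
          if (t == NULL)
            abort ();                         /* keep the sketch short */
          for (size_t i = o->nslots; i <= my_id; i++)
            t[i] = NULL;
          o->tails = t;
          o->nslots = my_id + 1;
        }
      pthread_rwlock_unlock (&o->lock);
      pthread_rwlock_rdlock (&o->lock);
    }
  void *tail = o->tails[my_id];               /* this slot is private to this thread */
  pthread_rwlock_unlock (&o->lock);
  return tail;
}

The rwlock only protects the size of the array; once a thread owns its slot, everything hanging off that slot is touched by that thread alone, which is why the common case stays a single rdlock/unlock pair.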
diff --git a/configure.ac b/configure.ac index c443fa3b..c5406b44 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,35 @@ if test "$use_valgrind" = yes; then fi AM_CONDITIONAL(USE_VALGRIND, test "$use_valgrind" = yes) +AC_ARG_WITH([valgrind], +AS_HELP_STRING([--with-valgrind],[include directory for Valgrind headers]), +[with_valgrind_headers=$withval], [with_valgrind_headers=no]) +if test "x$with_valgrind_headers" != xno; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -I$with_valgrind_headers" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include + int main() { return 0; } + ]])], [ HAVE_VALGRIND_HEADERS="yes" + CFLAGS="$save_CFLAGS -I$with_valgrind_headers" ], + [ AC_MSG_ERROR([invalid valgrind include directory: $with_valgrind_headers]) ]) +fi + +AC_ARG_ENABLE([valgrind-annotations], +AS_HELP_STRING([--enable-valgrind-annotations],[insert extra annotations for better valgrind support]), +[use_vg_annotations=$enableval], [use_vg_annotations=no]) +if test "$use_vg_annotations" = yes; then + if test "x$HAVE_VALGRIND_HEADERS" != "xyes"; then + AC_MSG_CHECKING([whether Valgrind headers are available]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include + int main() { return 0; } + ]])], [ AC_MSG_RESULT([yes]) ], + [ AC_MSG_ERROR([valgrind annotations requested but no headers are available]) ]) + fi +fi +AM_CONDITIONAL(USE_VG_ANNOTATIONS, test "$use_vg_annotations" = yes) + AC_ARG_ENABLE([install-elfh], AS_HELP_STRING([--enable-install-elfh],[install elf.h in include dir]), [install_elfh=$enableval], [install_elfh=no]) @@ -668,6 +697,7 @@ AC_MSG_NOTICE([ OTHER FEATURES Deterministic archives by default : ${default_ar_deterministic} Native language support : ${USE_NLS} + Extra Valgrind annotations : ${use_vg_annotations} EXTRA TEST FEATURES (used with make check) have bunzip2 installed (required) : ${HAVE_BUNZIP2} diff --git a/lib/ChangeLog b/lib/ChangeLog index 7381860c..e6d08509 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,9 @@ +2019-08-08 Jonathon Anderson + + * dynamicsizehash_concurrent.{c,h}: New files. + * stdatomic.h: New file, taken from FreeBSD. + * Makefile.am (noinst_HEADERS): Added *.h above. + 2019-05-03 Rosen Penev * color.c (parse_opt): Cast program_invocation_short_name to char *. diff --git a/lib/Makefile.am b/lib/Makefile.am index 36d21a07..af7228b9 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -38,8 +38,9 @@ libeu_a_SOURCES = xstrdup.c xstrndup.c xmalloc.c next_prime.c \ color.c printversion.c noinst_HEADERS = fixedsizehash.h libeu.h system.h dynamicsizehash.h list.h \ - eu-config.h color.h printversion.h bpf.h -EXTRA_DIST = dynamicsizehash.c + eu-config.h color.h printversion.h bpf.h \ + dynamicsizehash_concurrent.h stdatomic.h +EXTRA_DIST = dynamicsizehash.c dynamicsizehash_concurrent.c if !GPROF xmalloc_CFLAGS = -ffunction-sections diff --git a/lib/dynamicsizehash_concurrent.c b/lib/dynamicsizehash_concurrent.c new file mode 100644 index 00000000..d645b143 --- /dev/null +++ b/lib/dynamicsizehash_concurrent.c @@ -0,0 +1,522 @@ +/* Copyright (C) 2000-2019 Red Hat, Inc. + This file is part of elfutils. + Written by Srdan Milakovic , 2019. + Derived from Ulrich Drepper , 2000. 
+ + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + +#include +#include +#include +#include + +/* Before including this file the following macros must be defined: + + NAME name of the hash table structure. + TYPE data type of the hash table entries + COMPARE comparison function taking two pointers to TYPE objects + + The following macros if present select features: + + ITERATE iterating over the table entries is possible + REVERSE iterate in reverse order of insert + */ + + +static size_t +lookup (NAME *htab, HASHTYPE hval, TYPE val __attribute__ ((unused))) +{ + /* First hash function: simply take the modul but prevent zero. Small values + can skip the division, which helps performance when this is common. */ + size_t idx = 1 + (hval < htab->size ? hval : hval % htab->size); + +#if COMPARE != 0 /* A handful of tables don't actually compare the entries in + the table, they instead rely on the hash. In that case, we + can skip parts that relate to the value. */ + TYPE val_ptr; +#endif + HASHTYPE hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { +#if COMPARE == 0 + return idx; +#else + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) == 0) + return idx; +#endif + } + else if (hash == 0) + { + return 0; + } + + /* Second hash function as suggested in [Knuth]. */ + HASHTYPE second_hash = 1 + hval % (htab->size - 2); + + for(;;) + { + if (idx <= second_hash) + idx = htab->size + idx - second_hash; + else + idx -= second_hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { +#if COMPARE == 0 + return idx; +#else + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) == 0) + return idx; +#endif + } + else if (hash == 0) + { + return 0; + } + } +} + +static int +insert_helper (NAME *htab, HASHTYPE hval, TYPE val) +{ + /* First hash function: simply take the modul but prevent zero. Small values + can skip the division, which helps performance when this is common. */ + size_t idx = 1 + (hval < htab->size ? 
hval : hval % htab->size); + + TYPE val_ptr; + HASHTYPE hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) != 0) + return -1; + } + else if (hash == 0) + { + val_ptr = NULL; + atomic_compare_exchange_strong_explicit(&htab->table[idx].val_ptr, + (uintptr_t *) &val_ptr, + (uintptr_t) val, + memory_order_acquire, + memory_order_acquire); + + if (val_ptr == NULL) + { + atomic_store_explicit(&htab->table[idx].hashval, hval, + memory_order_release); + return 0; + } + else + { + do + { + hash = atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + } + while (hash == 0); + } + } + + /* Second hash function as suggested in [Knuth]. */ + HASHTYPE second_hash = 1 + hval % (htab->size - 2); + + for(;;) + { + if (idx <= second_hash) + idx = htab->size + idx - second_hash; + else + idx -= second_hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) != 0) + return -1; + } + else if (hash == 0) + { + val_ptr = NULL; + atomic_compare_exchange_strong_explicit(&htab->table[idx].val_ptr, + (uintptr_t *) &val_ptr, + (uintptr_t) val, + memory_order_acquire, + memory_order_acquire); + + if (val_ptr == NULL) + { + atomic_store_explicit(&htab->table[idx].hashval, hval, + memory_order_release); + return 0; + } + else + { + do + { + hash = atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + } + while (hash == 0); + } + } + } +} + +#define NO_RESIZING 0u +#define ALLOCATING_MEMORY 1u +#define MOVING_DATA 3u +#define CLEANING 2u + +#define STATE_BITS 2u +#define STATE_INCREMENT (1u << STATE_BITS) +#define STATE_MASK (STATE_INCREMENT - 1) +#define GET_STATE(A) ((A) & STATE_MASK) + +#define IS_NO_RESIZE_OR_CLEANING(A) (((A) & 0x1u) == 0) + +#define GET_ACTIVE_WORKERS(A) ((A) >> STATE_BITS) + +#define INITIALIZATION_BLOCK_SIZE 256 +#define MOVE_BLOCK_SIZE 256 +#define CEIL(A, B) (((A) + (B) - 1) / (B)) + +/* Initializes records and copies the data from the old table. 
+ It can share work with other threads */ +static void resize_helper(NAME *htab, int blocking) +{ + size_t num_old_blocks = CEIL(htab->old_size, MOVE_BLOCK_SIZE); + size_t num_new_blocks = CEIL(htab->size, INITIALIZATION_BLOCK_SIZE); + + size_t my_block; + size_t num_finished_blocks = 0; + + while ((my_block = atomic_fetch_add_explicit(&htab->next_init_block, 1, + memory_order_acquire)) + < num_new_blocks) + { + size_t record_it = my_block * INITIALIZATION_BLOCK_SIZE; + size_t record_end = (my_block + 1) * INITIALIZATION_BLOCK_SIZE; + if (record_end > htab->size) + record_end = htab->size; + + while (record_it++ != record_end) + { + atomic_init(&htab->table[record_it].hashval, (uintptr_t) NULL); + atomic_init(&htab->table[record_it].val_ptr, (uintptr_t) NULL); + } + + num_finished_blocks++; + } + + atomic_fetch_add_explicit(&htab->num_initialized_blocks, + num_finished_blocks, memory_order_release); + while (atomic_load_explicit(&htab->num_initialized_blocks, + memory_order_acquire) != num_new_blocks); + + /* All block are initialized, start moving */ + num_finished_blocks = 0; + while ((my_block = atomic_fetch_add_explicit(&htab->next_move_block, 1, + memory_order_acquire)) + < num_old_blocks) + { + size_t record_it = my_block * MOVE_BLOCK_SIZE; + size_t record_end = (my_block + 1) * MOVE_BLOCK_SIZE; + if (record_end > htab->old_size) + record_end = htab->old_size; + + while (record_it++ != record_end) + { + TYPE val_ptr = (TYPE) atomic_load_explicit( + &htab->old_table[record_it].val_ptr, + memory_order_acquire); + if (val_ptr == NULL) + continue; + + HASHTYPE hashval = atomic_load_explicit( + &htab->old_table[record_it].hashval, + memory_order_acquire); + assert(hashval); + + insert_helper(htab, hashval, val_ptr); + } + + num_finished_blocks++; + } + + atomic_fetch_add_explicit(&htab->num_moved_blocks, num_finished_blocks, + memory_order_release); + + if (blocking) + while (atomic_load_explicit(&htab->num_moved_blocks, + memory_order_acquire) != num_old_blocks); +} + +static void +resize_master(NAME *htab) +{ + htab->old_size = htab->size; + htab->old_table = htab->table; + + htab->size = next_prime(htab->size * 2); + htab->table = malloc((1 + htab->size) * sizeof(htab->table[0])); + assert(htab->table); + + /* Change state from ALLOCATING_MEMORY to MOVING_DATA */ + atomic_fetch_xor_explicit(&htab->resizing_state, + ALLOCATING_MEMORY ^ MOVING_DATA, + memory_order_release); + + resize_helper(htab, 1); + + /* Change state from MOVING_DATA to CLEANING */ + size_t resize_state = atomic_fetch_xor_explicit(&htab->resizing_state, + MOVING_DATA ^ CLEANING, + memory_order_acq_rel); + while (GET_ACTIVE_WORKERS(resize_state) != 0) + resize_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + + /* There are no more active workers */ + atomic_store_explicit(&htab->next_init_block, 0, memory_order_relaxed); + atomic_store_explicit(&htab->num_initialized_blocks, 0, + memory_order_relaxed); + + atomic_store_explicit(&htab->next_move_block, 0, memory_order_relaxed); + atomic_store_explicit(&htab->num_moved_blocks, 0, memory_order_relaxed); + + free(htab->old_table); + + /* Change state to NO_RESIZING */ + atomic_fetch_xor_explicit(&htab->resizing_state, CLEANING ^ NO_RESIZING, + memory_order_relaxed); + +} + +static void +resize_worker(NAME *htab) +{ + size_t resize_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + + /* If the resize has finished */ + if (IS_NO_RESIZE_OR_CLEANING(resize_state)) + return; + + /* Register as worker and check if the 
resize has finished in the meantime*/ + resize_state = atomic_fetch_add_explicit(&htab->resizing_state, + STATE_INCREMENT, + memory_order_acquire); + if (IS_NO_RESIZE_OR_CLEANING(resize_state)) + { + atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT, + memory_order_relaxed); + return; + } + + /* Wait while the new table is being allocated. */ + while (GET_STATE(resize_state) == ALLOCATING_MEMORY) + resize_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + + /* Check if the resize is done */ + assert(GET_STATE(resize_state) != NO_RESIZING); + if (GET_STATE(resize_state) == CLEANING) + { + atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT, + memory_order_relaxed); + return; + } + + resize_helper(htab, 0); + + /* Deregister worker */ + atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT, + memory_order_release); +} + + +int +#define INIT(name) _INIT (name) +#define _INIT(name) \ + name##_init +INIT(NAME) (NAME *htab, size_t init_size) +{ + /* We need the size to be a prime. */ + init_size = next_prime (init_size); + + /* Initialize the data structure. */ + htab->size = init_size; + atomic_init(&htab->filled, 0); + atomic_init(&htab->resizing_state, 0); + + atomic_init(&htab->next_init_block, 0); + atomic_init(&htab->num_initialized_blocks, 0); + + atomic_init(&htab->next_move_block, 0); + atomic_init(&htab->num_moved_blocks, 0); + + pthread_rwlock_init(&htab->resize_rwl, NULL); + + htab->table = (void *) malloc ((init_size + 1) * sizeof (htab->table[0])); + if (htab->table == NULL) + return -1; + + for (size_t i = 0; i <= init_size; i++) + { + atomic_init(&htab->table[i].hashval, (uintptr_t) NULL); + atomic_init(&htab->table[i].val_ptr, (uintptr_t) NULL); + } + + return 0; +} + + +int +#define FREE(name) _FREE (name) +#define _FREE(name) \ +name##_free +FREE(NAME) (NAME *htab) +{ + pthread_rwlock_destroy(&htab->resize_rwl); + free (htab->table); + return 0; +} + + +int +#define INSERT(name) _INSERT (name) +#define _INSERT(name) \ +name##_insert +INSERT(NAME) (NAME *htab, HASHTYPE hval, TYPE data) +{ + int incremented = 0; + + for(;;) + { + while (pthread_rwlock_tryrdlock(&htab->resize_rwl) != 0) + resize_worker(htab); + + size_t filled; + if (!incremented) + { + filled = atomic_fetch_add_explicit(&htab->filled, 1, + memory_order_acquire); + incremented = 1; + } + else + { + filled = atomic_load_explicit(&htab->filled, + memory_order_acquire); + } + + + if (100 * filled > 90 * htab->size) + { + /* Table is filled more than 90%. Resize the table. 
*/ + + size_t resizing_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + if (resizing_state == 0 && + atomic_compare_exchange_strong_explicit(&htab->resizing_state, + &resizing_state, + ALLOCATING_MEMORY, + memory_order_acquire, + memory_order_acquire)) + { + /* Master thread */ + pthread_rwlock_unlock(&htab->resize_rwl); + + pthread_rwlock_wrlock(&htab->resize_rwl); + resize_master(htab); + pthread_rwlock_unlock(&htab->resize_rwl); + + } + else + { + /* Worker thread */ + pthread_rwlock_unlock(&htab->resize_rwl); + resize_worker(htab); + } + } + else + { + /* Lock acquired, no need for resize*/ + break; + } + } + + int ret_val = insert_helper(htab, hval, data); + if (ret_val == -1) + atomic_fetch_sub_explicit(&htab->filled, 1, memory_order_relaxed); + pthread_rwlock_unlock(&htab->resize_rwl); + return ret_val; +} + + + +TYPE +#define FIND(name) _FIND (name) +#define _FIND(name) \ + name##_find +FIND(NAME) (NAME *htab, HASHTYPE hval, TYPE val) +{ + while (pthread_rwlock_tryrdlock(&htab->resize_rwl) != 0) + resize_worker(htab); + + size_t idx; + + /* Make the hash data nonzero. */ + hval = hval ?: 1; + idx = lookup(htab, hval, val); + + if (idx == 0) + { + pthread_rwlock_unlock(&htab->resize_rwl); + return NULL; + } + + /* get a copy before unlocking the lock */ + TYPE ret_val = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_relaxed); + + pthread_rwlock_unlock(&htab->resize_rwl); + return ret_val; +} + diff --git a/lib/dynamicsizehash_concurrent.h b/lib/dynamicsizehash_concurrent.h new file mode 100644 index 00000000..eb06ac9e --- /dev/null +++ b/lib/dynamicsizehash_concurrent.h @@ -0,0 +1,118 @@ +/* Copyright (C) 2000-2019 Red Hat, Inc. + This file is part of elfutils. + Written by Srdan Milakovic , 2019. + Derived from Ulrich Drepper , 2000. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + +#include +#include +#include "stdatomic.h" +/* Before including this file the following macros must be defined: + + NAME name of the hash table structure. + TYPE data type of the hash table entries + + The following macros if present select features: + + ITERATE iterating over the table entries is possible + HASHTYPE integer type for hash values, default unsigned long int + */ + + + +#ifndef HASHTYPE +# define HASHTYPE unsigned long int +#endif + +#ifndef RESIZE_BLOCK_SIZE +# define RESIZE_BLOCK_SIZE 256 +#endif + +/* Defined separately. */ +extern size_t next_prime (size_t seed); + + +/* Table entry type. 
*/ +#define _DYNHASHCONENTTYPE(name) \ + typedef struct name##_ent \ + { \ + _Atomic(HASHTYPE) hashval; \ + atomic_uintptr_t val_ptr; \ + } name##_ent +#define DYNHASHENTTYPE(name) _DYNHASHCONENTTYPE (name) +DYNHASHENTTYPE (NAME); + +/* Type of the dynamic hash table data structure. */ +#define _DYNHASHCONTYPE(name) \ +typedef struct \ +{ \ + size_t size; \ + size_t old_size; \ + atomic_size_t filled; \ + name##_ent *table; \ + name##_ent *old_table; \ + atomic_size_t resizing_state; \ + atomic_size_t next_init_block; \ + atomic_size_t num_initialized_blocks; \ + atomic_size_t next_move_block; \ + atomic_size_t num_moved_blocks; \ + pthread_rwlock_t resize_rwl; \ +} name +#define DYNHASHTYPE(name) _DYNHASHCONTYPE (name) +DYNHASHTYPE (NAME); + + + +#define _FUNCTIONS(name) \ +/* Initialize the hash table. */ \ +extern int name##_init (name *htab, size_t init_size); \ + \ +/* Free resources allocated for hash table. */ \ +extern int name##_free (name *htab); \ + \ +/* Insert new entry. */ \ +extern int name##_insert (name *htab, HASHTYPE hval, TYPE data); \ + \ +/* Find entry in hash table. */ \ +extern TYPE name##_find (name *htab, HASHTYPE hval, TYPE val); +#define FUNCTIONS(name) _FUNCTIONS (name) +FUNCTIONS (NAME) + + +#ifndef NO_UNDEF +# undef DYNHASHENTTYPE +# undef DYNHASHTYPE +# undef FUNCTIONS +# undef _FUNCTIONS +# undef XFUNCTIONS +# undef _XFUNCTIONS +# undef NAME +# undef TYPE +# undef ITERATE +# undef COMPARE +# undef FIRST +# undef NEXT +#endif diff --git a/lib/stdatomic.h b/lib/stdatomic.h new file mode 100644 index 00000000..49626662 --- /dev/null +++ b/lib/stdatomic.h @@ -0,0 +1,442 @@ +/*- + * Copyright (c) 2011 Ed Schouten + * David Chisnall + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _STDATOMIC_H_ +#define _STDATOMIC_H_ + +#include +#include + +#if !defined(__has_feature) +#define __has_feature(x) 0 +#endif +#if !defined(__has_builtin) +#define __has_builtin(x) 0 +#endif +#if !defined(__GNUC_PREREQ__) +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +#define __GNUC_PREREQ__(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +#define __GNUC_PREREQ__(maj, min) 0 +#endif +#endif + +#if !defined(__CLANG_ATOMICS) && !defined(__GNUC_ATOMICS) +#if __has_feature(c_atomic) +#define __CLANG_ATOMICS +#elif __GNUC_PREREQ__(4, 7) +#define __GNUC_ATOMICS +#elif !defined(__GNUC__) +#error "stdatomic.h does not support your compiler" +#endif +#endif + +/* + * language independent type to represent a Boolean value + */ + +typedef int __Bool; + +/* + * 7.17.1 Atomic lock-free macros. + */ + +#ifdef __GCC_ATOMIC_BOOL_LOCK_FREE +#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_CHAR_LOCK_FREE +#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_CHAR16_T_LOCK_FREE +#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_CHAR32_T_LOCK_FREE +#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_WCHAR_T_LOCK_FREE +#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_SHORT_LOCK_FREE +#define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_INT_LOCK_FREE +#define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_LONG_LOCK_FREE +#define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_LLONG_LOCK_FREE +#define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_POINTER_LOCK_FREE +#define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +#endif + +#if !defined(__CLANG_ATOMICS) +#define _Atomic(T) struct { volatile __typeof__(T) __val; } +#endif + +/* + * 7.17.2 Initialization. + */ + +#if defined(__CLANG_ATOMICS) +#define ATOMIC_VAR_INIT(value) (value) +#define atomic_init(obj, value) __c11_atomic_init(obj, value) +#else +#define ATOMIC_VAR_INIT(value) { .__val = (value) } +#define atomic_init(obj, value) ((void)((obj)->__val = (value))) +#endif + +/* + * Clang and recent GCC both provide predefined macros for the memory + * orderings. If we are using a compiler that doesn't define them, use the + * clang values - these will be ignored in the fallback path. + */ + +#ifndef __ATOMIC_RELAXED +#define __ATOMIC_RELAXED 0 +#endif +#ifndef __ATOMIC_CONSUME +#define __ATOMIC_CONSUME 1 +#endif +#ifndef __ATOMIC_ACQUIRE +#define __ATOMIC_ACQUIRE 2 +#endif +#ifndef __ATOMIC_RELEASE +#define __ATOMIC_RELEASE 3 +#endif +#ifndef __ATOMIC_ACQ_REL +#define __ATOMIC_ACQ_REL 4 +#endif +#ifndef __ATOMIC_SEQ_CST +#define __ATOMIC_SEQ_CST 5 +#endif + +/* + * 7.17.3 Order and consistency. + * + * The memory_order_* constants that denote the barrier behaviour of the + * atomic operations. + */ + +typedef enum { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_consume = __ATOMIC_CONSUME, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +/* + * 7.17.4 Fences. 
+ */ + +//#define __unused + +//static __inline void +//atomic_thread_fence(memory_order __order __unused) +//{ +// +//#ifdef __CLANG_ATOMICS +// __c11_atomic_thread_fence(__order); +//#elif defined(__GNUC_ATOMICS) +// __atomic_thread_fence(__order); +//#else +// __sync_synchronize(); +//#endif +//} +// +//static __inline void +//atomic_signal_fence(memory_order __order __unused) +//{ +// +//#ifdef __CLANG_ATOMICS +// __c11_atomic_signal_fence(__order); +//#elif defined(__GNUC_ATOMICS) +// __atomic_signal_fence(__order); +//#else +// __asm volatile ("" ::: "memory"); +//#endif +//} + +//#undef __unused + +/* + * 7.17.5 Lock-free property. + */ + +#if defined(_KERNEL) +/* Atomics in kernelspace are always lock-free. */ +#define atomic_is_lock_free(obj) \ + ((void)(obj), (__Bool)1) +#elif defined(__CLANG_ATOMICS) +#define atomic_is_lock_free(obj) \ + __atomic_is_lock_free(sizeof(*(obj)), obj) +#elif defined(__GNUC_ATOMICS) +#define atomic_is_lock_free(obj) \ + __atomic_is_lock_free(sizeof((obj)->__val), &(obj)->__val) +#else +#define atomic_is_lock_free(obj) \ + ((void)(obj), sizeof((obj)->__val) <= sizeof(void *)) +#endif + +/* + * 7.17.6 Atomic integer types. + */ + +typedef _Atomic(__Bool) atomic_bool; +typedef _Atomic(char) atomic_char; +typedef _Atomic(signed char) atomic_schar; +typedef _Atomic(unsigned char) atomic_uchar; +typedef _Atomic(short) atomic_short; +typedef _Atomic(unsigned short) atomic_ushort; +typedef _Atomic(int) atomic_int; +typedef _Atomic(unsigned int) atomic_uint; +typedef _Atomic(long) atomic_long; +typedef _Atomic(unsigned long) atomic_ulong; +typedef _Atomic(long long) atomic_llong; +typedef _Atomic(unsigned long long) atomic_ullong; +#if 0 +typedef _Atomic(char16_t) atomic_char16_t; +typedef _Atomic(char32_t) atomic_char32_t; +#endif +typedef _Atomic(wchar_t) atomic_wchar_t; +typedef _Atomic(int_least8_t) atomic_int_least8_t; +typedef _Atomic(uint_least8_t) atomic_uint_least8_t; +typedef _Atomic(int_least16_t) atomic_int_least16_t; +typedef _Atomic(uint_least16_t) atomic_uint_least16_t; +typedef _Atomic(int_least32_t) atomic_int_least32_t; +typedef _Atomic(uint_least32_t) atomic_uint_least32_t; +typedef _Atomic(int_least64_t) atomic_int_least64_t; +typedef _Atomic(uint_least64_t) atomic_uint_least64_t; +typedef _Atomic(int_fast8_t) atomic_int_fast8_t; +typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; +typedef _Atomic(int_fast16_t) atomic_int_fast16_t; +typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; +typedef _Atomic(int_fast32_t) atomic_int_fast32_t; +typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; +typedef _Atomic(int_fast64_t) atomic_int_fast64_t; +typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; +typedef _Atomic(intptr_t) atomic_intptr_t; +typedef _Atomic(uintptr_t) atomic_uintptr_t; +typedef _Atomic(size_t) atomic_size_t; +typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; +typedef _Atomic(intmax_t) atomic_intmax_t; +typedef _Atomic(uintmax_t) atomic_uintmax_t; + +/* + * 7.17.7 Operations on atomic types. + */ + +/* + * Compiler-specific operations. 
+ */ + +#if defined(__CLANG_ATOMICS) +#define atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) \ + __c11_atomic_compare_exchange_strong(object, expected, desired, \ + success, failure) +#define atomic_compare_exchange_weak_explicit(object, expected, \ + desired, success, failure) \ + __c11_atomic_compare_exchange_weak(object, expected, desired, \ + success, failure) +#define atomic_exchange_explicit(object, desired, order) \ + __c11_atomic_exchange(object, desired, order) +#define atomic_fetch_add_explicit(object, operand, order) \ + __c11_atomic_fetch_add(object, operand, order) +#define atomic_fetch_and_explicit(object, operand, order) \ + __c11_atomic_fetch_and(object, operand, order) +#define atomic_fetch_or_explicit(object, operand, order) \ + __c11_atomic_fetch_or(object, operand, order) +#define atomic_fetch_sub_explicit(object, operand, order) \ + __c11_atomic_fetch_sub(object, operand, order) +#define atomic_fetch_xor_explicit(object, operand, order) \ + __c11_atomic_fetch_xor(object, operand, order) +#define atomic_load_explicit(object, order) \ + __c11_atomic_load(object, order) +#define atomic_store_explicit(object, desired, order) \ + __c11_atomic_store(object, desired, order) +#elif defined(__GNUC_ATOMICS) +#define atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) \ + __atomic_compare_exchange_n(&(object)->__val, expected, \ + desired, 0, success, failure) +#define atomic_compare_exchange_weak_explicit(object, expected, \ + desired, success, failure) \ + __atomic_compare_exchange_n(&(object)->__val, expected, \ + desired, 1, success, failure) +#define atomic_exchange_explicit(object, desired, order) \ + __atomic_exchange_n(&(object)->__val, desired, order) +#define atomic_fetch_add_explicit(object, operand, order) \ + __atomic_fetch_add(&(object)->__val, operand, order) +#define atomic_fetch_and_explicit(object, operand, order) \ + __atomic_fetch_and(&(object)->__val, operand, order) +#define atomic_fetch_or_explicit(object, operand, order) \ + __atomic_fetch_or(&(object)->__val, operand, order) +#define atomic_fetch_sub_explicit(object, operand, order) \ + __atomic_fetch_sub(&(object)->__val, operand, order) +#define atomic_fetch_xor_explicit(object, operand, order) \ + __atomic_fetch_xor(&(object)->__val, operand, order) +#define atomic_load_explicit(object, order) \ + __atomic_load_n(&(object)->__val, order) +#define atomic_store_explicit(object, desired, order) \ + __atomic_store_n(&(object)->__val, desired, order) +#else +#define __atomic_apply_stride(object, operand) \ + (((__typeof__((object)->__val))0) + (operand)) +#define atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) __extension__ ({ \ + __typeof__(expected) __ep = (expected); \ + __typeof__(*__ep) __e = *__ep; \ + (void)(success); (void)(failure); \ + (__Bool)((*__ep = __sync_val_compare_and_swap(&(object)->__val, \ + __e, desired)) == __e); \ +}) +#define atomic_compare_exchange_weak_explicit(object, expected, \ + desired, success, failure) \ + atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) +#if __has_builtin(__sync_swap) +/* Clang provides a full-barrier atomic exchange - use it if available. 
*/ +#define atomic_exchange_explicit(object, desired, order) \ + ((void)(order), __sync_swap(&(object)->__val, desired)) +#else +/* + * __sync_lock_test_and_set() is only an acquire barrier in theory (although in + * practice it is usually a full barrier) so we need an explicit barrier before + * it. + */ +#define atomic_exchange_explicit(object, desired, order) \ +__extension__ ({ \ + __typeof__(object) __o = (object); \ + __typeof__(desired) __d = (desired); \ + (void)(order); \ + __sync_synchronize(); \ + __sync_lock_test_and_set(&(__o)->__val, __d); \ +}) +#endif +#define atomic_fetch_add_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_add(&(object)->__val, \ + __atomic_apply_stride(object, operand))) +#define atomic_fetch_and_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_and(&(object)->__val, operand)) +#define atomic_fetch_or_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_or(&(object)->__val, operand)) +#define atomic_fetch_sub_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_sub(&(object)->__val, \ + __atomic_apply_stride(object, operand))) +#define atomic_fetch_xor_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_xor(&(object)->__val, operand)) +#define atomic_load_explicit(object, order) \ + ((void)(order), __sync_fetch_and_add(&(object)->__val, 0)) +#define atomic_store_explicit(object, desired, order) \ + ((void)atomic_exchange_explicit(object, desired, order)) +#endif + +/* + * Convenience functions. + * + * Don't provide these in kernel space. In kernel space, we should be + * disciplined enough to always provide explicit barriers. + */ + +#ifndef _KERNEL +#define atomic_compare_exchange_strong(object, expected, desired) \ + atomic_compare_exchange_strong_explicit(object, expected, \ + desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_compare_exchange_weak(object, expected, desired) \ + atomic_compare_exchange_weak_explicit(object, expected, \ + desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_exchange(object, desired) \ + atomic_exchange_explicit(object, desired, memory_order_seq_cst) +#define atomic_fetch_add(object, operand) \ + atomic_fetch_add_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_and(object, operand) \ + atomic_fetch_and_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_or(object, operand) \ + atomic_fetch_or_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_sub(object, operand) \ + atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_xor(object, operand) \ + atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst) +#define atomic_load(object) \ + atomic_load_explicit(object, memory_order_seq_cst) +#define atomic_store(object, desired) \ + atomic_store_explicit(object, desired, memory_order_seq_cst) +#endif /* !_KERNEL */ + +/* + * 7.17.8 Atomic flag type and operations. + * + * XXX: Assume atomic_bool can be used as an atomic_flag. Is there some + * kind of compiler built-in type we could use? 
+ */ + +typedef struct { + atomic_bool __flag; +} atomic_flag; + +#define ATOMIC_FLAG_INIT { ATOMIC_VAR_INIT(0) } + +static __inline __Bool +atomic_flag_test_and_set_explicit(volatile atomic_flag *__object, + memory_order __order) +{ + return (atomic_exchange_explicit(&__object->__flag, 1, __order)); +} + +static __inline void +atomic_flag_clear_explicit(volatile atomic_flag *__object, memory_order __order) +{ + + atomic_store_explicit(&__object->__flag, 0, __order); +} + +#ifndef _KERNEL +static __inline __Bool +atomic_flag_test_and_set(volatile atomic_flag *__object) +{ + + return (atomic_flag_test_and_set_explicit(__object, + memory_order_seq_cst)); +} + +static __inline void +atomic_flag_clear(volatile atomic_flag *__object) +{ + + atomic_flag_clear_explicit(__object, memory_order_seq_cst); +} +#endif /* !_KERNEL */ + +#endif /* !_STDATOMIC_H_ */ \ No newline at end of file diff --git a/libdw/ChangeLog b/libdw/ChangeLog index bf1f4857..87abf7a7 100644 --- a/libdw/ChangeLog +++ b/libdw/ChangeLog @@ -1,3 +1,12 @@ +2019-08-15 Jonathon Anderson + + * libdw_alloc.c (__libdw_allocate): Added thread-safe stack allocator. + * libdwP.h (Dwarf): Likewise. + * dwarf_begin_elf.c (dwarf_begin_elf): Support for above. + * dwarf_end.c (dwarf_end): Likewise. + * dwarf_abbrev_hash.{c,h}: Use the *_concurrent hash table. + * Makefile.am: Link -lpthread to provide rwlocks. + 2019-08-12 Mark Wielaard * libdw.map (ELFUTILS_0.177): Add new version of dwelf_elf_begin. diff --git a/libdw/Makefile.am b/libdw/Makefile.am index 7a3d5322..6d0a0187 100644 --- a/libdw/Makefile.am +++ b/libdw/Makefile.am @@ -108,7 +108,7 @@ am_libdw_pic_a_OBJECTS = $(libdw_a_SOURCES:.c=.os) libdw_so_LIBS = libdw_pic.a ../libdwelf/libdwelf_pic.a \ ../libdwfl/libdwfl_pic.a ../libebl/libebl.a libdw_so_DEPS = ../lib/libeu.a ../libelf/libelf.so -libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) +libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) -lpthread libdw_so_SOURCES = libdw.so$(EXEEXT): $(srcdir)/libdw.map $(libdw_so_LIBS) $(libdw_so_DEPS) # The rpath is necessary for libebl because its $ORIGIN use will diff --git a/libdw/dwarf_abbrev_hash.c b/libdw/dwarf_abbrev_hash.c index f52f5ad5..c2548140 100644 --- a/libdw/dwarf_abbrev_hash.c +++ b/libdw/dwarf_abbrev_hash.c @@ -38,7 +38,7 @@ #define next_prime __libdwarf_next_prime extern size_t next_prime (size_t) attribute_hidden; -#include +#include #undef next_prime #define next_prime attribute_hidden __libdwarf_next_prime diff --git a/libdw/dwarf_abbrev_hash.h b/libdw/dwarf_abbrev_hash.h index d2f02ccc..bc3d62c7 100644 --- a/libdw/dwarf_abbrev_hash.h +++ b/libdw/dwarf_abbrev_hash.h @@ -34,6 +34,6 @@ #define TYPE Dwarf_Abbrev * #define COMPARE(a, b) (0) -#include +#include #endif /* dwarf_abbrev_hash.h */ diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c index 38c8f5c6..b3885bb5 100644 --- a/libdw/dwarf_begin_elf.c +++ b/libdw/dwarf_begin_elf.c @@ -417,11 +417,14 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp) /* Initialize the memory handling. 
*/ result->mem_default_size = mem_default_size; result->oom_handler = __libdw_oom; - result->mem_tail = (struct libdw_memblock *) (result + 1); - result->mem_tail->size = (result->mem_default_size - - offsetof (struct libdw_memblock, mem)); - result->mem_tail->remaining = result->mem_tail->size; - result->mem_tail->prev = NULL; + pthread_rwlock_init(&result->mem_rwl, NULL); + result->mem_stacks = 1; + result->mem_tails = malloc (sizeof (struct libdw_memblock *)); + result->mem_tails[0] = (struct libdw_memblock *) (result + 1); + result->mem_tails[0]->size = (result->mem_default_size + - offsetof (struct libdw_memblock, mem)); + result->mem_tails[0]->remaining = result->mem_tails[0]->size; + result->mem_tails[0]->prev = NULL; if (cmd == DWARF_C_READ || cmd == DWARF_C_RDWR) { diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c index 29795c10..6317bcda 100644 --- a/libdw/dwarf_end.c +++ b/libdw/dwarf_end.c @@ -94,14 +94,22 @@ dwarf_end (Dwarf *dwarf) /* And the split Dwarf. */ tdestroy (dwarf->split_tree, noop_free); - struct libdw_memblock *memp = dwarf->mem_tail; - /* The first block is allocated together with the Dwarf object. */ - while (memp->prev != NULL) - { - struct libdw_memblock *prevp = memp->prev; - free (memp); - memp = prevp; - } + for (size_t i = 0; i < dwarf->mem_stacks; i++) + { + struct libdw_memblock *memp = dwarf->mem_tails[i]; + /* The first block is allocated together with the Dwarf object. */ + while (memp != NULL && memp->prev != NULL) + { + struct libdw_memblock *prevp = memp->prev; + free (memp); + memp = prevp; + } + /* Only for stack 0 though, the others are allocated individually. */ + if (memp != NULL && i > 0) + free (memp); + } + free (dwarf->mem_tails); + pthread_rwlock_destroy (&dwarf->mem_rwl); /* Free the pubnames helper structure. */ free (dwarf->pubnames_sets); diff --git a/libdw/libdwP.h b/libdw/libdwP.h index eebb7d12..442d493d 100644 --- a/libdw/libdwP.h +++ b/libdw/libdwP.h @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -218,16 +219,18 @@ struct Dwarf /* Similar for addrx/constx, which will come from .debug_addr section. */ struct Dwarf_CU *fake_addr_cu; - /* Internal memory handling. This is basically a simplified + /* Internal memory handling. This is basically a simplified thread-local reimplementation of obstacks. Unfortunately the standard obstack implementation is not usable in libraries. */ + pthread_rwlock_t mem_rwl; + size_t mem_stacks; struct libdw_memblock { size_t size; size_t remaining; struct libdw_memblock *prev; char mem[0]; - } *mem_tail; + } **mem_tails; /* Default size of allocated memory blocks. */ size_t mem_default_size; @@ -572,7 +575,7 @@ extern void __libdw_seterrno (int value) internal_function; /* Memory handling, the easy parts. This macro does not do any locking. */ #define libdw_alloc(dbg, type, tsize, cnt) \ - ({ struct libdw_memblock *_tail = (dbg)->mem_tail; \ + ({ struct libdw_memblock *_tail = __libdw_alloc_tail(dbg); \ size_t _required = (tsize) * (cnt); \ type *_result = (type *) (_tail->mem + (_tail->size - _tail->remaining));\ size_t _padding = ((__alignof (type) \ @@ -591,6 +594,10 @@ extern void __libdw_seterrno (int value) internal_function; #define libdw_typed_alloc(dbg, type) \ libdw_alloc (dbg, type, sizeof (type), 1) +/* Callback to choose a thread-local memory allocation stack. */ +extern struct libdw_memblock *__libdw_alloc_tail (Dwarf* dbg) + __nonnull_attribute__ (1); + /* Callback to allocate more. 
*/ extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) __attribute__ ((__malloc__)) __nonnull_attribute__ (1); diff --git a/libdw/libdw_alloc.c b/libdw/libdw_alloc.c index f1e08714..c3c5e8a7 100644 --- a/libdw/libdw_alloc.c +++ b/libdw/libdw_alloc.c @@ -33,9 +33,73 @@ #include #include +#include #include "libdwP.h" #include "system.h" +#include "stdatomic.h" +#if USE_VG_ANNOTATIONS == 1 +#include +#include +#else +#define ANNOTATE_HAPPENS_BEFORE(X) +#define ANNOTATE_HAPPENS_AFTER(X) +#endif + + +#define thread_local __thread +static thread_local int initialized = 0; +static thread_local size_t thread_id; +static atomic_size_t next_id = ATOMIC_VAR_INIT(0); + +struct libdw_memblock * +__libdw_alloc_tail (Dwarf *dbg) +{ + if (!initialized) + { + thread_id = atomic_fetch_add (&next_id, 1); + initialized = 1; + } + + pthread_rwlock_rdlock (&dbg->mem_rwl); + if (thread_id >= dbg->mem_stacks) + { + pthread_rwlock_unlock (&dbg->mem_rwl); + pthread_rwlock_wrlock (&dbg->mem_rwl); + + /* Another thread may have already reallocated. In theory using an + atomic would be faster, but given that this only happens once per + thread per Dwarf, some minor slowdown should be fine. */ + if (thread_id >= dbg->mem_stacks) + { + dbg->mem_tails = realloc (dbg->mem_tails, (thread_id+1) + * sizeof (struct libdw_memblock *)); + assert(dbg->mem_tails); + for (size_t i = dbg->mem_stacks; i <= thread_id; i++) + dbg->mem_tails[i] = NULL; + dbg->mem_stacks = thread_id + 1; + ANNOTATE_HAPPENS_BEFORE (&dbg->mem_tails); + } + + pthread_rwlock_unlock (&dbg->mem_rwl); + pthread_rwlock_rdlock (&dbg->mem_rwl); + } + + /* At this point, we have an entry in the tail array. */ + ANNOTATE_HAPPENS_AFTER (&dbg->mem_tails); + struct libdw_memblock *result = dbg->mem_tails[thread_id]; + if (result == NULL) + { + result = malloc (dbg->mem_default_size); + result->size = dbg->mem_default_size + - offsetof (struct libdw_memblock, mem); + result->remaining = result->size; + result->prev = NULL; + dbg->mem_tails[thread_id] = result; + } + pthread_rwlock_unlock (&dbg->mem_rwl); + return result; +} void * __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) @@ -52,8 +116,10 @@ __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) newp->size = size - offsetof (struct libdw_memblock, mem); newp->remaining = (uintptr_t) newp + size - (result + minsize); - newp->prev = dbg->mem_tail; - dbg->mem_tail = newp; + pthread_rwlock_rdlock (&dbg->mem_rwl); + newp->prev = dbg->mem_tails[thread_id]; + dbg->mem_tails[thread_id] = newp; + pthread_rwlock_unlock (&dbg->mem_rwl); return (void *) result; } -- 2.23.0
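For context after the patch itself, here is a rough usage sketch of what the change is meant to enable. It assumes elfutils built with this patch applied, an input file containing DWARF at argv[1], and linking with -ldw -lpthread; CU enumeration is still done serially, and only the dwarf_getabbrev() path, i.e. the abbrev hash table and the allocator behind it, is exercised from several threads at once. The rest of libdw may still need external serialization.

#include <elfutils/libdw.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 4
#define MAXCUS   256

static Dwarf_Die cudies[MAXCUS];
static size_t ncudies;

/* Each worker repeatedly asks for the first abbreviation of every CU.
   Before this patch the shared Dwarf_Abbrev hash table and the memory
   blocks behind it made this a data race; with it, concurrent
   lookups/inserts are intended to be safe.  */
static void *
worker (void *arg)
{
  (void) arg;
  for (int round = 0; round < 1000; round++)
    for (size_t i = 0; i < ncudies; i++)
      {
        size_t len;
        (void) dwarf_getabbrev (&cudies[i], 0, &len);
      }
  return NULL;
}

int
main (int argc, char **argv)
{
  if (argc != 2)
    return 1;
  int fd = open (argv[1], O_RDONLY);
  Dwarf *dw = dwarf_begin (fd, DWARF_C_READ);
  if (dw == NULL)
    return 1;

  /* Enumerate CUs serially; the parallel part below only touches abbrevs.  */
  Dwarf_Off off = 0, next;
  size_t hdr;
  while (ncudies < MAXCUS
         && dwarf_nextcu (dw, off, &next, &hdr, NULL, NULL, NULL) == 0)
    {
      if (dwarf_offdie (dw, off + hdr, &cudies[ncudies]) != NULL)
        ncudies++;
      off = next;
    }

  pthread_t tids[NTHREADS];
  for (int i = 0; i < NTHREADS; i++)
    pthread_create (&tids[i], NULL, worker, NULL);
  for (int i = 0; i < NTHREADS; i++)
    pthread_join (tids[i], NULL);

  dwarf_end (dw);
  return 0;
}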