From 9077c0df713e9adfdee7fe1f66005453316842de Mon Sep 17 00:00:00 2001
From: Jonathon Anderson
Date: Thu, 8 Aug 2019 09:01:56 -0500
Subject: [PATCH] libdw: add thread-safety to dwarf_getabbrev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For applications that need the information in the DIEs, the Dwarf_Abbrev
hash table et al. become a massive data race. This fixes that by:

1. Adding atomics & locks to the hash table to manage concurrency
   (lib/dynamicsizehash_concurrent.{c,h})
2. Adding a lock & array structure to the memory manager (pseudo-TLS)
   (libdwP.h, libdw_alloc.c)
3. Adding extra configure options for Helgrind/DRD annotations
   (configure.ac)
4. Including "stdatomic.h" from FreeBSD, to support C11-style atomics.
   (lib/stdatomic.h)

Signed-off-by: Jonathon Anderson
Signed-off-by: Srđan Milaković
---
 ChangeLog                        |   5 +
 configure.ac                     |  30 ++
 lib/ChangeLog                    |   6 +
 lib/Makefile.am                  |   5 +-
 lib/dynamicsizehash_concurrent.c | 522 +++++++++++++++++++++++++++++++
 lib/dynamicsizehash_concurrent.h | 118 +++++++
 lib/stdatomic.h                  | 442 ++++++++++++++++++++++++++
 libdw/ChangeLog                  |   9 +
 libdw/Makefile.am                |   2 +-
 libdw/dwarf_abbrev_hash.c        |   2 +-
 libdw/dwarf_abbrev_hash.h        |   2 +-
 libdw/dwarf_begin_elf.c          |  13 +-
 libdw/dwarf_end.c                |  24 +-
 libdw/libdwP.h                   |  13 +-
 libdw/libdw_alloc.c              |  70 ++++-
 15 files changed, 1240 insertions(+), 23 deletions(-)
 create mode 100644 lib/dynamicsizehash_concurrent.c
 create mode 100644 lib/dynamicsizehash_concurrent.h
 create mode 100644 lib/stdatomic.h

diff --git a/ChangeLog b/ChangeLog
index bed3999f..93907ddd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2019-08-15  Jonathon Anderson
+
+	* configure.ac: Add new --enable-valgrind-annotations
+	* configure.ac: Add new --with-valgrind (headers only)
+
 2019-08-13  Mark Wielaard
 
 	* configure.ac: Set version to 0.177.
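To make item 2 of the commit message concrete, here is a minimal standalone sketch (not part of the patch) of the pseudo-TLS scheme that libdw_alloc.c adopts further down: each thread claims a small integer id once from a global atomic counter, and that id indexes a per-object array of allocation tails that is read-locked on the fast path and write-locked only when it has to grow. All identifiers here (struct obj, get_tail, and so on) are illustrative stand-ins, not the real libdw names; it uses GCC's __thread, as the patch itself does.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static atomic_size_t next_id;                 /* global id dispenser */
static __thread size_t my_id = (size_t) -1;   /* this thread's slot, assigned lazily */

struct obj                                    /* stand-in for struct Dwarf */
{
  pthread_rwlock_t lock;                      /* guards only the array layout */
  size_t nslots;
  void **tails;                               /* one allocation tail per thread id */
};

static void *
get_tail (struct obj *o)
{
  if (my_id == (size_t) -1)
    my_id = atomic_fetch_add (&next_id, 1);   /* claim an id exactly once per thread */

  pthread_rwlock_rdlock (&o->lock);
  if (my_id >= o->nslots)
    {
      /* Slow path, once per thread per object: grow the array under the
         write lock, rechecking because another thread may have grown it.  */
      pthread_rwlock_unlock (&o->lock);
      pthread_rwlock_wrlock (&o->lock);
      if (my_id >= o->nslots)
        {
          void **t = realloc (o->tails, (my_id + 1) * sizeof *t);
          if (t == NULL)
            abort ();                         /* keep the sketch short */
          for (size_t i = o->nslots; i <= my_id; i++)
            t[i] = NULL;
          o->tails = t;
          o->nslots = my_id + 1;
        }
      pthread_rwlock_unlock (&o->lock);
      pthread_rwlock_rdlock (&o->lock);
    }
  void *tail = o->tails[my_id];               /* this slot is private to this thread */
  pthread_rwlock_unlock (&o->lock);
  return tail;
}

The rwlock only protects the size of the array; once a thread owns its slot, everything hanging off that slot is touched by that thread alone, which is why the common case stays a single rdlock/unlock pair.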
diff --git a/configure.ac b/configure.ac index c443fa3b..c5406b44 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,35 @@ if test "$use_valgrind" = yes; then fi AM_CONDITIONAL(USE_VALGRIND, test "$use_valgrind" = yes) +AC_ARG_WITH([valgrind], +AS_HELP_STRING([--with-valgrind],[include directory for Valgrind headers]), +[with_valgrind_headers=$withval], [with_valgrind_headers=no]) +if test "x$with_valgrind_headers" != xno; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -I$with_valgrind_headers" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include + int main() { return 0; } + ]])], [ HAVE_VALGRIND_HEADERS="yes" + CFLAGS="$save_CFLAGS -I$with_valgrind_headers" ], + [ AC_MSG_ERROR([invalid valgrind include directory: $with_valgrind_headers]) ]) +fi + +AC_ARG_ENABLE([valgrind-annotations], +AS_HELP_STRING([--enable-valgrind-annotations],[insert extra annotations for better valgrind support]), +[use_vg_annotations=$enableval], [use_vg_annotations=no]) +if test "$use_vg_annotations" = yes; then + if test "x$HAVE_VALGRIND_HEADERS" != "xyes"; then + AC_MSG_CHECKING([whether Valgrind headers are available]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #include + int main() { return 0; } + ]])], [ AC_MSG_RESULT([yes]) ], + [ AC_MSG_ERROR([valgrind annotations requested but no headers are available]) ]) + fi +fi +AM_CONDITIONAL(USE_VG_ANNOTATIONS, test "$use_vg_annotations" = yes) + AC_ARG_ENABLE([install-elfh], AS_HELP_STRING([--enable-install-elfh],[install elf.h in include dir]), [install_elfh=$enableval], [install_elfh=no]) @@ -668,6 +697,7 @@ AC_MSG_NOTICE([ OTHER FEATURES Deterministic archives by default : ${default_ar_deterministic} Native language support : ${USE_NLS} + Extra Valgrind annotations : ${use_vg_annotations} EXTRA TEST FEATURES (used with make check) have bunzip2 installed (required) : ${HAVE_BUNZIP2} diff --git a/lib/ChangeLog b/lib/ChangeLog index 7381860c..e6d08509 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,9 @@ +2019-08-08 Jonathon Anderson + + * dynamicsizehash_concurrent.{c,h}: New files. + * stdatomic.h: New file, taken from FreeBSD. + * Makefile.am (noinst_HEADERS): Added *.h above. + 2019-05-03 Rosen Penev * color.c (parse_opt): Cast program_invocation_short_name to char *. diff --git a/lib/Makefile.am b/lib/Makefile.am index 36d21a07..af7228b9 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -38,8 +38,9 @@ libeu_a_SOURCES = xstrdup.c xstrndup.c xmalloc.c next_prime.c \ color.c printversion.c noinst_HEADERS = fixedsizehash.h libeu.h system.h dynamicsizehash.h list.h \ - eu-config.h color.h printversion.h bpf.h -EXTRA_DIST = dynamicsizehash.c + eu-config.h color.h printversion.h bpf.h \ + dynamicsizehash_concurrent.h stdatomic.h +EXTRA_DIST = dynamicsizehash.c dynamicsizehash_concurrent.c if !GPROF xmalloc_CFLAGS = -ffunction-sections diff --git a/lib/dynamicsizehash_concurrent.c b/lib/dynamicsizehash_concurrent.c new file mode 100644 index 00000000..d645b143 --- /dev/null +++ b/lib/dynamicsizehash_concurrent.c @@ -0,0 +1,522 @@ +/* Copyright (C) 2000-2019 Red Hat, Inc. + This file is part of elfutils. + Written by Srdan Milakovic , 2019. + Derived from Ulrich Drepper , 2000. 
+ + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + +#include +#include +#include +#include + +/* Before including this file the following macros must be defined: + + NAME name of the hash table structure. + TYPE data type of the hash table entries + COMPARE comparison function taking two pointers to TYPE objects + + The following macros if present select features: + + ITERATE iterating over the table entries is possible + REVERSE iterate in reverse order of insert + */ + + +static size_t +lookup (NAME *htab, HASHTYPE hval, TYPE val __attribute__ ((unused))) +{ + /* First hash function: simply take the modul but prevent zero. Small values + can skip the division, which helps performance when this is common. */ + size_t idx = 1 + (hval < htab->size ? hval : hval % htab->size); + +#if COMPARE != 0 /* A handful of tables don't actually compare the entries in + the table, they instead rely on the hash. In that case, we + can skip parts that relate to the value. */ + TYPE val_ptr; +#endif + HASHTYPE hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { +#if COMPARE == 0 + return idx; +#else + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) == 0) + return idx; +#endif + } + else if (hash == 0) + { + return 0; + } + + /* Second hash function as suggested in [Knuth]. */ + HASHTYPE second_hash = 1 + hval % (htab->size - 2); + + for(;;) + { + if (idx <= second_hash) + idx = htab->size + idx - second_hash; + else + idx -= second_hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { +#if COMPARE == 0 + return idx; +#else + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) == 0) + return idx; +#endif + } + else if (hash == 0) + { + return 0; + } + } +} + +static int +insert_helper (NAME *htab, HASHTYPE hval, TYPE val) +{ + /* First hash function: simply take the modul but prevent zero. Small values + can skip the division, which helps performance when this is common. */ + size_t idx = 1 + (hval < htab->size ? 
hval : hval % htab->size); + + TYPE val_ptr; + HASHTYPE hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) != 0) + return -1; + } + else if (hash == 0) + { + val_ptr = NULL; + atomic_compare_exchange_strong_explicit(&htab->table[idx].val_ptr, + (uintptr_t *) &val_ptr, + (uintptr_t) val, + memory_order_acquire, + memory_order_acquire); + + if (val_ptr == NULL) + { + atomic_store_explicit(&htab->table[idx].hashval, hval, + memory_order_release); + return 0; + } + else + { + do + { + hash = atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + } + while (hash == 0); + } + } + + /* Second hash function as suggested in [Knuth]. */ + HASHTYPE second_hash = 1 + hval % (htab->size - 2); + + for(;;) + { + if (idx <= second_hash) + idx = htab->size + idx - second_hash; + else + idx -= second_hash; + + hash = atomic_load_explicit(&htab->table[idx].hashval, + memory_order_acquire); + if (hash == hval) + { + val_ptr = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + if (COMPARE(val_ptr, val) != 0) + return -1; + } + else if (hash == 0) + { + val_ptr = NULL; + atomic_compare_exchange_strong_explicit(&htab->table[idx].val_ptr, + (uintptr_t *) &val_ptr, + (uintptr_t) val, + memory_order_acquire, + memory_order_acquire); + + if (val_ptr == NULL) + { + atomic_store_explicit(&htab->table[idx].hashval, hval, + memory_order_release); + return 0; + } + else + { + do + { + hash = atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_acquire); + } + while (hash == 0); + } + } + } +} + +#define NO_RESIZING 0u +#define ALLOCATING_MEMORY 1u +#define MOVING_DATA 3u +#define CLEANING 2u + +#define STATE_BITS 2u +#define STATE_INCREMENT (1u << STATE_BITS) +#define STATE_MASK (STATE_INCREMENT - 1) +#define GET_STATE(A) ((A) & STATE_MASK) + +#define IS_NO_RESIZE_OR_CLEANING(A) (((A) & 0x1u) == 0) + +#define GET_ACTIVE_WORKERS(A) ((A) >> STATE_BITS) + +#define INITIALIZATION_BLOCK_SIZE 256 +#define MOVE_BLOCK_SIZE 256 +#define CEIL(A, B) (((A) + (B) - 1) / (B)) + +/* Initializes records and copies the data from the old table. 
+ It can share work with other threads */ +static void resize_helper(NAME *htab, int blocking) +{ + size_t num_old_blocks = CEIL(htab->old_size, MOVE_BLOCK_SIZE); + size_t num_new_blocks = CEIL(htab->size, INITIALIZATION_BLOCK_SIZE); + + size_t my_block; + size_t num_finished_blocks = 0; + + while ((my_block = atomic_fetch_add_explicit(&htab->next_init_block, 1, + memory_order_acquire)) + < num_new_blocks) + { + size_t record_it = my_block * INITIALIZATION_BLOCK_SIZE; + size_t record_end = (my_block + 1) * INITIALIZATION_BLOCK_SIZE; + if (record_end > htab->size) + record_end = htab->size; + + while (record_it++ != record_end) + { + atomic_init(&htab->table[record_it].hashval, (uintptr_t) NULL); + atomic_init(&htab->table[record_it].val_ptr, (uintptr_t) NULL); + } + + num_finished_blocks++; + } + + atomic_fetch_add_explicit(&htab->num_initialized_blocks, + num_finished_blocks, memory_order_release); + while (atomic_load_explicit(&htab->num_initialized_blocks, + memory_order_acquire) != num_new_blocks); + + /* All block are initialized, start moving */ + num_finished_blocks = 0; + while ((my_block = atomic_fetch_add_explicit(&htab->next_move_block, 1, + memory_order_acquire)) + < num_old_blocks) + { + size_t record_it = my_block * MOVE_BLOCK_SIZE; + size_t record_end = (my_block + 1) * MOVE_BLOCK_SIZE; + if (record_end > htab->old_size) + record_end = htab->old_size; + + while (record_it++ != record_end) + { + TYPE val_ptr = (TYPE) atomic_load_explicit( + &htab->old_table[record_it].val_ptr, + memory_order_acquire); + if (val_ptr == NULL) + continue; + + HASHTYPE hashval = atomic_load_explicit( + &htab->old_table[record_it].hashval, + memory_order_acquire); + assert(hashval); + + insert_helper(htab, hashval, val_ptr); + } + + num_finished_blocks++; + } + + atomic_fetch_add_explicit(&htab->num_moved_blocks, num_finished_blocks, + memory_order_release); + + if (blocking) + while (atomic_load_explicit(&htab->num_moved_blocks, + memory_order_acquire) != num_old_blocks); +} + +static void +resize_master(NAME *htab) +{ + htab->old_size = htab->size; + htab->old_table = htab->table; + + htab->size = next_prime(htab->size * 2); + htab->table = malloc((1 + htab->size) * sizeof(htab->table[0])); + assert(htab->table); + + /* Change state from ALLOCATING_MEMORY to MOVING_DATA */ + atomic_fetch_xor_explicit(&htab->resizing_state, + ALLOCATING_MEMORY ^ MOVING_DATA, + memory_order_release); + + resize_helper(htab, 1); + + /* Change state from MOVING_DATA to CLEANING */ + size_t resize_state = atomic_fetch_xor_explicit(&htab->resizing_state, + MOVING_DATA ^ CLEANING, + memory_order_acq_rel); + while (GET_ACTIVE_WORKERS(resize_state) != 0) + resize_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + + /* There are no more active workers */ + atomic_store_explicit(&htab->next_init_block, 0, memory_order_relaxed); + atomic_store_explicit(&htab->num_initialized_blocks, 0, + memory_order_relaxed); + + atomic_store_explicit(&htab->next_move_block, 0, memory_order_relaxed); + atomic_store_explicit(&htab->num_moved_blocks, 0, memory_order_relaxed); + + free(htab->old_table); + + /* Change state to NO_RESIZING */ + atomic_fetch_xor_explicit(&htab->resizing_state, CLEANING ^ NO_RESIZING, + memory_order_relaxed); + +} + +static void +resize_worker(NAME *htab) +{ + size_t resize_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + + /* If the resize has finished */ + if (IS_NO_RESIZE_OR_CLEANING(resize_state)) + return; + + /* Register as worker and check if the 
resize has finished in the meantime*/ + resize_state = atomic_fetch_add_explicit(&htab->resizing_state, + STATE_INCREMENT, + memory_order_acquire); + if (IS_NO_RESIZE_OR_CLEANING(resize_state)) + { + atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT, + memory_order_relaxed); + return; + } + + /* Wait while the new table is being allocated. */ + while (GET_STATE(resize_state) == ALLOCATING_MEMORY) + resize_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + + /* Check if the resize is done */ + assert(GET_STATE(resize_state) != NO_RESIZING); + if (GET_STATE(resize_state) == CLEANING) + { + atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT, + memory_order_relaxed); + return; + } + + resize_helper(htab, 0); + + /* Deregister worker */ + atomic_fetch_sub_explicit(&htab->resizing_state, STATE_INCREMENT, + memory_order_release); +} + + +int +#define INIT(name) _INIT (name) +#define _INIT(name) \ + name##_init +INIT(NAME) (NAME *htab, size_t init_size) +{ + /* We need the size to be a prime. */ + init_size = next_prime (init_size); + + /* Initialize the data structure. */ + htab->size = init_size; + atomic_init(&htab->filled, 0); + atomic_init(&htab->resizing_state, 0); + + atomic_init(&htab->next_init_block, 0); + atomic_init(&htab->num_initialized_blocks, 0); + + atomic_init(&htab->next_move_block, 0); + atomic_init(&htab->num_moved_blocks, 0); + + pthread_rwlock_init(&htab->resize_rwl, NULL); + + htab->table = (void *) malloc ((init_size + 1) * sizeof (htab->table[0])); + if (htab->table == NULL) + return -1; + + for (size_t i = 0; i <= init_size; i++) + { + atomic_init(&htab->table[i].hashval, (uintptr_t) NULL); + atomic_init(&htab->table[i].val_ptr, (uintptr_t) NULL); + } + + return 0; +} + + +int +#define FREE(name) _FREE (name) +#define _FREE(name) \ +name##_free +FREE(NAME) (NAME *htab) +{ + pthread_rwlock_destroy(&htab->resize_rwl); + free (htab->table); + return 0; +} + + +int +#define INSERT(name) _INSERT (name) +#define _INSERT(name) \ +name##_insert +INSERT(NAME) (NAME *htab, HASHTYPE hval, TYPE data) +{ + int incremented = 0; + + for(;;) + { + while (pthread_rwlock_tryrdlock(&htab->resize_rwl) != 0) + resize_worker(htab); + + size_t filled; + if (!incremented) + { + filled = atomic_fetch_add_explicit(&htab->filled, 1, + memory_order_acquire); + incremented = 1; + } + else + { + filled = atomic_load_explicit(&htab->filled, + memory_order_acquire); + } + + + if (100 * filled > 90 * htab->size) + { + /* Table is filled more than 90%. Resize the table. 
*/ + + size_t resizing_state = atomic_load_explicit(&htab->resizing_state, + memory_order_acquire); + if (resizing_state == 0 && + atomic_compare_exchange_strong_explicit(&htab->resizing_state, + &resizing_state, + ALLOCATING_MEMORY, + memory_order_acquire, + memory_order_acquire)) + { + /* Master thread */ + pthread_rwlock_unlock(&htab->resize_rwl); + + pthread_rwlock_wrlock(&htab->resize_rwl); + resize_master(htab); + pthread_rwlock_unlock(&htab->resize_rwl); + + } + else + { + /* Worker thread */ + pthread_rwlock_unlock(&htab->resize_rwl); + resize_worker(htab); + } + } + else + { + /* Lock acquired, no need for resize*/ + break; + } + } + + int ret_val = insert_helper(htab, hval, data); + if (ret_val == -1) + atomic_fetch_sub_explicit(&htab->filled, 1, memory_order_relaxed); + pthread_rwlock_unlock(&htab->resize_rwl); + return ret_val; +} + + + +TYPE +#define FIND(name) _FIND (name) +#define _FIND(name) \ + name##_find +FIND(NAME) (NAME *htab, HASHTYPE hval, TYPE val) +{ + while (pthread_rwlock_tryrdlock(&htab->resize_rwl) != 0) + resize_worker(htab); + + size_t idx; + + /* Make the hash data nonzero. */ + hval = hval ?: 1; + idx = lookup(htab, hval, val); + + if (idx == 0) + { + pthread_rwlock_unlock(&htab->resize_rwl); + return NULL; + } + + /* get a copy before unlocking the lock */ + TYPE ret_val = (TYPE) atomic_load_explicit(&htab->table[idx].val_ptr, + memory_order_relaxed); + + pthread_rwlock_unlock(&htab->resize_rwl); + return ret_val; +} + diff --git a/lib/dynamicsizehash_concurrent.h b/lib/dynamicsizehash_concurrent.h new file mode 100644 index 00000000..eb06ac9e --- /dev/null +++ b/lib/dynamicsizehash_concurrent.h @@ -0,0 +1,118 @@ +/* Copyright (C) 2000-2019 Red Hat, Inc. + This file is part of elfutils. + Written by Srdan Milakovic , 2019. + Derived from Ulrich Drepper , 2000. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see . */ + +#include +#include +#include "stdatomic.h" +/* Before including this file the following macros must be defined: + + NAME name of the hash table structure. + TYPE data type of the hash table entries + + The following macros if present select features: + + ITERATE iterating over the table entries is possible + HASHTYPE integer type for hash values, default unsigned long int + */ + + + +#ifndef HASHTYPE +# define HASHTYPE unsigned long int +#endif + +#ifndef RESIZE_BLOCK_SIZE +# define RESIZE_BLOCK_SIZE 256 +#endif + +/* Defined separately. */ +extern size_t next_prime (size_t seed); + + +/* Table entry type. 
*/ +#define _DYNHASHCONENTTYPE(name) \ + typedef struct name##_ent \ + { \ + _Atomic(HASHTYPE) hashval; \ + atomic_uintptr_t val_ptr; \ + } name##_ent +#define DYNHASHENTTYPE(name) _DYNHASHCONENTTYPE (name) +DYNHASHENTTYPE (NAME); + +/* Type of the dynamic hash table data structure. */ +#define _DYNHASHCONTYPE(name) \ +typedef struct \ +{ \ + size_t size; \ + size_t old_size; \ + atomic_size_t filled; \ + name##_ent *table; \ + name##_ent *old_table; \ + atomic_size_t resizing_state; \ + atomic_size_t next_init_block; \ + atomic_size_t num_initialized_blocks; \ + atomic_size_t next_move_block; \ + atomic_size_t num_moved_blocks; \ + pthread_rwlock_t resize_rwl; \ +} name +#define DYNHASHTYPE(name) _DYNHASHCONTYPE (name) +DYNHASHTYPE (NAME); + + + +#define _FUNCTIONS(name) \ +/* Initialize the hash table. */ \ +extern int name##_init (name *htab, size_t init_size); \ + \ +/* Free resources allocated for hash table. */ \ +extern int name##_free (name *htab); \ + \ +/* Insert new entry. */ \ +extern int name##_insert (name *htab, HASHTYPE hval, TYPE data); \ + \ +/* Find entry in hash table. */ \ +extern TYPE name##_find (name *htab, HASHTYPE hval, TYPE val); +#define FUNCTIONS(name) _FUNCTIONS (name) +FUNCTIONS (NAME) + + +#ifndef NO_UNDEF +# undef DYNHASHENTTYPE +# undef DYNHASHTYPE +# undef FUNCTIONS +# undef _FUNCTIONS +# undef XFUNCTIONS +# undef _XFUNCTIONS +# undef NAME +# undef TYPE +# undef ITERATE +# undef COMPARE +# undef FIRST +# undef NEXT +#endif diff --git a/lib/stdatomic.h b/lib/stdatomic.h new file mode 100644 index 00000000..49626662 --- /dev/null +++ b/lib/stdatomic.h @@ -0,0 +1,442 @@ +/*- + * Copyright (c) 2011 Ed Schouten + * David Chisnall + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _STDATOMIC_H_ +#define _STDATOMIC_H_ + +#include +#include + +#if !defined(__has_feature) +#define __has_feature(x) 0 +#endif +#if !defined(__has_builtin) +#define __has_builtin(x) 0 +#endif +#if !defined(__GNUC_PREREQ__) +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +#define __GNUC_PREREQ__(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +#define __GNUC_PREREQ__(maj, min) 0 +#endif +#endif + +#if !defined(__CLANG_ATOMICS) && !defined(__GNUC_ATOMICS) +#if __has_feature(c_atomic) +#define __CLANG_ATOMICS +#elif __GNUC_PREREQ__(4, 7) +#define __GNUC_ATOMICS +#elif !defined(__GNUC__) +#error "stdatomic.h does not support your compiler" +#endif +#endif + +/* + * language independent type to represent a Boolean value + */ + +typedef int __Bool; + +/* + * 7.17.1 Atomic lock-free macros. + */ + +#ifdef __GCC_ATOMIC_BOOL_LOCK_FREE +#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_CHAR_LOCK_FREE +#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_CHAR16_T_LOCK_FREE +#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_CHAR32_T_LOCK_FREE +#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_WCHAR_T_LOCK_FREE +#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_SHORT_LOCK_FREE +#define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_INT_LOCK_FREE +#define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_LONG_LOCK_FREE +#define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_LLONG_LOCK_FREE +#define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE +#endif +#ifdef __GCC_ATOMIC_POINTER_LOCK_FREE +#define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +#endif + +#if !defined(__CLANG_ATOMICS) +#define _Atomic(T) struct { volatile __typeof__(T) __val; } +#endif + +/* + * 7.17.2 Initialization. + */ + +#if defined(__CLANG_ATOMICS) +#define ATOMIC_VAR_INIT(value) (value) +#define atomic_init(obj, value) __c11_atomic_init(obj, value) +#else +#define ATOMIC_VAR_INIT(value) { .__val = (value) } +#define atomic_init(obj, value) ((void)((obj)->__val = (value))) +#endif + +/* + * Clang and recent GCC both provide predefined macros for the memory + * orderings. If we are using a compiler that doesn't define them, use the + * clang values - these will be ignored in the fallback path. + */ + +#ifndef __ATOMIC_RELAXED +#define __ATOMIC_RELAXED 0 +#endif +#ifndef __ATOMIC_CONSUME +#define __ATOMIC_CONSUME 1 +#endif +#ifndef __ATOMIC_ACQUIRE +#define __ATOMIC_ACQUIRE 2 +#endif +#ifndef __ATOMIC_RELEASE +#define __ATOMIC_RELEASE 3 +#endif +#ifndef __ATOMIC_ACQ_REL +#define __ATOMIC_ACQ_REL 4 +#endif +#ifndef __ATOMIC_SEQ_CST +#define __ATOMIC_SEQ_CST 5 +#endif + +/* + * 7.17.3 Order and consistency. + * + * The memory_order_* constants that denote the barrier behaviour of the + * atomic operations. + */ + +typedef enum { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_consume = __ATOMIC_CONSUME, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +/* + * 7.17.4 Fences. 
+ */ + +//#define __unused + +//static __inline void +//atomic_thread_fence(memory_order __order __unused) +//{ +// +//#ifdef __CLANG_ATOMICS +// __c11_atomic_thread_fence(__order); +//#elif defined(__GNUC_ATOMICS) +// __atomic_thread_fence(__order); +//#else +// __sync_synchronize(); +//#endif +//} +// +//static __inline void +//atomic_signal_fence(memory_order __order __unused) +//{ +// +//#ifdef __CLANG_ATOMICS +// __c11_atomic_signal_fence(__order); +//#elif defined(__GNUC_ATOMICS) +// __atomic_signal_fence(__order); +//#else +// __asm volatile ("" ::: "memory"); +//#endif +//} + +//#undef __unused + +/* + * 7.17.5 Lock-free property. + */ + +#if defined(_KERNEL) +/* Atomics in kernelspace are always lock-free. */ +#define atomic_is_lock_free(obj) \ + ((void)(obj), (__Bool)1) +#elif defined(__CLANG_ATOMICS) +#define atomic_is_lock_free(obj) \ + __atomic_is_lock_free(sizeof(*(obj)), obj) +#elif defined(__GNUC_ATOMICS) +#define atomic_is_lock_free(obj) \ + __atomic_is_lock_free(sizeof((obj)->__val), &(obj)->__val) +#else +#define atomic_is_lock_free(obj) \ + ((void)(obj), sizeof((obj)->__val) <= sizeof(void *)) +#endif + +/* + * 7.17.6 Atomic integer types. + */ + +typedef _Atomic(__Bool) atomic_bool; +typedef _Atomic(char) atomic_char; +typedef _Atomic(signed char) atomic_schar; +typedef _Atomic(unsigned char) atomic_uchar; +typedef _Atomic(short) atomic_short; +typedef _Atomic(unsigned short) atomic_ushort; +typedef _Atomic(int) atomic_int; +typedef _Atomic(unsigned int) atomic_uint; +typedef _Atomic(long) atomic_long; +typedef _Atomic(unsigned long) atomic_ulong; +typedef _Atomic(long long) atomic_llong; +typedef _Atomic(unsigned long long) atomic_ullong; +#if 0 +typedef _Atomic(char16_t) atomic_char16_t; +typedef _Atomic(char32_t) atomic_char32_t; +#endif +typedef _Atomic(wchar_t) atomic_wchar_t; +typedef _Atomic(int_least8_t) atomic_int_least8_t; +typedef _Atomic(uint_least8_t) atomic_uint_least8_t; +typedef _Atomic(int_least16_t) atomic_int_least16_t; +typedef _Atomic(uint_least16_t) atomic_uint_least16_t; +typedef _Atomic(int_least32_t) atomic_int_least32_t; +typedef _Atomic(uint_least32_t) atomic_uint_least32_t; +typedef _Atomic(int_least64_t) atomic_int_least64_t; +typedef _Atomic(uint_least64_t) atomic_uint_least64_t; +typedef _Atomic(int_fast8_t) atomic_int_fast8_t; +typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; +typedef _Atomic(int_fast16_t) atomic_int_fast16_t; +typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; +typedef _Atomic(int_fast32_t) atomic_int_fast32_t; +typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; +typedef _Atomic(int_fast64_t) atomic_int_fast64_t; +typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; +typedef _Atomic(intptr_t) atomic_intptr_t; +typedef _Atomic(uintptr_t) atomic_uintptr_t; +typedef _Atomic(size_t) atomic_size_t; +typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; +typedef _Atomic(intmax_t) atomic_intmax_t; +typedef _Atomic(uintmax_t) atomic_uintmax_t; + +/* + * 7.17.7 Operations on atomic types. + */ + +/* + * Compiler-specific operations. 
+ */ + +#if defined(__CLANG_ATOMICS) +#define atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) \ + __c11_atomic_compare_exchange_strong(object, expected, desired, \ + success, failure) +#define atomic_compare_exchange_weak_explicit(object, expected, \ + desired, success, failure) \ + __c11_atomic_compare_exchange_weak(object, expected, desired, \ + success, failure) +#define atomic_exchange_explicit(object, desired, order) \ + __c11_atomic_exchange(object, desired, order) +#define atomic_fetch_add_explicit(object, operand, order) \ + __c11_atomic_fetch_add(object, operand, order) +#define atomic_fetch_and_explicit(object, operand, order) \ + __c11_atomic_fetch_and(object, operand, order) +#define atomic_fetch_or_explicit(object, operand, order) \ + __c11_atomic_fetch_or(object, operand, order) +#define atomic_fetch_sub_explicit(object, operand, order) \ + __c11_atomic_fetch_sub(object, operand, order) +#define atomic_fetch_xor_explicit(object, operand, order) \ + __c11_atomic_fetch_xor(object, operand, order) +#define atomic_load_explicit(object, order) \ + __c11_atomic_load(object, order) +#define atomic_store_explicit(object, desired, order) \ + __c11_atomic_store(object, desired, order) +#elif defined(__GNUC_ATOMICS) +#define atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) \ + __atomic_compare_exchange_n(&(object)->__val, expected, \ + desired, 0, success, failure) +#define atomic_compare_exchange_weak_explicit(object, expected, \ + desired, success, failure) \ + __atomic_compare_exchange_n(&(object)->__val, expected, \ + desired, 1, success, failure) +#define atomic_exchange_explicit(object, desired, order) \ + __atomic_exchange_n(&(object)->__val, desired, order) +#define atomic_fetch_add_explicit(object, operand, order) \ + __atomic_fetch_add(&(object)->__val, operand, order) +#define atomic_fetch_and_explicit(object, operand, order) \ + __atomic_fetch_and(&(object)->__val, operand, order) +#define atomic_fetch_or_explicit(object, operand, order) \ + __atomic_fetch_or(&(object)->__val, operand, order) +#define atomic_fetch_sub_explicit(object, operand, order) \ + __atomic_fetch_sub(&(object)->__val, operand, order) +#define atomic_fetch_xor_explicit(object, operand, order) \ + __atomic_fetch_xor(&(object)->__val, operand, order) +#define atomic_load_explicit(object, order) \ + __atomic_load_n(&(object)->__val, order) +#define atomic_store_explicit(object, desired, order) \ + __atomic_store_n(&(object)->__val, desired, order) +#else +#define __atomic_apply_stride(object, operand) \ + (((__typeof__((object)->__val))0) + (operand)) +#define atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) __extension__ ({ \ + __typeof__(expected) __ep = (expected); \ + __typeof__(*__ep) __e = *__ep; \ + (void)(success); (void)(failure); \ + (__Bool)((*__ep = __sync_val_compare_and_swap(&(object)->__val, \ + __e, desired)) == __e); \ +}) +#define atomic_compare_exchange_weak_explicit(object, expected, \ + desired, success, failure) \ + atomic_compare_exchange_strong_explicit(object, expected, \ + desired, success, failure) +#if __has_builtin(__sync_swap) +/* Clang provides a full-barrier atomic exchange - use it if available. 
*/ +#define atomic_exchange_explicit(object, desired, order) \ + ((void)(order), __sync_swap(&(object)->__val, desired)) +#else +/* + * __sync_lock_test_and_set() is only an acquire barrier in theory (although in + * practice it is usually a full barrier) so we need an explicit barrier before + * it. + */ +#define atomic_exchange_explicit(object, desired, order) \ +__extension__ ({ \ + __typeof__(object) __o = (object); \ + __typeof__(desired) __d = (desired); \ + (void)(order); \ + __sync_synchronize(); \ + __sync_lock_test_and_set(&(__o)->__val, __d); \ +}) +#endif +#define atomic_fetch_add_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_add(&(object)->__val, \ + __atomic_apply_stride(object, operand))) +#define atomic_fetch_and_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_and(&(object)->__val, operand)) +#define atomic_fetch_or_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_or(&(object)->__val, operand)) +#define atomic_fetch_sub_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_sub(&(object)->__val, \ + __atomic_apply_stride(object, operand))) +#define atomic_fetch_xor_explicit(object, operand, order) \ + ((void)(order), __sync_fetch_and_xor(&(object)->__val, operand)) +#define atomic_load_explicit(object, order) \ + ((void)(order), __sync_fetch_and_add(&(object)->__val, 0)) +#define atomic_store_explicit(object, desired, order) \ + ((void)atomic_exchange_explicit(object, desired, order)) +#endif + +/* + * Convenience functions. + * + * Don't provide these in kernel space. In kernel space, we should be + * disciplined enough to always provide explicit barriers. + */ + +#ifndef _KERNEL +#define atomic_compare_exchange_strong(object, expected, desired) \ + atomic_compare_exchange_strong_explicit(object, expected, \ + desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_compare_exchange_weak(object, expected, desired) \ + atomic_compare_exchange_weak_explicit(object, expected, \ + desired, memory_order_seq_cst, memory_order_seq_cst) +#define atomic_exchange(object, desired) \ + atomic_exchange_explicit(object, desired, memory_order_seq_cst) +#define atomic_fetch_add(object, operand) \ + atomic_fetch_add_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_and(object, operand) \ + atomic_fetch_and_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_or(object, operand) \ + atomic_fetch_or_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_sub(object, operand) \ + atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst) +#define atomic_fetch_xor(object, operand) \ + atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst) +#define atomic_load(object) \ + atomic_load_explicit(object, memory_order_seq_cst) +#define atomic_store(object, desired) \ + atomic_store_explicit(object, desired, memory_order_seq_cst) +#endif /* !_KERNEL */ + +/* + * 7.17.8 Atomic flag type and operations. + * + * XXX: Assume atomic_bool can be used as an atomic_flag. Is there some + * kind of compiler built-in type we could use? 
+ */ + +typedef struct { + atomic_bool __flag; +} atomic_flag; + +#define ATOMIC_FLAG_INIT { ATOMIC_VAR_INIT(0) } + +static __inline __Bool +atomic_flag_test_and_set_explicit(volatile atomic_flag *__object, + memory_order __order) +{ + return (atomic_exchange_explicit(&__object->__flag, 1, __order)); +} + +static __inline void +atomic_flag_clear_explicit(volatile atomic_flag *__object, memory_order __order) +{ + + atomic_store_explicit(&__object->__flag, 0, __order); +} + +#ifndef _KERNEL +static __inline __Bool +atomic_flag_test_and_set(volatile atomic_flag *__object) +{ + + return (atomic_flag_test_and_set_explicit(__object, + memory_order_seq_cst)); +} + +static __inline void +atomic_flag_clear(volatile atomic_flag *__object) +{ + + atomic_flag_clear_explicit(__object, memory_order_seq_cst); +} +#endif /* !_KERNEL */ + +#endif /* !_STDATOMIC_H_ */ \ No newline at end of file diff --git a/libdw/ChangeLog b/libdw/ChangeLog index bf1f4857..87abf7a7 100644 --- a/libdw/ChangeLog +++ b/libdw/ChangeLog @@ -1,3 +1,12 @@ +2019-08-15 Jonathon Anderson + + * libdw_alloc.c (__libdw_allocate): Added thread-safe stack allocator. + * libdwP.h (Dwarf): Likewise. + * dwarf_begin_elf.c (dwarf_begin_elf): Support for above. + * dwarf_end.c (dwarf_end): Likewise. + * dwarf_abbrev_hash.{c,h}: Use the *_concurrent hash table. + * Makefile.am: Link -lpthread to provide rwlocks. + 2019-08-12 Mark Wielaard * libdw.map (ELFUTILS_0.177): Add new version of dwelf_elf_begin. diff --git a/libdw/Makefile.am b/libdw/Makefile.am index 7a3d5322..6d0a0187 100644 --- a/libdw/Makefile.am +++ b/libdw/Makefile.am @@ -108,7 +108,7 @@ am_libdw_pic_a_OBJECTS = $(libdw_a_SOURCES:.c=.os) libdw_so_LIBS = libdw_pic.a ../libdwelf/libdwelf_pic.a \ ../libdwfl/libdwfl_pic.a ../libebl/libebl.a libdw_so_DEPS = ../lib/libeu.a ../libelf/libelf.so -libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) +libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) -lpthread libdw_so_SOURCES = libdw.so$(EXEEXT): $(srcdir)/libdw.map $(libdw_so_LIBS) $(libdw_so_DEPS) # The rpath is necessary for libebl because its $ORIGIN use will diff --git a/libdw/dwarf_abbrev_hash.c b/libdw/dwarf_abbrev_hash.c index f52f5ad5..c2548140 100644 --- a/libdw/dwarf_abbrev_hash.c +++ b/libdw/dwarf_abbrev_hash.c @@ -38,7 +38,7 @@ #define next_prime __libdwarf_next_prime extern size_t next_prime (size_t) attribute_hidden; -#include +#include #undef next_prime #define next_prime attribute_hidden __libdwarf_next_prime diff --git a/libdw/dwarf_abbrev_hash.h b/libdw/dwarf_abbrev_hash.h index d2f02ccc..bc3d62c7 100644 --- a/libdw/dwarf_abbrev_hash.h +++ b/libdw/dwarf_abbrev_hash.h @@ -34,6 +34,6 @@ #define TYPE Dwarf_Abbrev * #define COMPARE(a, b) (0) -#include +#include #endif /* dwarf_abbrev_hash.h */ diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c index 38c8f5c6..b3885bb5 100644 --- a/libdw/dwarf_begin_elf.c +++ b/libdw/dwarf_begin_elf.c @@ -417,11 +417,14 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp) /* Initialize the memory handling. 
*/ result->mem_default_size = mem_default_size; result->oom_handler = __libdw_oom; - result->mem_tail = (struct libdw_memblock *) (result + 1); - result->mem_tail->size = (result->mem_default_size - - offsetof (struct libdw_memblock, mem)); - result->mem_tail->remaining = result->mem_tail->size; - result->mem_tail->prev = NULL; + pthread_rwlock_init(&result->mem_rwl, NULL); + result->mem_stacks = 1; + result->mem_tails = malloc (sizeof (struct libdw_memblock *)); + result->mem_tails[0] = (struct libdw_memblock *) (result + 1); + result->mem_tails[0]->size = (result->mem_default_size + - offsetof (struct libdw_memblock, mem)); + result->mem_tails[0]->remaining = result->mem_tails[0]->size; + result->mem_tails[0]->prev = NULL; if (cmd == DWARF_C_READ || cmd == DWARF_C_RDWR) { diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c index 29795c10..6317bcda 100644 --- a/libdw/dwarf_end.c +++ b/libdw/dwarf_end.c @@ -94,14 +94,22 @@ dwarf_end (Dwarf *dwarf) /* And the split Dwarf. */ tdestroy (dwarf->split_tree, noop_free); - struct libdw_memblock *memp = dwarf->mem_tail; - /* The first block is allocated together with the Dwarf object. */ - while (memp->prev != NULL) - { - struct libdw_memblock *prevp = memp->prev; - free (memp); - memp = prevp; - } + for (size_t i = 0; i < dwarf->mem_stacks; i++) + { + struct libdw_memblock *memp = dwarf->mem_tails[i]; + /* The first block is allocated together with the Dwarf object. */ + while (memp != NULL && memp->prev != NULL) + { + struct libdw_memblock *prevp = memp->prev; + free (memp); + memp = prevp; + } + /* Only for stack 0 though, the others are allocated individually. */ + if (memp != NULL && i > 0) + free (memp); + } + free (dwarf->mem_tails); + pthread_rwlock_destroy (&dwarf->mem_rwl); /* Free the pubnames helper structure. */ free (dwarf->pubnames_sets); diff --git a/libdw/libdwP.h b/libdw/libdwP.h index eebb7d12..442d493d 100644 --- a/libdw/libdwP.h +++ b/libdw/libdwP.h @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -218,16 +219,18 @@ struct Dwarf /* Similar for addrx/constx, which will come from .debug_addr section. */ struct Dwarf_CU *fake_addr_cu; - /* Internal memory handling. This is basically a simplified + /* Internal memory handling. This is basically a simplified thread-local reimplementation of obstacks. Unfortunately the standard obstack implementation is not usable in libraries. */ + pthread_rwlock_t mem_rwl; + size_t mem_stacks; struct libdw_memblock { size_t size; size_t remaining; struct libdw_memblock *prev; char mem[0]; - } *mem_tail; + } **mem_tails; /* Default size of allocated memory blocks. */ size_t mem_default_size; @@ -572,7 +575,7 @@ extern void __libdw_seterrno (int value) internal_function; /* Memory handling, the easy parts. This macro does not do any locking. */ #define libdw_alloc(dbg, type, tsize, cnt) \ - ({ struct libdw_memblock *_tail = (dbg)->mem_tail; \ + ({ struct libdw_memblock *_tail = __libdw_alloc_tail(dbg); \ size_t _required = (tsize) * (cnt); \ type *_result = (type *) (_tail->mem + (_tail->size - _tail->remaining));\ size_t _padding = ((__alignof (type) \ @@ -591,6 +594,10 @@ extern void __libdw_seterrno (int value) internal_function; #define libdw_typed_alloc(dbg, type) \ libdw_alloc (dbg, type, sizeof (type), 1) +/* Callback to choose a thread-local memory allocation stack. */ +extern struct libdw_memblock *__libdw_alloc_tail (Dwarf* dbg) + __nonnull_attribute__ (1); + /* Callback to allocate more. 
*/ extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) __attribute__ ((__malloc__)) __nonnull_attribute__ (1); diff --git a/libdw/libdw_alloc.c b/libdw/libdw_alloc.c index f1e08714..c3c5e8a7 100644 --- a/libdw/libdw_alloc.c +++ b/libdw/libdw_alloc.c @@ -33,9 +33,73 @@ #include #include +#include #include "libdwP.h" #include "system.h" +#include "stdatomic.h" +#if USE_VG_ANNOTATIONS == 1 +#include +#include +#else +#define ANNOTATE_HAPPENS_BEFORE(X) +#define ANNOTATE_HAPPENS_AFTER(X) +#endif + + +#define thread_local __thread +static thread_local int initialized = 0; +static thread_local size_t thread_id; +static atomic_size_t next_id = ATOMIC_VAR_INIT(0); + +struct libdw_memblock * +__libdw_alloc_tail (Dwarf *dbg) +{ + if (!initialized) + { + thread_id = atomic_fetch_add (&next_id, 1); + initialized = 1; + } + + pthread_rwlock_rdlock (&dbg->mem_rwl); + if (thread_id >= dbg->mem_stacks) + { + pthread_rwlock_unlock (&dbg->mem_rwl); + pthread_rwlock_wrlock (&dbg->mem_rwl); + + /* Another thread may have already reallocated. In theory using an + atomic would be faster, but given that this only happens once per + thread per Dwarf, some minor slowdown should be fine. */ + if (thread_id >= dbg->mem_stacks) + { + dbg->mem_tails = realloc (dbg->mem_tails, (thread_id+1) + * sizeof (struct libdw_memblock *)); + assert(dbg->mem_tails); + for (size_t i = dbg->mem_stacks; i <= thread_id; i++) + dbg->mem_tails[i] = NULL; + dbg->mem_stacks = thread_id + 1; + ANNOTATE_HAPPENS_BEFORE (&dbg->mem_tails); + } + + pthread_rwlock_unlock (&dbg->mem_rwl); + pthread_rwlock_rdlock (&dbg->mem_rwl); + } + + /* At this point, we have an entry in the tail array. */ + ANNOTATE_HAPPENS_AFTER (&dbg->mem_tails); + struct libdw_memblock *result = dbg->mem_tails[thread_id]; + if (result == NULL) + { + result = malloc (dbg->mem_default_size); + result->size = dbg->mem_default_size + - offsetof (struct libdw_memblock, mem); + result->remaining = result->size; + result->prev = NULL; + dbg->mem_tails[thread_id] = result; + } + pthread_rwlock_unlock (&dbg->mem_rwl); + return result; +} void * __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) @@ -52,8 +116,10 @@ __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) newp->size = size - offsetof (struct libdw_memblock, mem); newp->remaining = (uintptr_t) newp + size - (result + minsize); - newp->prev = dbg->mem_tail; - dbg->mem_tail = newp; + pthread_rwlock_rdlock (&dbg->mem_rwl); + newp->prev = dbg->mem_tails[thread_id]; + dbg->mem_tails[thread_id] = newp; + pthread_rwlock_unlock (&dbg->mem_rwl); return (void *) result; } -- 2.23.0
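For context after the patch itself, here is a rough usage sketch of what the change is meant to enable. It assumes elfutils built with this patch applied, an input file containing DWARF at argv[1], and linking with -ldw -lpthread; CU enumeration is still done serially, and only the dwarf_getabbrev() path, i.e. the abbrev hash table and the allocator behind it, is exercised from several threads at once. The rest of libdw may still need external serialization.

#include <elfutils/libdw.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 4
#define MAXCUS   256

static Dwarf_Die cudies[MAXCUS];
static size_t ncudies;

/* Each worker repeatedly asks for the first abbreviation of every CU.
   Before this patch the shared Dwarf_Abbrev hash table and the memory
   blocks behind it made this a data race; with it, concurrent
   lookups/inserts are intended to be safe.  */
static void *
worker (void *arg)
{
  (void) arg;
  for (int round = 0; round < 1000; round++)
    for (size_t i = 0; i < ncudies; i++)
      {
        size_t len;
        (void) dwarf_getabbrev (&cudies[i], 0, &len);
      }
  return NULL;
}

int
main (int argc, char **argv)
{
  if (argc != 2)
    return 1;
  int fd = open (argv[1], O_RDONLY);
  Dwarf *dw = dwarf_begin (fd, DWARF_C_READ);
  if (dw == NULL)
    return 1;

  /* Enumerate CUs serially; the parallel part below only touches abbrevs.  */
  Dwarf_Off off = 0, next;
  size_t hdr;
  while (ncudies < MAXCUS
         && dwarf_nextcu (dw, off, &next, &hdr, NULL, NULL, NULL) == 0)
    {
      if (dwarf_offdie (dw, off + hdr, &cudies[ncudies]) != NULL)
        ncudies++;
      off = next;
    }

  pthread_t tids[NTHREADS];
  for (int i = 0; i < NTHREADS; i++)
    pthread_create (&tids[i], NULL, worker, NULL);
  for (int i = 0; i < NTHREADS; i++)
    pthread_join (tids[i], NULL);

  dwarf_end (dw);
  return 0;
}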