[PATCH] Async signal safe TLS accesses
- From: Andrew Hunter <ahh at google dot com>
- To: libc-alpha at sourceware dot org, ppluzhnikov at google dot com
- Cc: Andrew Hunter <ahh at google dot com>
- Date: Thu, 5 Dec 2013 12:01:11 -0800
- Subject: [PATCH] Async signal safe TLS accesses
- Authentication-results: sourceware.org; auth=none
- References: <CALoOobP6rTDosadvLKhHY+deDsU-FtvyO8QX_Y4dZy716e2ATQ at mail dot gmail dot com>
TLS accesses to initial-exec variables are async-signal-safe, and so
are dynamic-model accesses from shared objects that ld.so loaded at
startup. But dynamic accesses from dlopen()ed objects are not, which
means a lot of trouble for any sort of per-thread state we want to use
from signal handlers, since we can't rely on always having initial-exec
TLS. Make all TLS accesses async-signal-safe.
Doing this has a few components to it:
* We introduce a set of symbols __signal_safe_{malloc,free,memalign,&c}.
They do what it says on the box, but guarantee async-signal-safety.
We provide a minimal mmap-based implementation in ld.so. (This may
prove useful elsewhere in libc.)
* We use these throughout dl-tls.c in paths reachable from tls_get_addr
(and, importantly, for allocations on other paths that might be
freed/realloced from tls_get_addr.)
* tls_get_addr synchronizes with dlopen() via dl_load_lock; this lock
is reentrant, but not, alas, signal safe. Replace it with simple
CAS-based synchronization.
* Use signal masking in the slow path of tls_get_addr to prevent
reentrant TLS initialization. The most complicated part here is
ensuring a dlopen which forces static TLS (and thus updates all
threads' DTVs) does not interfere with this.
This is version 4 of the patch, containing:
- ppluzhnikov@google.com's fixes to Hurd and whitespace.
- fixes for triegel@redhat.com's comments about synchronization.
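For concreteness, here is a minimal sketch (not part of the patch) of the
pattern this makes safe. It condenses what the new tst-tls7/tst-tls7mod
test below exercises; the file and symbol names (tlsmod.c, tlsmod.so,
"handler") are illustrative only.

/* tlsmod.c -- build with: gcc -shared -fPIC -o tlsmod.so tlsmod.c  */

/* Dynamic TLS: the module is dlopen()ed, so a thread's first access to
   this variable has to allocate its TLS block.  */
static __thread int counter;

void
handler (int signo)
{
  /* That first access may happen here, in async-signal context.
     Previously it could take dl_load_lock and call malloc; with this
     patch it only uses the signal-safe allocator under a signal
     mask.  */
  ++counter;
}

/* main.c -- build with: gcc -o main main.c -ldl  */
#include <dlfcn.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  void *h = dlopen ("./tlsmod.so", RTLD_NOW);
  if (h == NULL)
    {
      fprintf (stderr, "dlopen: %s\n", dlerror ());
      return 1;
    }

  void (*fn) (int) = (void (*) (int)) dlsym (h, "handler");
  if (fn == NULL)
    {
      fprintf (stderr, "dlsym: %s\n", dlerror ());
      return 1;
    }

  struct sigaction sa;
  memset (&sa, 0, sizeof (sa));
  sa.sa_handler = fn;
  sigemptyset (&sa.sa_mask);
  if (sigaction (SIGUSR1, &sa, NULL) != 0)
    {
      perror ("sigaction");
      return 1;
    }

  /* Deliver the signal so the handler's TLS access happens first in
     signal context.  The real test instead queues SIGUSR1 at threads
     busy in malloc, to provoke the unsafe paths.  */
  raise (SIGUSR1);
  puts ("TLS access from a signal handler succeeded");

  signal (SIGUSR1, SIG_DFL);
  dlclose (h);
  return 0;
}

Nothing in the sketch uses new API; it just relies on the behavior the
series guarantees.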
---
elf/Versions | 1 +
elf/dl-misc.c | 132 +++++++++++++++++++++++++++++++++
elf/dl-open.c | 5 +-
elf/dl-reloc.c | 49 ++++++++++--
elf/dl-tls.c | 143 ++++++++++++++++++++++++------------
nptl/Makefile | 10 ++-
nptl/allocatestack.c | 19 ++---
nptl/tst-tls7.c | 120 ++++++++++++++++++++++++++++++
nptl/tst-tls7mod.c | 39 ++++++++++
sysdeps/generic/ldsodefs.h | 16 ++++
sysdeps/mach/hurd/dl-sysdep.h | 7 ++
sysdeps/unix/sysv/linux/dl-sysdep.c | 46 ++++++++++++
sysdeps/unix/sysv/linux/dl-sysdep.h | 4 +
13 files changed, 525 insertions(+), 66 deletions(-)
create mode 100644 nptl/tst-tls7.c
create mode 100644 nptl/tst-tls7mod.c
diff --git a/elf/Versions b/elf/Versions
index 2383992..01b7a59 100644
--- a/elf/Versions
+++ b/elf/Versions
@@ -53,6 +53,7 @@ ld {
_dl_allocate_tls; _dl_allocate_tls_init;
_dl_argv; _dl_find_dso_for_object; _dl_get_tls_static_info;
_dl_deallocate_tls; _dl_make_stack_executable; _dl_out_of_memory;
+ _dl_clear_dtv;
_dl_rtld_di_serinfo; _dl_starting_up; _dl_tls_setup;
_rtld_global; _rtld_global_ro;
diff --git a/elf/dl-misc.c b/elf/dl-misc.c
index 5fc13a4..cec65d0 100644
--- a/elf/dl-misc.c
+++ b/elf/dl-misc.c
@@ -19,6 +19,7 @@
#include <assert.h>
#include <fcntl.h>
#include <ldsodefs.h>
+#include <libc-symbols.h>
#include <limits.h>
#include <link.h>
#include <stdarg.h>
@@ -364,3 +365,134 @@ _dl_higher_prime_number (unsigned long int n)
return *low;
}
+
+/* To support accessing TLS variables from signal handlers, we need an
+ async signal safe memory allocator. These routines are never
+ themselves invoked reentrantly (all calls to them are surrounded by
+ signal masks) but may be invoked concurrently from many threads.
+ The current implementation is not particularly performant nor space
+ efficient, but it will be used rarely (and only in binaries that use
+ dlopen.) The API matches that of malloc() and friends. */
+
+struct __signal_safe_allocator_header
+{
+ size_t size;
+ void *start;
+};
+
+void *weak_function
+__signal_safe_memalign (size_t boundary, size_t size)
+{
+ struct __signal_safe_allocator_header *header;
+ if (boundary < sizeof (*header))
+ boundary = sizeof (*header);
+
+ /* Boundary must be a power of two. */
+ if ((boundary & (boundary - 1)) != 0)
+ return NULL;
+
+ size_t pg = GLRO (dl_pagesize);
+ size_t padded_size;
+ if (boundary <= pg)
+ {
+ /* We'll get a pointer certainly aligned to boundary, so just
+ add one more boundary-sized chunk to hold the header. */
+ padded_size = roundup (size, boundary) + boundary;
+ }
+ else
+ {
+ /* If we want K pages aligned to a J-page boundary, K+J+1 pages
+ contains at least one such region that isn't directly at the start
+ (so we can place the header.) This is wasteful, but you're the one
+ who wanted 64K-aligned TLS. */
+ padded_size = roundup (size, pg) + boundary + pg;
+ }
+
+
+ size_t actual_size = roundup (padded_size, pg);
+ void *actual = mmap (NULL, actual_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (actual == MAP_FAILED)
+ return NULL;
+
+ if (boundary <= pg)
+ {
+ header = actual + boundary - sizeof (*header);
+ }
+ else
+ {
+ intptr_t actual_pg = ((intptr_t) actual) / pg;
+ intptr_t boundary_pg = boundary / pg;
+ intptr_t start_pg = actual_pg + boundary_pg;
+ start_pg -= start_pg % boundary_pg;
+ if (start_pg > (actual_pg + 1))
+ {
+ int ret = munmap (actual, (start_pg - actual_pg - 1) * pg);
+ assert (ret == 0);
+ actual = (void *) ((start_pg - 1) * pg);
+ }
+ char *start = (void *) (start_pg * pg);
+ header = (void *) (start - sizeof (*header));
+
+ }
+ header->size = actual_size;
+ header->start = actual;
+ void *ptr = header;
+ ptr += sizeof (*header);
+ if (((intptr_t) ptr) % boundary != 0)
+ _dl_fatal_printf ("__signal_safe_memalign produced incorrect alignment\n");
+ return ptr;
+}
+
+void * weak_function
+__signal_safe_malloc (size_t size)
+{
+ return __signal_safe_memalign (1, size);
+}
+
+void weak_function
+__signal_safe_free (void *ptr)
+{
+ if (ptr == NULL)
+ return;
+
+ struct __signal_safe_allocator_header *header = (void *) ((char *) ptr - sizeof (*header));
+ int ret = munmap (header->start, header->size);
+
+ assert (ret == 0);
+}
+
+void * weak_function
+__signal_safe_realloc (void *ptr, size_t size)
+{
+ if (size == 0)
+ {
+ __signal_safe_free (ptr);
+ return NULL;
+ }
+ if (ptr == NULL)
+ return __signal_safe_malloc (size);
+
+ struct __signal_safe_allocator_header *header = (void *) ((char *) ptr - sizeof (*header));
+ size_t old_size = header->size;
+ if (old_size - sizeof (*header) >= size)
+ return ptr;
+
+ void *new_ptr = __signal_safe_malloc (size);
+ if (new_ptr == NULL)
+ return NULL;
+
+ memcpy (new_ptr, ptr, old_size);
+ __signal_safe_free (ptr);
+
+ return new_ptr;
+}
+
+void * weak_function
+__signal_safe_calloc (size_t nmemb, size_t size)
+{
+ void *ptr = __signal_safe_malloc (nmemb * size);
+ if (ptr == NULL)
+ return NULL;
+ return memset (ptr, 0, nmemb * size);
+}
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 1403c8c..277d591 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -548,7 +548,10 @@ cannot load any more object with static TLS"));
generation of the DSO we are allocating data for. */
_dl_update_slotinfo (imap->l_tls_modid);
#endif
-
+ /* We do this iteration under a signal mask in dl-reloc; why not
+ here? Because these symbols are new and dlopen hasn't
+ returned yet. So we can't possibly be racing with a TLS
+ access to them from another thread. */
GL(dl_init_static_tls) (imap);
assert (imap->l_need_tls_init == 0);
}
diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c
index 5c54310..f8ab396 100644
--- a/elf/dl-reloc.c
+++ b/elf/dl-reloc.c
@@ -16,8 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <atomic.h>
#include <errno.h>
#include <libintl.h>
+#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <ldsodefs.h>
@@ -70,8 +72,6 @@ _dl_try_allocate_static_tls (struct link_map *map)
size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
- map->l_tls_firstbyte_offset);
-
- map->l_tls_offset = GL(dl_tls_static_used) = offset;
#elif TLS_DTV_AT_TP
/* dl_tls_static_used includes the TCB at the beginning. */
size_t offset = (((GL(dl_tls_static_used)
@@ -83,9 +83,36 @@ _dl_try_allocate_static_tls (struct link_map *map)
if (used > GL(dl_tls_static_size))
goto fail;
- map->l_tls_offset = offset;
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+ /* We've computed the new value we want, now try to install it. */
+ ptrdiff_t val;
+ if ((val = map->l_tls_offset) == NO_TLS_OFFSET)
+ {
+ /* l_tls_offset starts out at NO_TLS_OFFSET, and all attempts to
+ change it go from NO_TLS_OFFSET to some other value. We use
+ compare_and_exchange to ensure only one attempt succeeds. We
+ don't actually need any memory ordering here, but _acq is the
+ weakest available. */
+ atomic_compare_and_exchange_bool_acq (&map->l_tls_offset,
+ offset,
+ NO_TLS_OFFSET);
+ val = map->l_tls_offset;
+ assert (val != NO_TLS_OFFSET);
+ }
+ if (val != offset)
+ {
+ /* Lost a race to a TLS access in another thread. Too bad, nothing
+ we can do here. */
+ goto fail;
+ }
+ /* We installed the value; now update the globals. */
+#if TLS_TCB_AT_TP
+ GL (dl_tls_static_used) = offset;
+#elif TLS_DTV_AT_TP
map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
- GL(dl_tls_static_used) = used;
+ GL (dl_tls_static_used) = used;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
@@ -114,8 +141,18 @@ void
internal_function __attribute_noinline__
_dl_allocate_static_tls (struct link_map *map)
{
- if (map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET
- || _dl_try_allocate_static_tls (map))
+ /* We wrap this in a signal mask because it has to iterate all
+ threads (including this one) and update this map's TLS entry.
+ A handler accessing TLS would try to do the same update and
+ break. */
+ sigset_t old;
+ _dl_mask_all_signals (&old);
+ int err = -1;
+ if (map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
+ err = _dl_try_allocate_static_tls (map);
+
+ _dl_unmask_signals (&old);
+ if (err != 0)
{
_dl_signal_error (0, map->l_name, NULL, N_("\
cannot allocate memory in static TLS block"));
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 576d9a1..836fc57 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <assert.h>
+#include <atomic.h>
#include <errno.h>
#include <libintl.h>
#include <signal.h>
@@ -293,7 +294,7 @@ allocate_dtv (void *result)
initial set of modules. This should avoid in most cases expansions
of the dtv. */
dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
- dtv = calloc (dtv_length + 2, sizeof (dtv_t));
+ dtv = __signal_safe_calloc (dtv_length + 2, sizeof (dtv_t));
if (dtv != NULL)
{
/* This is the initial length of the dtv. */
@@ -463,6 +464,18 @@ _dl_allocate_tls (void *mem)
}
rtld_hidden_def (_dl_allocate_tls)
+void
+internal_function
+_dl_clear_dtv (dtv_t *dtv)
+{
+ for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
+ if (! dtv[1 + cnt].pointer.is_static
+ && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
+ __signal_safe_free (dtv[1 + cnt].pointer.val);
+ memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
+}
+
+rtld_hidden_def (_dl_clear_dtv)
#ifndef SHARED
extern dtv_t _dl_static_dtv[];
@@ -479,11 +492,11 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
if (! dtv[1 + cnt].pointer.is_static
&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
- free (dtv[1 + cnt].pointer.val);
+ __signal_safe_free (dtv[1 + cnt].pointer.val);
/* The array starts with dtv[-1]. */
if (dtv != GL(dl_initial_dtv))
- free (dtv - 1);
+ __signal_safe_free (dtv - 1);
if (dealloc_tcb)
{
@@ -521,20 +534,21 @@ rtld_hidden_def (_dl_deallocate_tls)
# endif
-static void *
-allocate_and_init (struct link_map *map)
+static void
+allocate_and_init (dtv_t *dtv, struct link_map *map)
{
void *newp;
-
- newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
+ newp = __signal_safe_memalign (map->l_tls_align, map->l_tls_blocksize);
if (newp == NULL)
oom ();
- /* Initialize the memory. */
+ /* Initialize the memory. Since this is our thread's space, we are
+ under a signal mask, and no one has touched this section before,
+ we can safely just overwrite whatever's there. */
memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
- return newp;
+ dtv->pointer.val = newp;
}
@@ -576,7 +590,15 @@ _dl_update_slotinfo (unsigned long int req_modid)
the entry we need. */
size_t new_gen = listp->slotinfo[idx].gen;
size_t total = 0;
-
+ int ret;
+ sigset_t old;
+ _dl_mask_all_signals (&old);
+ /* We use the signal mask as a lock against reentrancy here.
+ Check that a signal taken before the lock didn't already
+ update us. */
+ dtv = THREAD_DTV ();
+ if (dtv[0].counter >= listp->slotinfo[idx].gen)
+ goto out;
/* We have to look through the entire dtv slotinfo list. */
listp = GL(dl_tls_dtv_slotinfo_list);
do
@@ -596,25 +618,27 @@ _dl_update_slotinfo (unsigned long int req_modid)
if (gen <= dtv[0].counter)
continue;
+ size_t modid = total + cnt;
+
/* If there is no map this means the entry is empty. */
struct link_map *map = listp->slotinfo[cnt].map;
if (map == NULL)
{
/* If this modid was used at some point the memory
might still be allocated. */
- if (! dtv[total + cnt].pointer.is_static
- && dtv[total + cnt].pointer.val != TLS_DTV_UNALLOCATED)
+ if (dtv[-1].counter >= modid
+ && !dtv[modid].pointer.is_static
+ && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
{
- free (dtv[total + cnt].pointer.val);
- dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
+ __signal_safe_free (dtv[modid].pointer.val);
+ dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
}
continue;
}
+ assert (modid == map->l_tls_modid);
/* Check whether the current dtv array is large enough. */
- size_t modid = map->l_tls_modid;
- assert (total + cnt == modid);
if (dtv[-1].counter < modid)
{
/* Reallocate the dtv. */
@@ -628,17 +652,17 @@ _dl_update_slotinfo (unsigned long int req_modid)
{
/* This is the initial dtv that was allocated
during rtld startup using the dl-minimal.c
- malloc instead of the real malloc. We can't
+ malloc instead of the real allocator. We can't
free it, we have to abandon the old storage. */
- newp = malloc ((2 + newsize) * sizeof (dtv_t));
+ newp = __signal_safe_malloc ((2 + newsize) * sizeof (dtv_t));
if (newp == NULL)
oom ();
memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
}
else
{
- newp = realloc (&dtv[-1],
+ newp = __signal_safe_realloc (&dtv[-1],
(2 + newsize) * sizeof (dtv_t));
if (newp == NULL)
oom ();
@@ -668,7 +692,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
deallocate even if it is this dtv entry we are
supposed to load. The reason is that we call
memalign and not malloc. */
- free (dtv[modid].pointer.val);
+ __signal_safe_free (dtv[modid].pointer.val);
/* This module is loaded dynamically.  We defer memory
allocation. */
@@ -685,6 +709,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
/* This will be the new maximum generation counter. */
dtv[0].counter = new_gen;
+ out:
+ _dl_unmask_signals (&old);
}
return the_map;
@@ -710,39 +736,60 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
the_map = listp->slotinfo[idx].map;
}
-
- again:
- /* Make sure that, if a dlopen running in parallel forces the
- variable into static storage, we'll wait until the address in the
- static TLS block is set up, and use that. If we're undecided
- yet, make sure we make the decision holding the lock as well. */
- if (__builtin_expect (the_map->l_tls_offset
- != FORCED_DYNAMIC_TLS_OFFSET, 0))
+ sigset_t old;
+ _dl_mask_all_signals (&old);
+
+ /* As with update_slotinfo, we use the sigmask as a check against
+ reentrancy. */
+ if (dtv[GET_ADDR_MODULE].pointer.val != TLS_DTV_UNALLOCATED)
+ goto out;
+
+ /* Synchronize against a parallel dlopen() forcing this variable
+ into static storage. If that happens, we have to be more careful
+ about initializing the area, as that dlopen() will be iterating
+ the threads to do so itself. */
+ ptrdiff_t offset;
+ if ((offset = the_map->l_tls_offset) == NO_TLS_OFFSET)
{
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- if (__builtin_expect (the_map->l_tls_offset == NO_TLS_OFFSET, 1))
- {
- the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
- }
- else
+ /* l_tls_offset starts out at NO_TLS_OFFSET, and all attempts to
+ change it go from NO_TLS_OFFSET to some other value. We use
+ compare_and_exchange to ensure only one attempt succeeds. We
+ don't actually need any memory ordering here, but _acq is the
+ weakest available. */
+ atomic_compare_and_exchange_bool_acq (&the_map->l_tls_offset,
+ FORCED_DYNAMIC_TLS_OFFSET,
+ NO_TLS_OFFSET);
+ offset = the_map->l_tls_offset;
+ assert (offset != NO_TLS_OFFSET);
+ }
+ if (offset == FORCED_DYNAMIC_TLS_OFFSET)
+ {
+ allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
+ }
+ else
+ {
+ void **pp = &dtv[GET_ADDR_MODULE].pointer.val;
+ while (atomic_forced_read (*pp) == TLS_DTV_UNALLOCATED)
{
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
- if (__builtin_expect (the_map->l_tls_offset
- != FORCED_DYNAMIC_TLS_OFFSET, 1))
- {
- void *p = dtv[GET_ADDR_MODULE].pointer.val;
- if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
- goto again;
-
- return (char *) p + GET_ADDR_OFFSET;
- }
+ /* for lack of a better (safe) thing to do, just spin.
+ Someone else (not us; it's done under a signal mask) set
+ this map to a static TLS offset, and they'll iterate all
+ threads to initialize it. They'll eventually write
+ to pointer.val, at which point we know they've fully
+ completed initialization. */
+ atomic_delay ();
}
+ /* Make sure we've picked up their initialization of the actual
+ block; this pairs against the write barrier in
+ init_one_static_tls, guaranteeing that we see their write of
+ the tls_initimage into the static region. */
+ atomic_read_barrier ();
}
- void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
- dtv[GET_ADDR_MODULE].pointer.is_static = false;
+out:
+ assert (dtv[GET_ADDR_MODULE].pointer.val != TLS_DTV_UNALLOCATED);
+ _dl_unmask_signals (&old);
- return (char *) p + GET_ADDR_OFFSET;
+ return (char *) dtv[GET_ADDR_MODULE].pointer.val + GET_ADDR_OFFSET;
}
diff --git a/nptl/Makefile b/nptl/Makefile
index cd601e5..a34e56a 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -289,7 +289,7 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
tst-oncex3 tst-oncex4
endif
ifeq ($(build-shared),yes)
-tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-_res1 tst-fini1 \
+tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-tls7 tst-_res1 tst-fini1 \
tst-stackguard1
tests-nolibpthread += tst-fini1
ifeq ($(have-z-execstack),yes)
@@ -300,7 +300,8 @@ endif
modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \
tst-tls5modd tst-tls5mode tst-tls5modf \
- tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod
+ tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \
+ tst-tls7mod
extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) tst-cleanup4aux.o
test-extras += $(modules-names) tst-cleanup4aux
test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
@@ -314,6 +315,7 @@ tst-tls5modc.so-no-z-defs = yes
tst-tls5modd.so-no-z-defs = yes
tst-tls5mode.so-no-z-defs = yes
tst-tls5modf.so-no-z-defs = yes
+tst-tls7mod.so-no-z-defs = yes
ifeq ($(build-shared),yes)
# Build all the modules even when not actually running test programs.
@@ -471,6 +473,10 @@ $(objpfx)tst-tls5: $(objpfx)tst-tls5mod.so $(shared-thread-library)
LDFLAGS-tst-tls5 = $(no-as-needed)
LDFLAGS-tst-tls5mod.so = -Wl,-soname,tst-tls5mod.so
+$(objpfx)tst-tls7: $(libdl) $(shared-thread-library)
+$(objpfx)tst-tls7.out: $(objpfx)tst-tls7mod.so
+$(objpfx)tst-tls7mod.so: $(shared-thread-library)
+
ifeq ($(build-shared),yes)
ifeq ($(run-built-tests),yes)
tests: $(objpfx)tst-tls6.out
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 1e0fe1f..6ac9e98 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -242,11 +242,7 @@ get_cached_stack (size_t *sizep, void **memp)
/* Clear the DTV. */
dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
- for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
- if (! dtv[1 + cnt].pointer.is_static
- && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
- free (dtv[1 + cnt].pointer.val);
- memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
+ _dl_clear_dtv (dtv);
/* Re-initialize the TLS. */
_dl_allocate_tls_init (TLS_TPADJ (result));
@@ -1177,13 +1173,18 @@ init_one_static_tls (struct pthread *curp, struct link_map *map)
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
- /* Fill in the DTV slot so that a later LD/GD access will find it. */
- dtv[map->l_tls_modid].pointer.val = dest;
- dtv[map->l_tls_modid].pointer.is_static = true;
-
/* Initialize the memory. */
memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+
+ /* Fill in the DTV slot so that a later LD/GD access will find it. */
+ dtv[map->l_tls_modid].pointer.is_static = true;
+ /* Pairs against the read barrier in tls_get_addr_tail, guaranteeing
+ any thread waiting for an update to pointer.val sees the
+ initimage write. */
+ atomic_write_barrier ();
+ dtv[map->l_tls_modid].pointer.val = dest;
+
}
void
diff --git a/nptl/tst-tls7.c b/nptl/tst-tls7.c
new file mode 100644
index 0000000..2b2a743
--- /dev/null
+++ b/nptl/tst-tls7.c
@@ -0,0 +1,120 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This test checks that TLS in a dlopened object works when first accessed
+ from a signal handler.  */
+
+void *
+spin (void *ignored)
+{
+ while (1)
+ {
+ /* busywork */
+ free (malloc (128));
+ }
+
+ /* never reached */
+ return NULL;
+}
+
+int
+do_test (void)
+{
+ pthread_t th[10];
+
+ for (int i = 0; i < 10; ++i)
+ {
+ if (pthread_create (&th[i], NULL, spin, NULL))
+ {
+ puts ("pthread_create failed");
+ exit (1);
+ }
+ }
+#define NITERS 75
+
+ for (int i = 0; i < NITERS; ++i)
+ {
+ void *h = dlopen ("tst-tls7mod.so", RTLD_LAZY);
+ if (h == NULL)
+ {
+ puts ("dlopen failed");
+ exit (1);
+ }
+
+ void (*action) (int, siginfo_t *, void *) = dlsym (h, "action");
+ if (action == NULL)
+ {
+ puts ("dlsym for action failed");
+ exit (1);
+ }
+
+ struct sigaction sa;
+ sa.sa_sigaction = action;
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction (SIGUSR1, &sa, NULL))
+ {
+ puts ("sigaction failed");
+ exit (1);
+ }
+
+ sem_t sem;
+ if (sem_init (&sem, 0, 0))
+ {
+ puts ("sem_init failed");
+ }
+
+ sigval_t val;
+ val.sival_ptr = &sem;
+ for (int i = 0; i < 10; ++i)
+ {
+ if (pthread_sigqueue (th[i], SIGUSR1, val))
+ {
+ puts ("pthread_sigqueue failed");
+ }
+ }
+
+
+ for (int i = 0; i < 10; ++i)
+ {
+ if (sem_wait (&sem))
+ {
+ puts ("sem_wait failed");
+ }
+ }
+
+ if (dlclose (h))
+ {
+ puts ("dlclose failed");
+ exit (1);
+ }
+ }
+ return 0;
+}
+
+#define TIMEOUT 4
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/nptl/tst-tls7mod.c b/nptl/tst-tls7mod.c
new file mode 100644
index 0000000..d4a88ef
--- /dev/null
+++ b/nptl/tst-tls7mod.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <semaphore.h>
+#include <signal.h>
+#include <unistd.h>
+
+
+static __thread int tls_data = 17;
+
+void
+action (int signo, siginfo_t *info, void *ignored)
+{
+ sem_t *sem = info->si_value.sival_ptr;
+ if (tls_data != 17)
+ {
+ write (STDOUT_FILENO, "wrong TLS value\n", 16);
+ _exit (1);
+ }
+
+ /* Arbitrary choice; just write something unique-ish.  */
+ tls_data = (int) (long) info;
+
+ sem_post (sem);
+}
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index e7b0516..b8b420b 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -233,6 +233,11 @@ extern int _dl_name_match_p (const char *__name, const struct link_map *__map)
extern unsigned long int _dl_higher_prime_number (unsigned long int n)
internal_function;
+/* Mask every signal, returning the previous sigmask in OLD. */
+extern void _dl_mask_all_signals (sigset_t *old) internal_function;
+/* Undo _dl_mask_all_signals. */
+extern void _dl_unmask_signals (sigset_t *old) internal_function;
+
/* Function used as argument for `_dl_receive_error' function. The
arguments are the error code, error string, and the objname the
error occurred in. */
@@ -983,6 +988,17 @@ extern void *_dl_allocate_tls_storage (void)
extern void *_dl_allocate_tls_init (void *) internal_function;
rtld_hidden_proto (_dl_allocate_tls_init)
+extern void *__signal_safe_memalign (size_t boundary, size_t size);
+extern void *__signal_safe_malloc (size_t size);
+extern void __signal_safe_free (void *ptr);
+extern void *__signal_safe_realloc (void *ptr, size_t size);
+extern void *__signal_safe_calloc (size_t nmemb, size_t size);
+
+/* Remove all allocated dynamic TLS regions from a DTV
+ for reuse by new thread. */
+extern void _dl_clear_dtv (dtv_t *dtv) internal_function;
+rtld_hidden_proto (_dl_clear_dtv)
+
/* Deallocate memory allocated with _dl_allocate_tls. */
extern void _dl_deallocate_tls (void *tcb, bool dealloc_tcb) internal_function;
rtld_hidden_proto (_dl_deallocate_tls)
diff --git a/sysdeps/mach/hurd/dl-sysdep.h b/sysdeps/mach/hurd/dl-sysdep.h
index 52563b0..0e7cac4 100644
--- a/sysdeps/mach/hurd/dl-sysdep.h
+++ b/sysdeps/mach/hurd/dl-sysdep.h
@@ -29,3 +29,10 @@
# define DL_ARGV_NOT_RELRO 1
# define LIBC_STACK_END_NOT_RELRO 1
#endif
+
+#include <signal.h>
+inline void _dl_mask_all_signals (sigset_t *old) internal_function;
+inline void _dl_mask_all_signals (sigset_t *old) { }
+
+inline void _dl_unmask_signals (sigset_t *old) internal_function;
+inline void _dl_unmask_signals (sigset_t *old) { }
diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
index e80cb8d..164c1f1 100644
--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
+++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
@@ -19,6 +19,7 @@
/* Linux needs some special initialization, but otherwise uses
the generic dynamic linker system interface code. */
+#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
@@ -130,3 +131,48 @@ _dl_discover_osversion (void)
return version;
}
+
+/* Mask every signal, returning the previous sigmask in OLD. */
+void
+internal_function
+_dl_mask_all_signals (sigset_t *old)
+{
+ int ret;
+ sigset_t new;
+
+ sigfillset (&new);
+
+ /* This function serves as a replacement to pthread_sigmask, which
+ isn't available from within the dynamic linker since it would require
+ linking with libpthread. We duplicate some of the functionality here
+ to avoid requiring libpthread. This isn't quite identical to
+ pthread_sigmask in that we do not mask internal signals used for
+ cancellation and setxid handling. This disables asynchronous
+ cancellation for the duration the signals are disabled, but it's a
+ small window, and prevents any problems with the use of TLS variables
+ in the signal handlers that would have executed. */
+
+ /* It's very important we don't touch errno here, as that's TLS; since this
+ gets called from __tls_get_addr we might end up recursing.  */
+
+ INTERNAL_SYSCALL_DECL (err);
+
+ ret = INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &new, old,
+ _NSIG / 8);
+
+ assert (ret == 0);
+}
+
+/* Return sigmask to what it was before a call to _dl_mask_all_signals. */
+void
+internal_function
+_dl_unmask_signals (sigset_t *old)
+{
+ int ret;
+ INTERNAL_SYSCALL_DECL (err);
+
+ ret = INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, old, NULL,
+ _NSIG / 8);
+
+ assert (ret == 0);
+}
diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.h b/sysdeps/unix/sysv/linux/dl-sysdep.h
index e1eab09..0fe1e1c 100644
--- a/sysdeps/unix/sysv/linux/dl-sysdep.h
+++ b/sysdeps/unix/sysv/linux/dl-sysdep.h
@@ -30,4 +30,8 @@
/* Get version of the OS. */
extern int _dl_discover_osversion (void) attribute_hidden;
# define HAVE_DL_DISCOVER_OSVERSION 1
+
+#include <signal.h>
+void _dl_mask_all_signals (sigset_t *) internal_function;
+void _dl_unmask_signals (sigset_t *) internal_function;
#endif
--
1.8.5.1