This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Async signal safe TLS accesses


This is patch 4/4 of the effort to make TLS access async-signal-safe.

TLS accesses from initial-exec variables are async-signal-safe.  Even
dynamic-type accesses from shared objects loaded by ld.so at startup
are.  But dynamic accesses from dlopen()ed objects are not, which
means a lot of trouble for any sort of per-thread state we want to use
from signal handlers since we can't rely on always having
initial-exec.  Make all TLS access always signal safe (for GNU1 TLS;
GNU2 TLS is still not safe, due to use of lazy relocations.)

To do this, now that we use a safe allocator, we just have to
eliminate the locking.

 * tls_get_addr synchronizes with dlopen() by dl_load_lock; this lock
   is reentrant, but not, alas, signal safe.  Replace this with simple
   CAS based synchronization.

 * Use signal masking in the slow path of tls_get_addr to prevent
   reentrant TLS initialization.  The most complicated part here is
   ensuring a dlopen which forces static TLS (and thus updates all
   threads' DTVs) does not interfere with this.

Also add a test case demonstrating safe signal access to dlopen'd TLS.

This is version 6 of the patch, containing:
    - ppluzhnikov@google.com's fixes to Hurd and whitespace.
    - fixes for triegel@redhat.com's comments about synchronization.
    - split into four patches as suggested by neleai@seznam.cz
    - typo fixes
    - fixes for the test case, ensuring malloc calls aren't optimized out.
    - comments as requested by Carlos
    - eliminate a bad cast warning
---
 elf/dl-open.c        |   5 ++-
 elf/dl-reloc.c       |  50 ++++++++++++++++++---
 elf/dl-tls.c         | 100 ++++++++++++++++++++++++++++--------------
 nptl/Makefile        |  12 +++++-
 nptl/allocatestack.c |  13 ++++--
 nptl/tst-tls7.c      | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++
 nptl/tst-tls7mod.c   |  43 ++++++++++++++++++
 7 files changed, 297 insertions(+), 46 deletions(-)
 create mode 100644 nptl/tst-tls7.c
 create mode 100644 nptl/tst-tls7mod.c

diff --git a/elf/dl-open.c b/elf/dl-open.c
index a9ca6b3..ea222d0 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -548,7 +548,10 @@ cannot load any more object with static TLS"));
 	     generation of the DSO we are allocating data for.  */
 	  _dl_update_slotinfo (imap->l_tls_modid);
 #endif
-
+	  /* We do this iteration under a signal mask in dl-reloc; why not
+	     here?  Because these symbols are new and dlopen hasn't
+	     returned yet.  So we can't possibly be racing with a TLS
+	     access to them from another thread.  */
 	  GL(dl_init_static_tls) (imap);
 	  assert (imap->l_need_tls_init == 0);
 	}
diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c
index 1f66fcc..a5c43eb 100644
--- a/elf/dl-reloc.c
+++ b/elf/dl-reloc.c
@@ -16,8 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <atomic.h>
 #include <errno.h>
 #include <libintl.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <ldsodefs.h>
@@ -70,8 +72,6 @@ _dl_try_allocate_static_tls (struct link_map *map)
 
   size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
 					    - map->l_tls_firstbyte_offset);
-
-  map->l_tls_offset = GL(dl_tls_static_used) = offset;
 #elif TLS_DTV_AT_TP
   /* dl_tls_static_used includes the TCB at the beginning.  */
   size_t offset = (((GL(dl_tls_static_used)
@@ -83,9 +83,38 @@ _dl_try_allocate_static_tls (struct link_map *map)
   if (used > GL(dl_tls_static_size))
     goto fail;
 
-  map->l_tls_offset = offset;
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+  /* We've computed the new value we want, now try to install it.  */
+  ptrdiff_t val;
+  if ((val = map->l_tls_offset) == NO_TLS_OFFSET)
+    {
+      /* l_tls_offset starts out at NO_TLS_OFFSET, and all attempts to
+	 change it go from NO_TLS_OFFSET to some other value.  We use
+	 compare_and_exchange to ensure only one attempt succeeds.  We
+	 don't actually need any memory ordering here, but _acq is the
+	 weakest available.  */
+      atomic_compare_and_exchange_bool_acq (&map->l_tls_offset,
+					    offset,
+					    NO_TLS_OFFSET);
+      val = map->l_tls_offset;
+      assert (val != NO_TLS_OFFSET);
+    }
+  if (val != offset)
+    {
+      /* We'd like to set a static offset for this section, but another
+	 thread has already used a dynamic TLS block for it.  Since we can
+	 only use static offsets if everyone does (and it's not practical
+	 to move that thread's dynamic block), we have to fail.  */
+      goto fail;
+    }
+  /* We installed the value; now update the globals.  */
+#if TLS_TCB_AT_TP
+  GL (dl_tls_static_used) = offset;
+#elif TLS_DTV_AT_TP
   map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
-  GL(dl_tls_static_used) = used;
+  GL (dl_tls_static_used) = used;
 #else
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 #endif
@@ -114,8 +143,17 @@ void
 internal_function __attribute_noinline__
 _dl_allocate_static_tls (struct link_map *map)
 {
-  if (map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET
-      || _dl_try_allocate_static_tls (map))
+  /* We wrap this in a signal mask because it has to iterate all threads
+     (including this one) and update this map's TLS entry. A signal handler
+     accessing TLS would try to do the same update and break.  */
+  sigset_t old;
+  _dl_mask_all_signals (&old);
+  int err = -1;
+  if (map->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
+    err = _dl_try_allocate_static_tls (map);
+
+  _dl_unmask_signals (&old);
+  if (err != 0)
     {
       _dl_signal_error (0, map->l_name, NULL, N_("\
 cannot allocate memory in static TLS block"));
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index c1802e7..c8aca84 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <atomic.h>
 #include <errno.h>
 #include <libintl.h>
 #include <signal.h>
@@ -533,19 +534,21 @@ rtld_hidden_def (_dl_deallocate_tls)
 # endif
 
 
-static void *
-allocate_and_init (struct link_map *map)
+static void
+allocate_and_init (dtv_t *dtv, struct link_map *map)
 {
   void *newp;
   newp = __signal_safe_memalign (map->l_tls_align, map->l_tls_blocksize);
   if (newp == NULL)
     oom ();
 
-  /* Initialize the memory.  */
+  /* Initialize the memory. Since this is our thread's space, we are
+     under a signal mask, and no one has touched this section before,
+     we can safely just overwrite whatever's there.  */
   memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
-  return newp;
+  dtv->pointer.val = newp;
 }
 
 
@@ -587,7 +590,15 @@ _dl_update_slotinfo (unsigned long int req_modid)
 	 the entry we need.  */
       size_t new_gen = listp->slotinfo[idx].gen;
       size_t total = 0;
-
+      int ret;
+      sigset_t old;
+      _dl_mask_all_signals (&old);
+      /* We use the signal mask as a lock against reentrancy here.
+         Check that a signal taken before the lock didn't already
+         update us.  */
+      dtv = THREAD_DTV ();
+      if (dtv[0].counter >= listp->slotinfo[idx].gen)
+        goto out;
       /* We have to look through the entire dtv slotinfo list.  */
       listp =  GL(dl_tls_dtv_slotinfo_list);
       do
@@ -699,6 +710,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
 
       /* This will be the new maximum generation counter.  */
       dtv[0].counter = new_gen;
+   out:
+      _dl_unmask_signals (&old);
     }
 
   return the_map;
@@ -724,39 +737,60 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
 
       the_map = listp->slotinfo[idx].map;
     }
-
- again:
-  /* Make sure that, if a dlopen running in parallel forces the
-     variable into static storage, we'll wait until the address in the
-     static TLS block is set up, and use that.  If we're undecided
-     yet, make sure we make the decision holding the lock as well.  */
-  if (__builtin_expect (the_map->l_tls_offset
-			!= FORCED_DYNAMIC_TLS_OFFSET, 0))
+  sigset_t old;
+  _dl_mask_all_signals (&old);
+
+  /* As with update_slotinfo, we use the sigmask as a check against
+     reentrancy.  */
+  if (dtv[GET_ADDR_MODULE].pointer.val != TLS_DTV_UNALLOCATED)
+    goto out;
+
+  /* Synchronize against a parallel dlopen() forcing this variable
+     into static storage.  If that happens, we have to be more careful
+     about initializing the area, as that dlopen() will be iterating
+     the threads to do so itself.  */
+  ptrdiff_t offset;
+  if ((offset = the_map->l_tls_offset) == NO_TLS_OFFSET)
     {
-      __rtld_lock_lock_recursive (GL(dl_load_lock));
-      if (__builtin_expect (the_map->l_tls_offset == NO_TLS_OFFSET, 1))
-	{
-	  the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
-	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
-	}
-      else
+      /* l_tls_offset starts out at NO_TLS_OFFSET, and all attempts to
+	 change it go from NO_TLS_OFFSET to some other value.  We use
+	 compare_and_exchange to ensure only one attempt succeeds.  We
+	 don't actually need any memory ordering here, but _acq is the
+	 weakest available.  */
+      atomic_compare_and_exchange_bool_acq (&the_map->l_tls_offset,
+					    FORCED_DYNAMIC_TLS_OFFSET,
+					    NO_TLS_OFFSET);
+      offset = the_map->l_tls_offset;
+      assert (offset != NO_TLS_OFFSET);
+    }
+  if (offset == FORCED_DYNAMIC_TLS_OFFSET)
+    {
+      allocate_and_init (&dtv[GET_ADDR_MODULE], the_map);
+    }
+  else
+    {
+      void **pp = &dtv[GET_ADDR_MODULE].pointer.val;
+      while (atomic_forced_read (*pp) == TLS_DTV_UNALLOCATED)
 	{
-	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
-	  if (__builtin_expect (the_map->l_tls_offset
-				!= FORCED_DYNAMIC_TLS_OFFSET, 1))
-	    {
-	      void *p = dtv[GET_ADDR_MODULE].pointer.val;
-	      if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
-		goto again;
-
-	      return (char *) p + GET_ADDR_OFFSET;
-	    }
+	  /* for lack of a better (safe) thing to do, just spin.
+	    Someone else (not us; it's done under a signal mask) set
+	    this map to a static TLS offset, and they'll iterate all
+	    threads to initialize it.  They'll eventually write
+	    to pointer.val, at which point we know they've fully
+	    completed initialization.  */
+	  atomic_delay ();
 	}
+      /* Make sure we've picked up their initialization of the actual
+	 block; this pairs against the write barrier in
+	 init_one_static_tls, guaranteeing that we see their write of
+	 the tls_initimage into the static region.  */
+      atomic_read_barrier ();
     }
-  void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
-  dtv[GET_ADDR_MODULE].pointer.is_static = false;
+out:
+  assert (dtv[GET_ADDR_MODULE].pointer.val != TLS_DTV_UNALLOCATED);
+  _dl_unmask_signals (&old);
 
-  return (char *) p + GET_ADDR_OFFSET;
+  return (char *) dtv[GET_ADDR_MODULE].pointer.val + GET_ADDR_OFFSET;
 }
 
 
diff --git a/nptl/Makefile b/nptl/Makefile
index 57cc8c6..a43e740 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -293,7 +293,7 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
 	 tst-oncex3 tst-oncex4
 endif
 ifeq ($(build-shared),yes)
-tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-_res1 tst-fini1 \
+tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-tls7 tst-_res1 tst-fini1 \
 	 tst-stackguard1
 tests-nolibpthread += tst-fini1
 ifeq ($(have-z-execstack),yes)
@@ -304,7 +304,8 @@ endif
 modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
 		tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \
 		tst-tls5modd tst-tls5mode tst-tls5modf \
-		tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod
+		tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \
+		tst-tls7mod
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) tst-cleanup4aux.o
 test-extras += $(modules-names) tst-cleanup4aux
 test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
@@ -318,6 +319,7 @@ tst-tls5modc.so-no-z-defs = yes
 tst-tls5modd.so-no-z-defs = yes
 tst-tls5mode.so-no-z-defs = yes
 tst-tls5modf.so-no-z-defs = yes
+tst-tls7mod.so-no-z-defs = yes
 
 ifeq ($(build-shared),yes)
 # Build all the modules even when not actually running test programs.
@@ -480,6 +482,12 @@ $(objpfx)tst-tls5: $(objpfx)tst-tls5mod.so $(shared-thread-library)
 LDFLAGS-tst-tls5 = $(no-as-needed)
 LDFLAGS-tst-tls5mod.so = -Wl,-soname,tst-tls5mod.so
 
+# ensure free(malloc()) isn't optimized out
+CFLAGS-tst-tls7.c = -fno-builtin-malloc -fno-builtin-free
+$(objpfx)tst-tls7: $(libdl) $(shared-thread-library)
+$(objpfx)tst-tls7.out: $(objpfx)tst-tls7mod.so
+$(objpfx)tst-tls7mod.so: $(shared-thread-library)
+
 ifeq ($(build-shared),yes)
 ifeq ($(run-built-tests),yes)
 tests: $(objpfx)tst-tls6.out
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 7f6094e..2a5ac22 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -1173,13 +1173,18 @@ init_one_static_tls (struct pthread *curp, struct link_map *map)
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
 
-  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
-  dtv[map->l_tls_modid].pointer.val = dest;
-  dtv[map->l_tls_modid].pointer.is_static = true;
-
   /* Initialize the memory.  */
   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+
+  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
+  dtv[map->l_tls_modid].pointer.is_static = true;
+  /* Pairs against the read barrier in tls_get_attr_tail, guaranteeing
+     any thread waiting for an update to pointer.val sees the
+     initimage write.  */
+  atomic_write_barrier ();
+  dtv[map->l_tls_modid].pointer.val = dest;
+
 }
 
 void
diff --git a/nptl/tst-tls7.c b/nptl/tst-tls7.c
new file mode 100644
index 0000000..3bb3f7d
--- /dev/null
+++ b/nptl/tst-tls7.c
@@ -0,0 +1,120 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* This test checks that TLS in a dlopened object works when first accessed
+   from a signal handler.  */
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void *
+spin (void *ignored)
+{
+  while (1)
+    {
+      /* busywork */
+      free (malloc (128));
+    }
+
+  /* never reached */
+  return NULL;
+}
+
+int
+do_test (void)
+{
+  pthread_t th[10];
+
+  for (int i = 0; i < 10; ++i)
+    {
+      if (pthread_create (&th[i], NULL, spin, NULL))
+        {
+          puts ("pthread_create failed");
+          exit (1);
+        }
+    }
+#define NITERS 75
+
+  for (int i = 0; i < NITERS; ++i)
+    {
+      void *h = dlopen ("tst-tls7mod.so", RTLD_LAZY);
+      if (h == NULL)
+        {
+          puts ("dlopen failed");
+          exit (1);
+        }
+
+      void (*action) (int, siginfo_t *, void *) = dlsym (h, "action");
+      if (action == NULL)
+        {
+          puts ("dlsym for action failed");
+          exit (1);
+        }
+
+      struct sigaction sa;
+      sa.sa_sigaction = action;
+      sigemptyset (&sa.sa_mask);
+      sa.sa_flags = SA_SIGINFO;
+      if (sigaction (SIGUSR1, &sa, NULL))
+        {
+          puts ("sigaction failed");
+          exit (1);
+        }
+
+      sem_t sem;
+      if (sem_init (&sem, 0, 0))
+        {
+          puts ("sem_init failed");
+        }
+
+      sigval_t val;
+      val.sival_ptr = &sem;
+      for (int i = 0; i < 10; ++i)
+        {
+          if (pthread_sigqueue (th[i], SIGUSR1, val))
+            {
+              puts ("pthread_sigqueue failed");
+            }
+        }
+
+
+      for (int i = 0; i < 10; ++i)
+        {
+          if (sem_wait (&sem))
+          {
+            puts ("sem_wait failed");
+          }
+        }
+
+      if (dlclose (h))
+        {
+          puts ("dlclose failed");
+          exit (1);
+        }
+    }
+  return 0;
+}
+
+#define TIMEOUT 4
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/nptl/tst-tls7mod.c b/nptl/tst-tls7mod.c
new file mode 100644
index 0000000..aff29b9
--- /dev/null
+++ b/nptl/tst-tls7mod.c
@@ -0,0 +1,43 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Dynamic module with TLS to be accessed by a signal handler to check safety
+   of that mode. */
+
+#include <semaphore.h>
+#include <signal.h>
+#include <unistd.h>
+
+/* This is an unlikely value to see in incorrectly initialized TLS
+   block -- make sure we're initialized properly. */
+static __thread intptr_t tls_data = 0xdeadbeef;
+
+void
+action (int signo, siginfo_t *info, void *ignored)
+{
+  sem_t *sem = info->si_value.sival_ptr;
+  if (tls_data != 0xdeadbeef)
+    {
+      write (STDOUT_FILENO, "wrong TLS value\n", 17);
+      _exit (1);
+    }
+
+  /* arbitrary choice, just write something unique-ish. */
+  tls_data = (intptr_t) info;
+
+  sem_post (sem);
+}
-- 
1.8.5.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]