This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v4] Also use l_tls_dtor_count to decide on object unload (BZ #18657)


On 07/15/2015 07:10 AM, Siddhesh Poyarekar wrote:
> v4: Take the big lock for a longer duration during destructor
>     registration.
> 
> When an TLS destructor is registered, we set the DF_1_NODELETE flag to
> signal that the object should not be destroyed.  We then clear the
> DF_1_NODELETE flag when all destructors are called, which is wrong -
> the flag could have been set by other means too.

Agreed.

Great work using l_tls_dtor_count to do reference counting atomically!

This looks like a permanent and long lasting solution to the problem.

It's exactly the kind of thing I like to see being used in the loader...

> This patch replaces this use of the flag by using l_tls_dtor_count
> directly to determine whether it is safe to unload the object.  This
> change has the added advantage of eliminating the lock taking when
> calling the destructors, which could result in a deadlock.  The patch
> also fixes the test case tst-tls-atexit - it was making an invalid
> dlclose call, which would just return an error silently.
> 
> Change verified on x86_64; the test suite does not show any
> regressions due to the patch.

... However, you need more P&C comments. So a v5 will be required.

To start with:

diff --git a/include/link.h b/include/link.h
index 7ccd9c3..b39fa73 100644
--- a/include/link.h
+++ b/include/link.h
@@ -303,6 +303,7 @@ struct link_map
     size_t l_tls_modid;
 
     /* Number of thread_local objects constructed by this DSO.  */
+    /* Concurrency: Protected by rtld_load_lock.  */
     size_t l_tls_dtor_count;
 
     /* Information used to change permission after the relocations are
---

Is what I have on my P&C docs local branch. Please add a comment to
include/link.h to describe there at definition of struct link_map how
l_tls_dtor_count may be accessed. The above comment was something I was
going to push, but I'll have to update it to say "Accessed atomically."
now since the rtld_load_lock does not protect that member any longer.

> ChangeLog:
> 
> 	[BZ #18657]
> 	* elf/dl-close.c (_dl_close_worker): Don't unload DSO if there
> 	are pending TLS destructor calls.
> 	* stdlib/cxa_thread_atexit_impl.c (__cxa_thread_atexit_impl):
> 	Don't touch the link map flag.  Atomically increment
> 	l_tls_dtor_count.
> 	(__call_tls_dtors): Atomically decrement l_tls_dtor_count.
> 	Avoid taking the load lock and don't touch the link map flag.
> 	* stdlib/tst-tls-atexit.c (do_test): dlopen
> 	tst-tls-atexit-lib.so again before dlclose.
> 	* stdlib/tst-tls-atexit-nodelete.c: New test case.
> 	* stdlib/Makefile (tests): Use it.
> ---
>  elf/dl-close.c                   |   7 ++-
>  stdlib/Makefile                  |   5 +-
>  stdlib/cxa_thread_atexit_impl.c  |  27 +++-------
>  stdlib/tst-tls-atexit-nodelete.c | 104 +++++++++++++++++++++++++++++++++++++++
>  stdlib/tst-tls-atexit.c          |  14 ++++--
>  5 files changed, 133 insertions(+), 24 deletions(-)
>  create mode 100644 stdlib/tst-tls-atexit-nodelete.c
> 
> diff --git a/elf/dl-close.c b/elf/dl-close.c
> index 2104674..def726f 100644
> --- a/elf/dl-close.c
> +++ b/elf/dl-close.c
> @@ -153,7 +153,11 @@ _dl_close_worker (struct link_map *map, bool force)
>        maps[idx] = l;
>        ++idx;
>  
> -      /* Clear DF_1_NODELETE to force object deletion.  */
> +      /* Clear DF_1_NODELETE to force object deletion.  We don't need to touch
> +	 l_tls_dtor_count because forced object deletion only happens when an
> +	 error occurs during object load.  Destructor registration for TLS
> +	 non-POD objects should not have happened till then for this
> +	 object.  */
>        if (force)
>  	l->l_flags_1 &= ~DF_1_NODELETE;
>      }
> @@ -177,6 +181,7 @@ _dl_close_worker (struct link_map *map, bool force)
>        if (l->l_type == lt_loaded
>  	  && l->l_direct_opencount == 0
>  	  && (l->l_flags_1 & DF_1_NODELETE) == 0
> +	  && atomic_load_relaxed (&l->l_tls_dtor_count) == 0

Is _relaxed valid here?

Needs a comment specifying why _relaxed is valid.

>  	  && !used[done_index])
>  	continue;
>  
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index 7fc5a80..402466a 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -74,7 +74,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
>  		   tst-makecontext3 bug-getcontext bug-fmtmsg1		    \
>  		   tst-secure-getenv tst-strtod-overflow tst-strtod-round   \
>  		   tst-tininess tst-strtod-underflow tst-tls-atexit	    \
> -		   tst-setcontext3
> +		   tst-setcontext3 tst-tls-atexit-nodelete
>  tests-static	:= tst-secure-getenv
>  
>  modules-names	= tst-tls-atexit-lib
> @@ -159,6 +159,9 @@ tst-tls-atexit-lib.so-no-z-defs = yes
>  $(objpfx)tst-tls-atexit: $(shared-thread-library) $(libdl)
>  $(objpfx)tst-tls-atexit.out: $(objpfx)tst-tls-atexit-lib.so
>  
> +$(objpfx)tst-tls-atexit-nodelete: $(shared-thread-library) $(libdl)
> +$(objpfx)tst-tls-atexit-nodelete.out: $(objpfx)tst-tls-atexit-lib.so
> +
>  $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
>  	$(SHELL) $< $(common-objpfx) '$(test-program-prefix-before-env)' \
>  		 '$(run-program-env)' '$(test-program-prefix-after-env)' \
> diff --git a/stdlib/cxa_thread_atexit_impl.c b/stdlib/cxa_thread_atexit_impl.c
> index 9120162..c44f9d0 100644
> --- a/stdlib/cxa_thread_atexit_impl.c
> +++ b/stdlib/cxa_thread_atexit_impl.c
> @@ -49,9 +49,11 @@ __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol)
>    new->next = tls_dtor_list;
>    tls_dtor_list = new;
>  
> -  /* See if we already encountered the DSO.  */
> +  /* We have to take the big lock to prevent a racing dlclose from pulling our
> +     DSO from underneath us while we're setting up our destructor.  */
>    __rtld_lock_lock_recursive (GL(dl_load_lock));
>  
> +  /* See if we already encountered the DSO.  */
>    if (__glibc_unlikely (dso_symbol_cache != dso_symbol))
>      {
>        ElfW(Addr) caller = (ElfW(Addr)) dso_symbol;
> @@ -62,16 +64,12 @@ __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol)
>           program (we hope).  */
>        lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded;
>      }
> -  /* A destructor could result in a thread_local construction and the former
> -     could have cleared the flag.  */
> -  if (lm_cache->l_type == lt_loaded && lm_cache->l_tls_dtor_count == 0)
> -    lm_cache->l_flags_1 |= DF_1_NODELETE;
> -
> -  new->map = lm_cache;
> -  new->map->l_tls_dtor_count++;
>  
> +  atomic_increment (&lm_cache->l_tls_dtor_count);

Needs a comment specifying what it synchronizes with.

Should be a C11 atomic.

Should or could be _relase, _acquire, or _relaxed.

>    __rtld_lock_unlock_recursive (GL(dl_load_lock));
>  
> +  new->map = lm_cache;
> +
>    return 0;
>  }
>  
> @@ -83,19 +81,10 @@ __call_tls_dtors (void)
>    while (tls_dtor_list)
>      {
>        struct dtor_list *cur = tls_dtor_list;
> -      tls_dtor_list = tls_dtor_list->next;
>  
> +      tls_dtor_list = tls_dtor_list->next;
>        cur->func (cur->obj);
> -
> -      __rtld_lock_lock_recursive (GL(dl_load_lock));
> -
> -      /* Allow DSO unload if count drops to zero.  */
> -      cur->map->l_tls_dtor_count--;
> -      if (cur->map->l_tls_dtor_count == 0 && cur->map->l_type == lt_loaded)
> -        cur->map->l_flags_1 &= ~DF_1_NODELETE;
> -
> -      __rtld_lock_unlock_recursive (GL(dl_load_lock));
> -
> +      atomic_decrement (&cur->map->l_tls_dtor_count);

Needs a comment specifying what this synchronizes with.

We should be trying hard to use the ISO C11 atomics.

This could be atomic_fetch_add_* ?

>        free (cur);
>      }
>  }
> diff --git a/stdlib/tst-tls-atexit-nodelete.c b/stdlib/tst-tls-atexit-nodelete.c
> new file mode 100644
> index 0000000..cb2b39d
> --- /dev/null
> +++ b/stdlib/tst-tls-atexit-nodelete.c
> @@ -0,0 +1,104 @@
> +/* Verify that a RTLD_NODELETE DSO is not unloaded even if its TLS objects are
> +   destroyed.
> +
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <dlfcn.h>
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <errno.h>
> +
> +static void *
> +use_handle (void *h)
> +{
> +  void (*reg_dtor) (void) = (void (*) (void)) dlsym (h, "reg_dtor");
> +
> +  if (reg_dtor == NULL)
> +    {
> +      printf ("Unable to find symbol reg_dtor: %s\n", dlerror ());
> +      return (void *) (uintptr_t) 1;
> +    }
> +
> +  reg_dtor ();
> +
> +  return NULL;
> +}
> +
> +static int
> +do_test (void)
> +{
> +  pthread_t t;
> +  int ret;
> +  void *thr_ret;
> +
> +  void *h1 = dlopen ("$ORIGIN/tst-tls-atexit-lib.so", RTLD_LAZY);
> +  if (h1 == NULL)
> +    {
> +      printf ("h1: Unable to load DSO: %s\n", dlerror ());
> +      return 1;
> +    }
> +
> +  void *h2 = dlopen ("$ORIGIN/tst-tls-atexit-lib.so",
> +		     RTLD_LAZY | RTLD_NODELETE);
> +  if (h2 == NULL)
> +    {
> +      printf ("h2: Unable to load DSO: %s\n", dlerror ());
> +      return 1;
> +    }
> +
> +  /* FOO is our variable to test if the DSO is unloaded.  */
> +  int *foo = dlsym (h2, "foo_var");
> +
> +  if (foo == NULL)
> +    {
> +      printf ("Unable to find symbol do_foo: %s\n", dlerror ());
> +      return 1;
> +    }
> +
> +  /* Print FOO to be sure that it is working OK.  */
> +  printf ("%d\n", *foo);
> +
> +  if ((ret = pthread_create (&t, NULL, use_handle, h1)) != 0)
> +    {
> +      printf ("pthread_create failed: %s\n", strerror (ret));
> +      return 1;
> +    }
> +
> +  if ((ret = pthread_join (t, &thr_ret)) != 0)
> +    {
> +      printf ("pthread_join failed: %s\n", strerror (ret));
> +      return 1;
> +    }
> +
> +  if (thr_ret != NULL)
> +    return 1;
> +
> +  /* Closing the handles should not unload the DSO.  */
> +  dlclose (h1);
> +  dlclose (h2);
> +
> +  /* This should not crash.  */
> +  printf ("%d\n", *foo);
> +
> +  return 0;
> +}
> +
> +#define TEST_FUNCTION do_test ()
> +#include "../test-skeleton.c"
> diff --git a/stdlib/tst-tls-atexit.c b/stdlib/tst-tls-atexit.c
> index 0a9e920..ae4c328 100644
> --- a/stdlib/tst-tls-atexit.c
> +++ b/stdlib/tst-tls-atexit.c
> @@ -99,9 +99,17 @@ do_test (void)
>    if (thr_ret != NULL)
>      return 1;
>  
> -  /* Now this should unload the DSO.  FIXME: This is a bug, calling dlclose
> -     like this is actually wrong, but it works because cxa_thread_atexit_impl
> -     has a bug which results in dlclose allowing this to work.  */
> +  /* Now this sequence should unload the DSO.  Note that we don't call do_foo,
> +     because we don't want to register the thread destructor.  We just want to
> +     make sure that in the absence of pending destructors, the library is
> +     unloaded correctly.  */
> +  handle = dlopen ("$ORIGIN/tst-tls-atexit-lib.so", RTLD_LAZY);
> +  if (handle == NULL)
> +    {
> +      printf ("main thread: Unable to load DSO: %s\n", dlerror ());
> +      return 1;
> +    }
> +
>    dlclose (handle);
>  
>    /* Handle SIGSEGV.  We don't use the EXPECTED_SIGNAL macro because we want to
> 

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]