This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] elf: Do not use memalign for TCB/TLS blocks allocation [BZ #17730]


On 06/21/2016 07:38 AM, Florian Weimer wrote:
> Instead, call malloc and explicitly align the pointer.
> 
> There is no external location to store the original (unaligned)
> pointer, and this commit increases the allocation size to store
> the pointer at a fixed location relative to the TCB pointer.
> 
> The manual alignment means that some space goes unused which
> was previously made available for subsequent allocations.
> However, in the TLS_DTV_AT_TP case, the manual alignment code
> avoids aligning the pre-TCB to the TLS block alignment.  (Even
> while using memalign, the allocation had some unused padding
> in front.)
> 
> This concludes the removal of memalign calls from the TLS code,
> and the new tst-tls3-malloc test verifies that only core malloc
> routines are used.

This looks good to me.

Is the the test case 100% reliable? It seems to me that it should be,
but I haven't scanned all of the paths through the TLS startup.
 
> 2016-06-21  Florian Weimer  <fweimer@redhat.com>
> 
> 	[BZ #17730]
> 	Avoid using memalign for TCB allocations.
> 	* elf/dl-tls.c (tcb_to_pointer_to_free_location): New.
> 	(_dl_allocate_tls_storage): Use malloc and manual alignment.
> 	Avoid alignment gap in the TLS_DTV_AT_TP case.
> 	(_dl_deallocate_tls): Use tcb_to_pointer_to_free_location to
> 	determine the pointer to free.
> 	* nptl/tst-tls3-malloc.c: New test.
> 	* nptl/Makefile (tests): Add it.
> 	(tst-tls3-malloc): Link with libdl, libpthread.
> 	(LDFLAGS-tst-tls3-malloc): Set.
> 	(tst-tls3-malloc.out): Depend on DSO used in test.
> 
> diff --git a/elf/dl-tls.c b/elf/dl-tls.c
> index be6a3c7..17567ad 100644
> --- a/elf/dl-tls.c
> +++ b/elf/dl-tls.c
> @@ -347,6 +347,22 @@ _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
>    *alignp = GL(dl_tls_static_align);
>  }
>  
> +/* Derive the location of the pointer to the start of the original
> +   allocation (before alignment) from the pointer to the TCB.  */
> +static inline void **
> +tcb_to_pointer_to_free_location (void *tcb)
> +{
> +#if TLS_TCB_AT_TP
> +  /* The TCB follows the TLS blocks, and the pointer to the front
> +     follows the TCB.  */
> +  void **original_pointer_location = tcb + TLS_TCB_SIZE;
> +#elif TLS_DTV_AT_TP
> +  /* The TCB comes first, preceded by the pre-TCB, and the pointer is
> +     before that.  */
> +  void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
> +#endif
> +  return original_pointer_location;
> +}

OK.

>  
>  void *
>  internal_function
> @@ -359,39 +375,50 @@ _dl_allocate_tls_storage (void)
>    /* Memory layout is:
>       [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
>  			  ^ This should be returned.  */
> -  size += (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
> -	  & ~(GL(dl_tls_static_align) - 1);
> +  size += TLS_PRE_TCB_SIZE;

OK.

>  #endif
>  
> -  /* Allocate a correctly aligned chunk of memory.  */
> -  result = __libc_memalign (GL(dl_tls_static_align), size);
> -  if (__builtin_expect (result != NULL, 1))
> -    {
> -      /* Allocate the DTV.  */
> -      void *allocated = result;
> +  /* Perform the allocation.  Reserve space for the required alignment
> +     and the pointer to the original allocation.  */
> +  size_t alignment = GL(dl_tls_static_align);
> +  void *allocated = malloc (size + alignment + sizeof (void *));
> +  if (__glibc_unlikely (allocated == NULL))
> +    return NULL;

OK.

>  
> +  /* Perform alignment and allocate the DTV.  */
>  #if TLS_TCB_AT_TP
> -      /* The TCB follows the TLS blocks.  */
> -      result = (char *) result + size - TLS_TCB_SIZE;
> +  /* The TCB follows the TLS blocks, which determine the alignment.
> +     (TCB alignment requirements have been taken into account when
> +     calculating GL(dl_tls_static_align).)  */
> +  void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
> +  result = aligned + size - TLS_TCB_SIZE;
>  
> -      /* Clear the TCB data structure.  We can't ask the caller (i.e.
> -	 libpthread) to do it, because we will initialize the DTV et al.  */
> -      memset (result, '\0', TLS_TCB_SIZE);
> +  /* Clear the TCB data structure.  We can't ask the caller (i.e.
> +     libpthread) to do it, because we will initialize the DTV et al.  */
> +  memset (result, '\0', TLS_TCB_SIZE);
>  #elif TLS_DTV_AT_TP
> -      result = (char *) result + size - GL(dl_tls_static_size);
> +  /* Pre-TCB and TCB come before the TLS blocks.  The layout computed
> +     in _dl_determine_tlsoffset assumes that the TCB is aligned to the
> +     TLS block alignment, and not just the TLS blocks after it.  This
> +     can leave an unused alignment gap between the TCB and the TLS
> +     blocks.  */
> +  result = (void *) roundup
> +    (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
> +     alignment);

OK.

>  
> -      /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
> -	 We can't ask the caller (i.e. libpthread) to do it, because we will
> -	 initialize the DTV et al.  */
> -      memset ((char *) result - TLS_PRE_TCB_SIZE, '\0',
> -	      TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
> +  /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
> +     it.  We can't ask the caller (i.e. libpthread) to do it, because
> +     we will initialize the DTV et al.  */
> +  memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);

OK.

>  #endif
>  
> -      result = allocate_dtv (result);
> -      if (result == NULL)
> -	free (allocated);
> -    }
> +  /* Record the value of the original pointer for later
> +     deallocation.  */
> +  *tcb_to_pointer_to_free_location (result) = allocated;

OK.

>  
> +  result = allocate_dtv (result);
> +  if (result == NULL)
> +    free (allocated);
>    return result;
>  }
>  
> @@ -558,17 +585,7 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
>      free (dtv - 1);
>  
>    if (dealloc_tcb)
> -    {
> -#if TLS_TCB_AT_TP
> -      /* The TCB follows the TLS blocks.  Back up to free the whole block.  */
> -      tcb -= GL(dl_tls_static_size) - TLS_TCB_SIZE;
> -#elif TLS_DTV_AT_TP
> -      /* Back up the TLS_PRE_TCB_SIZE bytes.  */
> -      tcb -= (TLS_PRE_TCB_SIZE + GL(dl_tls_static_align) - 1)
> -	     & ~(GL(dl_tls_static_align) - 1);
> -#endif
> -      free (tcb);
> -    }
> +    free (*tcb_to_pointer_to_free_location (tcb));

OK.

>  }
>  rtld_hidden_def (_dl_deallocate_tls)
>  
> diff --git a/nptl/Makefile b/nptl/Makefile
> index e0bc1b7..5bbe8cf 100644
> --- a/nptl/Makefile
> +++ b/nptl/Makefile
> @@ -321,8 +321,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
>  	 tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \
>  	 tst-oncex3 tst-oncex4
>  ifeq ($(build-shared),yes)
> -tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-_res1 tst-fini1 \
> -	 tst-stackguard1
> +tests += tst-atfork2 tst-tls3 tst-tls3-malloc tst-tls4 tst-tls5 tst-_res1 \

OK.

> +	 tst-fini1 tst-stackguard1
>  tests-nolibpthread += tst-fini1
>  ifeq ($(have-z-execstack),yes)
>  tests += tst-execstack
> @@ -527,6 +527,10 @@ LDFLAGS-tst-tls3 = -rdynamic
>  $(objpfx)tst-tls3.out: $(objpfx)tst-tls3mod.so
>  $(objpfx)tst-tls3mod.so: $(shared-thread-library)
>  
> +$(objpfx)tst-tls3-malloc: $(libdl) $(shared-thread-library)
> +LDFLAGS-tst-tls3-malloc = -rdynamic
> +$(objpfx)tst-tls3-malloc.out: $(objpfx)tst-tls3mod.so
> +
>  $(objpfx)tst-tls4: $(libdl) $(shared-thread-library)
>  $(objpfx)tst-tls4.out: $(objpfx)tst-tls4moda.so $(objpfx)tst-tls4modb.so
>  
> diff --git a/nptl/tst-tls3-malloc.c b/nptl/tst-tls3-malloc.c
> new file mode 100644
> index 0000000..5eab3cd
> --- /dev/null
> +++ b/nptl/tst-tls3-malloc.c
> @@ -0,0 +1,176 @@
> +/* Test TLS allocation with an interposed malloc.
> +   Copyright (C) 2016 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Reuse the test.  */
> +#include "tst-tls3.c"
> +
> +#include <sys/mman.h>
> +
> +/* Interpose a minimal malloc implementation.  This implementation
> +   deliberately interposes just a restricted set of symbols, to detect
> +   if the TLS code bypasses the interposed malloc.  */
> +
> +/* Lock to guard malloc internals.  */
> +static pthread_mutex_t malloc_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +/* Information about an allocation chunk.  */
> +struct malloc_chunk
> +{
> +  /* Start of the allocation.  */
> +  void *start;
> +  /* Size of the allocation.  */
> +  size_t size;
> +};
> +
> +enum { malloc_chunk_count = 1000 };
> +static struct malloc_chunk chunks[malloc_chunk_count];
> +
> +/* Lock the malloc lock.  */
> +static void
> +xlock (void)
> +{
> +  int ret = pthread_mutex_lock (&malloc_lock);
> +  if (ret != 0)
> +    {
> +      errno = ret;
> +      printf ("error: pthread_mutex_lock: %m\n");
> +      _exit (1);
> +    }
> +}
> +
> +/* Unlock the malloc lock.  */
> +static void
> +xunlock (void)
> +{
> +  int ret = pthread_mutex_unlock (&malloc_lock);
> +  if (ret != 0)
> +    {
> +      errno = ret;
> +      printf ("error: pthread_mutex_unlock: %m\n");
> +      _exit (1);
> +    }
> +}
> +
> +/* Internal malloc without locking and registration.  */
> +static void *
> +malloc_internal (size_t size)
> +{
> +  void *result = mmap (NULL, size, PROT_READ | PROT_WRITE,
> +                       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +  if (result == MAP_FAILED)
> +    {
> +      printf ("error: mmap: %m\n");
> +      _exit (1);
> +    }
> +  return result;
> +}
> +
> +void *
> +malloc (size_t size)
> +{
> +  if (size == 0)
> +    size = 1;
> +  xlock ();
> +  void *result = malloc_internal (size);
> +  for (int i = 0; i < malloc_chunk_count; ++i)
> +    if (chunks[i].start == NULL)
> +      {
> +        chunks[i].start = result;
> +        chunks[i].size = size;
> +        xunlock ();
> +        return result;
> +      }
> +  xunlock ();
> +  printf ("error: no place to store chunk pointer\n");
> +  _exit (1);
> +}
> +
> +void *
> +calloc (size_t a, size_t b)
> +{
> +  if (b != 0 && a > SIZE_MAX / b)
> +    return NULL;
> +  /* malloc uses mmap, which provides zeroed memory.  */
> +  return malloc (a * b);
> +}
> +
> +static void
> +xunmap (void *ptr, size_t size)
> +{
> +  int ret = munmap (ptr, size);
> +  if (ret < 0)
> +    {
> +      printf ("error: munmap (%p, %zu) failed: %m\n", ptr, size);
> +      _exit (1);
> +    }
> +}
> +
> +void
> +free (void *ptr)
> +{
> +  if (ptr == NULL)
> +    return;
> +
> +  xlock ();
> +  for (int i = 0; i < malloc_chunk_count; ++i)
> +    if (chunks[i].start == ptr)
> +      {
> +        xunmap (ptr, chunks[i].size);
> +        chunks[i] = (struct malloc_chunk) {};
> +        xunlock ();
> +        return;
> +      }
> +  xunlock ();
> +  printf ("error: tried to free non-allocated pointer %p\n", ptr);
> +  _exit (1);
> +}
> +
> +void *
> +realloc (void *old, size_t size)
> +{
> +  if (old != NULL)
> +    {
> +      xlock ();
> +      for (int i = 0; i < malloc_chunk_count; ++i)
> +        if (chunks[i].start == old)
> +          {
> +            size_t old_size = chunks[i].size;
> +            void *result;
> +            if (old_size < size)
> +              {
> +                result = malloc_internal (size);
> +                /* Reuse the slot for the new allocation.  */
> +                memcpy (result, old, old_size);
> +                xunmap (old, old_size);
> +                chunks[i].start = result;
> +                chunks[i].size = size;
> +              }
> +            else
> +              /* Old size is not smaller, so reuse the old
> +                 allocation.  */
> +              result = old;
> +            xunlock ();
> +            return result;
> +          }
> +      xunlock ();
> +      printf ("error: tried to realloc non-allocated pointer %p\n", old);
> +      _exit (1);
> +    }
> +  else
> +    return malloc (size);
> +}
> 

OK.

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]