[PATCH v2 3/6] malloc: Basic support for memory tagging in the malloc() family

DJ Delorie dj@redhat.com
Wed Jun 24 21:05:44 GMT 2020


Richard Earnshaw <rearnsha@arm.com> writes:
> All the hooks use function pointers to allow this to work without
> needing ifuncs.

> -#ifdef SHARED
> +#if defined(SHARED) || defined(_LIBC_MTAG)
>  static void *
>  __failing_morecore (ptrdiff_t d)
>  {
>    return (void *) MORECORE_FAILURE;
>  }
> +#endif
>  
> +#ifdef SHARED
>  extern struct dl_open_hook *_dl_open_hook;
>  libc_hidden_proto (_dl_open_hook);
>  #endif

Makes __failing_morecore available in a static link, ok.

> +#ifdef _LIBC_MTAG
> +static void *
> +__mtag_tag_new_usable (void *ptr)
> +{
> +  if (ptr)
> +    ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr),
> +				  __malloc_usable_size (ptr));
> +  return ptr;
> +}

Comments, please.  Is this adding a tag to a pointer, or removing a tag
from a tagged pointer?  I'm guessing adding a tag, but I don't see
*what* tag is being added, or if it's chosen by the kernel.

I think it's going to be important to be clear about which pointers are
tagged and which are untagged, throughout.

(after completing the review, I see comments in the .h file at the end,
but still...)
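
For what it's worth, going by the descriptions in the new libc-mtag.h
at the end of the patch, __libc_mtag_new_tag chooses a new (random)
tag for the pointer and __libc_mtag_tag_region applies that tag to the
memory, so a comment along these lines (my sketch, please verify)
would cover it:

  /* Choose a new (random) tag for PTR, apply it to the entire usable
     region, and return PTR retagged to match.  The result is safe for
     the application to dereference; any previously tagged aliases of
     this memory are no longer valid.  */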

> +static void *
> +__mtag_tag_new_memset (void *ptr, int val, size_t size)
> +{
> +  return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size);
> +}
> +#endif

Likewise.

> @@ -293,6 +312,24 @@ ptmalloc_init (void)
>  
>    __malloc_initialized = 0;
>  
> +#ifdef _LIBC_MTAG
> +  if ((TUNABLE_GET_FULL (glibc, memtag, enable, int32_t, NULL) & 1) != 0)
> +    {
> +      /* If the environment says that we should be using tagged memory
> +	 and that morecore does not support tagged regions, then
> +	 disable it.  */
> +      if (__MTAG_SBRK_UNTAGGED)
> +	__morecore = __failing_morecore;
> +
> +      __mtag_mmap_flags = __MTAG_MMAP_FLAGS;
> +      __tag_new_memset = __mtag_tag_new_memset;
> +      __tag_region = __libc_mtag_tag_region;
> +      __tag_new_usable = __mtag_tag_new_usable;
> +      __tag_at = __libc_mtag_address_get_tag;
> +      __mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
> +    }
> +#endif

Do we need to mangle/demangle these pointers?  They only seem to be
internally stored pointers, so maybe not; but if those static pointers
could be corrupted, then yes?
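
If we do decide they need protecting, the usual approach would apply;
a rough sketch using PTR_MANGLE/PTR_DEMANGLE (untested, and assuming
the usual in-place semantics of those macros):

  /* In ptmalloc_init, after assigning the hooks:  */
  PTR_MANGLE (__tag_region);

  /* At each call site, copy and demangle before calling:  */
  void *(*tag_region_fn) (void *, size_t) = __tag_region;
  PTR_DEMANGLE (tag_region_fn);
  ptr = tag_region_fn (ptr, size);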

> -  if (__mprotect (p2, size, PROT_READ | PROT_WRITE) != 0)
> +  if (__mprotect (p2, size, MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0)

Ok.

> -                      PROT_READ | PROT_WRITE) != 0)
> +                      MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0)

Ok.

> diff --git a/malloc/hooks.c b/malloc/hooks.c

Note that there's another patch set by Carlos and me to move all the
hook-related stuff out of libc.so and into libmtrace.so.

>  void
>  __malloc_check_init (void)
>  {
> +#if HAVE_TUNABLES && defined (_LIBC_MTAG)
> +  /* If memory tagging is enabled, there is no need for the boundary
> +     checking cookie as well.  */
> +  if ((TUNABLE_GET_FULL (glibc, memtag, enable, int32_t, NULL) & 1) != 0)
> +    return;
> +#endif

Malloc checking does more than boundary checks, so I would not disable
these hooks just because tagging is enabled.  More checks are better
than fewer checks :-)

> +/* For memory tagging.  */
> +#include <libc-mtag.h>

Ok.

> +#ifdef _LIBC_MTAG
> +static void *
> +__default_tag_region (void *ptr, size_t size)
> +{
> +  return ptr;
> +}
> +
> +static void *
> +__default_tag_nop (void *ptr)
> +{
> +  return ptr;
> +}

Needed for mtag present but disabled, ok.

> +static int __mtag_mmap_flags = 0;
> +static size_t __mtag_granule_mask = ~(size_t)0;
> +
> +static void *(*__tag_new_memset)(void *, int, size_t) = memset;
> +static void *(*__tag_region)(void *, size_t) = __default_tag_region;
> +static void *(*__tag_new_usable)(void *) = __default_tag_nop;
> +static void *(*__tag_at)(void *) = __default_tag_nop;
> +
> +# define TAG_NEW_MEMSET(ptr, val, size) __tag_new_memset (ptr, val, size)
> +# define TAG_REGION(ptr, size) __tag_region (ptr, size)
> +# define TAG_NEW_USABLE(ptr) __tag_new_usable (ptr)
> +# define TAG_AT(ptr) __tag_at (ptr)

Cases for mtag-supporting arches, ok.

> +#else
> +# define TAG_NEW_MEMSET(ptr, val, size) memset (ptr, val, size)
> +# define TAG_REGION(ptr, size) (ptr)
> +# define TAG_NEW_USABLE(ptr) (ptr)
> +# define TAG_AT(ptr) (ptr)
> +#endif

All other arches, no-ops, ok.

> +/* When using tagged memory, we cannot share the end of the user block
> +   with the header for the next chunk, so ensure that we allocate
> +   blocks that are rounded up to the granule size.  Take care not to
> +   overflow from close to MAX_SIZE_T to a small number.  */
> +static inline size_t
> +ROUND_UP_ALLOCATION_SIZE(size_t bytes)
> +{
> +#ifdef _LIBC_MTAG
> +  return (bytes + ~__mtag_granule_mask) & __mtag_granule_mask;
> +#else
> +  return bytes;
> +#endif
> +}
> +

This duplicates the purpose of the request2size() macro, and it is
only used where request2size is used anyway, so this logic should
live inside request2size.  Note that request2size() is used in more
places than checked_request2size.
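
Something like this, i.e. just compose the existing helper with the
existing macro (sketch only; the double evaluation of req matches
what the macro does today):

  /* Sketch: round the request up to the tag granule inside
     request2size itself, so that every caller is covered.  */
  #define request2size(req)                                        \
    ((ROUND_UP_ALLOCATION_SIZE (req) + SIZE_SZ + MALLOC_ALIGN_MASK \
      < MINSIZE)                                                   \
     ? MINSIZE                                                     \
     : (ROUND_UP_ALLOCATION_SIZE (req) + SIZE_SZ                   \
        + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)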

> -#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
> -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
> +#define chunk2mem(p) ((void*)TAG_AT (((char*)(p) + 2*SIZE_SZ)))
> +#define chunk2rawmem(p) ((void*)((char*)(p) + 2*SIZE_SZ))
> +#define mem2chunk(mem) ((mchunkptr)TAG_AT (((char*)(mem) - 2*SIZE_SZ)))

I think this warrants a comment about when to use chunk2mem and when to
use chunk2rawmem.
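
Something like this would do (my reading of the patch, so
double-check):

  /* chunk2mem: convert a chunk address to a pointer suitable for
     handing to the application, i.e. carrying the tag for user
     access.  chunk2rawmem: the same conversion, but leaving the tag
     alone; use it for internal accesses, address arithmetic and
     alignment checks, where the tag is irrelevant.  */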

> -      assert (aligned_OK (chunk2mem (p)));
> +      assert (aligned_OK (chunk2rawmem (p)));

Should be ok; tagging shouldn't change alignment.

> -      assert (aligned_OK (chunk2mem (p)));
> +      assert (aligned_OK (chunk2rawmem (p)));

Likewise.

> -  assert (aligned_OK (chunk2mem (p)));
> +  assert (aligned_OK (chunk2rawmem (p)));

Likewise.

> -          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
> +          mm = (char *) (MMAP (0, size,
> +			       MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE, 0));

Ok.

> -                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
> +                  /* For glibc, chunk2rawmem increases the address by 2*SIZE_SZ and
>                       MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
>                       aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
> -                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
> +                  assert (((INTERNAL_SIZE_T) chunk2rawmem (mm) & MALLOC_ALIGN_MASK) == 0);

Ok.

> -                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
> +                front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (mm) & MALLOC_ALIGN_MASK;

Ok.

> -              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
> +              char *mbrk = (char *) (MMAP (0, size,
> +					   MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE,
> +					   0));

Ok.

> -                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
> +                  front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (brk) & MALLOC_ALIGN_MASK;

Ok.

> -                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
> +                    assert (((unsigned long) chunk2rawmem (brk) & MALLOC_ALIGN_MASK) == 0);

Ok.

> -                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
> +                      front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (brk) & MALLOC_ALIGN_MASK;

Ok.

> -  uintptr_t mem = (uintptr_t) chunk2mem (p);
> +  uintptr_t mem = (uintptr_t) chunk2rawmem (p);

Ok.

> -  assert (aligned_OK (chunk2mem (p)));
> +  assert (aligned_OK (chunk2rawmem (p)));

Ok.

> -      return tcache_get (tc_idx);
> +      victim = tcache_get (tc_idx);
> +      return TAG_NEW_USABLE (victim);

Ok.

> -      victim = _int_malloc (&main_arena, bytes);
> +      victim = TAG_NEW_USABLE (_int_malloc (&main_arena, bytes));

Ok.

> +  victim = TAG_NEW_USABLE (victim);
> +

Ok.

> +#ifdef _LIBC_MTAG
> +  /* Quickly check that the freed pointer matches the tag for the memory.
> +     This gives a useful double-free detection.  */
> +  *(volatile char *)mem;
> +#endif

Ok.  Clever: after a first free retags the region for the library,
a second free of the same pointer will fault here, since the
pointer's tag no longer matches the memory's.

> +  /* Mark the chunk as belonging to the library again.  */
> +  (void)TAG_REGION (chunk2rawmem (p), __malloc_usable_size (mem));
> +

Does this untag or retag?  Is there a "malloc tag" or does the OS just
randomly supply a new tag?

> +#ifdef _LIBC_MTAG
> +  /* Perform a quick check to ensure that the pointer's tag matches the
> +     memory's tag.  */
> +  *(volatile char*) oldmem;
> +#endif

Ok.

>  #if HAVE_MREMAP
>        newp = mremap_chunk (oldp, nb);
>        if (newp)
> -        return chunk2mem (newp);
> +	{
> +	  void *newmem = chunk2rawmem (newp);
> +	  /* Give the new block a different tag.  This helps to ensure
> +	     that stale handles to the previous mapping are not
> +	     reused.  There's a performance hit for both us and the
> +	     caller for doing this, so we might want to
> +	     reconsider.  */
> +	  return TAG_NEW_USABLE (newmem);
> +	}
>  #endif

Ok.

> @@ -3308,7 +3388,6 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
>        return 0;
>      }
>  
> -
>    /* Make sure alignment is power of 2.  */
>    if (!powerof2 (alignment))
>      {

Superfluous ;-)

> -
> -      return p;
> +      return TAG_NEW_USABLE (p);
>      }

Ok.

>    arena_get (ar_ptr, bytes + alignment + MINSIZE);
> -  return p;
> +  return TAG_NEW_USABLE (p);

Ok.

> +  void *p;
> +

Ok.


> -  return _mid_memalign (pagesize, bytes, address);
> +  p = _mid_memalign (pagesize, bytes, address);
> +  return TAG_NEW_USABLE (p);

Ok.

> +  void *p;
> +
>    if (__malloc_initialized < 0)
>      ptmalloc_init ();
>  
> @@ -3378,19 +3461,22 @@ __libc_pvalloc (size_t bytes)
>      }
>    rounded_bytes = rounded_bytes & -(pagesize - 1);
>  
> -  return _mid_memalign (pagesize, rounded_bytes, address);
> +  p = _mid_memalign (pagesize, rounded_bytes, address);
> +  return TAG_NEW_USABLE (p);

Ok.

>  }
>  
>  void *
>  __libc_calloc (size_t n, size_t elem_size)
>  {
>    mstate av;
> -  mchunkptr oldtop, p;
> -  INTERNAL_SIZE_T sz, csz, oldtopsize;
> +  mchunkptr oldtop;
> +  INTERNAL_SIZE_T sz, oldtopsize;
>    void *mem;
> +#ifndef _LIBC_MTAG
>    unsigned long clearsize;
>    unsigned long nclears;
>    INTERNAL_SIZE_T *d;
> +#endif
>    ptrdiff_t bytes;
>  
>    if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
> @@ -3398,6 +3484,7 @@ __libc_calloc (size_t n, size_t elem_size)
>         __set_errno (ENOMEM);
>         return NULL;
>      }
> +
>    sz = bytes;
>  
>    void *(*hook) (size_t, const void *) =
> @@ -3467,7 +3554,14 @@ __libc_calloc (size_t n, size_t elem_size)
>    if (mem == 0)
>      return 0;
>  
> -  p = mem2chunk (mem);
> +  /* If we are using memory tagging, then we need to set the tags
> +     regardless of MORECORE_CLEARS, so we zero the whole block while
> +     doing so.  */
> +#ifdef _LIBC_MTAG
> +  return TAG_NEW_MEMSET (mem, 0, __malloc_usable_size (mem));
> +#else
> +  mchunkptr p = mem2chunk (mem);
> +  INTERNAL_SIZE_T csz = chunksize (p);
>  
>    /* Two optional cases in which clearing not necessary */
>    if (chunk_is_mmapped (p))
> @@ -3478,8 +3572,6 @@ __libc_calloc (size_t n, size_t elem_size)
>        return mem;
>      }
>  
> -  csz = chunksize (p);
> -
>  #if MORECORE_CLEARS
>    if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
>      {
> @@ -3522,6 +3614,7 @@ __libc_calloc (size_t n, size_t elem_size)
>      }
>  
>    return mem;
> +#endif
>  }

Ok.  There should be a section in the manual (manual/*) about tagging,
with a list of performance (speed/size) impacts such as these.

> -          return chunk2mem (oldp);
> +          return TAG_NEW_USABLE (chunk2rawmem (oldp));

Ok.

> -	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
> +	      void *oldmem = chunk2mem (oldp);
> +	      newmem = TAG_NEW_USABLE (newmem);
> +	      memcpy (newmem, oldmem, __malloc_usable_size (oldmem));
> +	      (void) TAG_REGION (chunk2rawmem (oldp), oldsize);

Ok.

> +      /* Clear any user-space tags before writing the header.  */
> +      remainder = TAG_REGION (remainder, remainder_size);

Ok.

> -  return chunk2mem (newp);
> -}
> +  return TAG_NEW_USABLE (chunk2rawmem (newp));
> +    }

Ok, but why the extra spaces before the closing brace?

>        assert (newsize >= nb &&
> -              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
> +              (((unsigned long) (chunk2rawmem (p))) % alignment) == 0);

Ok.

> -                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
> +                assert ((char *) chunk2rawmem (p) + 4 * SIZE_SZ <= paligned_mem);

Ok.

> @@ -4877,20 +4975,30 @@ musable (void *mem)
>    mchunkptr p;
>    if (mem != 0)
>      {
> +      size_t result = 0;
> +
>        p = mem2chunk (mem);
>  
>        if (__builtin_expect (using_malloc_checking == 1, 0))
> -        return malloc_check_get_size (p);
> +	return malloc_check_get_size (p);
>
>        if (chunk_is_mmapped (p))
>  	{
>  	  if (DUMPED_MAIN_ARENA_CHUNK (p))
> -	    return chunksize (p) - SIZE_SZ;
> +	    result = chunksize (p) - SIZE_SZ;
>  	  else
> -	    return chunksize (p) - 2 * SIZE_SZ;
> +	    result = chunksize (p) - 2 * SIZE_SZ;
>  	}
>        else if (inuse (p))
> -        return chunksize (p) - SIZE_SZ;
> +	result = chunksize (p) - SIZE_SZ;
> +
> +#ifdef _LIBC_MTAG
> +      /* The usable space may be reduced if memory tagging is needed,
> +	 since we cannot share the user-space data with malloc's internal
> +	 data structure.  */
> +      result &= __mtag_granule_mask;
> +#endif
> +      return result;
>      }

Ok.
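
Just to sanity-check the masking with a concrete example (assuming a
16-byte granule, so __mtag_granule_mask == ~15):

  /* An in-use, non-mmapped chunk with chunksize 48 and SIZE_SZ 8
     would normally report 48 - 8 = 40 usable bytes; 40 & ~15 rounds
     that down to 32, since the final partial granule has to stay
     with the next chunk's header rather than be tagged for the
     user.  */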

> +#ifdef _LIBC_MTAG
> +extern int __mtag_mmap_flags;
> +#define MTAG_MMAP_FLAGS __mtag_mmap_flags
> +#else
> +#define MTAG_MMAP_FLAGS 0
> +#endif

Ok.

> diff --git a/sysdeps/generic/libc-mtag.h b/sysdeps/generic/libc-mtag.h
> new file mode 100644
> index 0000000000..3e9885451c
> --- /dev/null
> +++ b/sysdeps/generic/libc-mtag.h
> @@ -0,0 +1,52 @@
> +/* libc-internal interface for tagged (colored) memory support.
> +   Copyright (C) 2019 Free Software Foundation, Inc.

It's 2020 now :-)

> +/* Generic bindings for systems that do not support memory tagging.  */
> +
> +/* Used to ensure additional alignment when objects need to have distinct
> +   tags.  */
> +#define __MTAG_GRANULE_SIZE 1
> +
> +/* Non-zero if memory obtained via morecore (sbrk) is not tagged.  */
> +#define __MTAG_SBRK_UNTAGGED 0
> +
> +/* Extra flags to pass to mmap() to request a tagged region of memory.  */
> +#define __MTAG_MMAP_FLAGS 0
> +
> +/* Set the tags for a region of memory, which must have size and alignment
> +   that are multiples of __MTAG_GRANULE_SIZE.  Size cannot be zero.
> +   void *__libc_mtag_tag_region (const void *, size_t)  */
> +#define __libc_mtag_tag_region(p, s) (p)
> +
> +/* Optimized equivalent to __libc_mtag_tag_region followed by memset.  */
> +#define __libc_mtag_memset_with_tag memset
> +
> +/* Convert address P to a pointer that is tagged correctly for that
> +   location.
> +   void *__libc_mtag_address_get_tag (void*)  */
> +#define __libc_mtag_address_get_tag(p) (p)
> +
> +/* Assign a new (random) tag to a pointer P (does not adjust the tag on
> +   the memory addressed).
> +   void *__libc_mtag_new_tag (void*)  */
> +#define __libc_mtag_new_tag(p) (p)
> +
> +#endif /* _GENERIC_LIBC_MTAG_H */

Ok.


