[PATCH v2 3/6] malloc: Basic support for memory tagging in the malloc() family

Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
Thu Jun 25 15:35:08 GMT 2020


On 24/06/2020 22:05, DJ Delorie via Libc-alpha wrote:
> 
> Richard Earnshaw <rearnsha@arm.com> writes:
>> All the hooks use function pointers to allow this to work without
>> needing ifuncs.
> 
>> -#ifdef SHARED
>> +#if defined(SHARED) || defined(_LIBC_MTAG)
>>  static void *
>>  __failing_morecore (ptrdiff_t d)
>>  {
>>    return (void *) MORECORE_FAILURE;
>>  }
>> +#endif
>>  
>> +#ifdef SHARED
>>  extern struct dl_open_hook *_dl_open_hook;
>>  libc_hidden_proto (_dl_open_hook);
>>  #endif
> 
> Makes __failing_morecore available in a static link, ok.
> 
>> +#ifdef _LIBC_MTAG
>> +static void *
>> +__mtag_tag_new_usable (void *ptr)
>> +{
>> +  if (ptr)
>> +    ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr),
>> +				  __malloc_usable_size (ptr));
>> +  return ptr;
>> +}
> 
> Comments, please.  Is this adding a tag to a pointer, or removing a tag
> from a tagged pointer?  I'm guessing adding a tag, but I don't see
> *what* tag is being added, or if it's chosen by the kernel.
> 
> I think it's going to be important to be clear about which pointers are
> tagged and which are untagged, throughout.
> 
> (after completing the review, I see comments in the .h file at the end,
> but still...)

Will fix.
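
To be concrete, the commented version will read something like this
(comment wording is a draft for v3; the tag is generated by the
__libc_mtag_new_tag primitive, not chosen by the kernel):

/* Generate a new (effectively random) tag for the pointer PTR, then
   color the entire usable region of the allocation with that tag.
   On return the pointer's tag and the memory's tags match; any stale
   tag on PTR is discarded.  */
static void *
__mtag_tag_new_usable (void *ptr)
{
  if (ptr)
    ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr),
                                  __malloc_usable_size (ptr));
  return ptr;
}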

> 
>> +static void *
>> +__mtag_tag_new_memset (void *ptr, int val, size_t size)
>> +{
>> +  return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size);
>> +}
>> +#endif
> 
> Likewise.

Will fix.
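
And for the memset variant (again, draft wording):

/* As above, but combine the coloring with a memset: assign a new tag
   to PTR, then set SIZE bytes to VAL while writing the matching
   memory tags in a single pass.  */
static void *
__mtag_tag_new_memset (void *ptr, int val, size_t size)
{
  return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size);
}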

> 
>> @@ -293,6 +312,24 @@ ptmalloc_init (void)
>>  
>>    __malloc_initialized = 0;
>>  
>> +#ifdef _LIBC_MTAG
>> +  if ((TUNABLE_GET_FULL (glibc, memtag, enable, int32_t, NULL) & 1) != 0)
>> +    {
>> +      /* If the environment says that we should be using tagged memory
>> +	 and that morecore does not support tagged regions, then
>> +	 disable it.  */
>> +      if (__MTAG_SBRK_UNTAGGED)
>> +	__morecore = __failing_morecore;
>> +
>> +      __mtag_mmap_flags = __MTAG_MMAP_FLAGS;
>> +      __tag_new_memset = __mtag_tag_new_memset;
>> +      __tag_region = __libc_mtag_tag_region;
>> +      __tag_new_usable = __mtag_tag_new_usable;
>> +      __tag_at = __libc_mtag_address_get_tag;
>> +      __mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
>> +    }
>> +#endif
> 
> Do we need to mangle/demangle these pointers?  I don't see them as
> anything but internally stored pointers, so maybe no, but if those
> static pointers could be corrupted, yes?

I just followed the model used for __morecore.  That wasn't mangled,
AFAICT, so I think these new pointers only need mangling if __morecore
does.
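
For reference, if we did decide to mangle them, it would be the usual
PTR_MANGLE/PTR_DEMANGLE pattern: mangled at rest, demangled at the
call site.  A standalone sketch of the idea (the real macros also
rotate the bits and use the per-process guard initialized at startup):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for __pointer_chk_guard, which is randomized at process
   startup.  */
static uintptr_t pointer_guard = 0x5bd1e995;

static uintptr_t mangle (uintptr_t p)   { return p ^ pointer_guard; }
static uintptr_t demangle (uintptr_t p) { return p ^ pointer_guard; }

static void *noop_hook (void *p) { return p; }

/* A hook stored in writable memory, kept mangled while at rest.  */
static uintptr_t stored_hook;

int
main (void)
{
  stored_hook = mangle ((uintptr_t) &noop_hook);
  /* Demangle only at the point of call.  */
  void *(*fn) (void *) = (void *(*) (void *)) demangle (stored_hook);
  printf ("%p\n", fn (NULL));
  return 0;
}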

> 
>> -  if (__mprotect (p2, size, PROT_READ | PROT_WRITE) != 0)
>> +  if (__mprotect (p2, size, MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0)
> 
> Ok.
> 
>> -                      PROT_READ | PROT_WRITE) != 0)
>> +                      MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0)
> 
> Ok.
> 
>> diff --git a/malloc/hooks.c b/malloc/hooks.c
> 
> Note that there's another patch set by Carlos and me to move all the
> hook-related stuff out of libc.so and into libmtrace.so.

I can't find those patches (at least, searching for libmtrace only shows
your reply on this thread).

> 
>>  void
>>  __malloc_check_init (void)
>>  {
>> +#if HAVE_TUNABLES && defined (_LIBC_MTAG)
>> +  /* If memory tagging is enabled, there is no need for the boundary
>> +     checking cookie as well.  */
>> +  if ((TUNABLE_GET_FULL (glibc, memtag, enable, int32_t, NULL) & 1) != 0)
>> +    return;
>> +#endif
> 
> Malloc checking does more than boundary checks, so I would not disable
> these hooks just because tagging is enabled.  More checks better than
> fewer checks :-)

I'll have another look, but this is all a bit hairy; the code has
built-in assumptions about how to find the end of a block that will
need adjusting.

I'll see if it's possible to just disable the trailing byte marker.


> 
>> +/* For memory tagging.  */
>> +#include <libc-mtag.h>
> 
> Ok.
> 
>> +#ifdef _LIBC_MTAG
>> +static void *
>> +__default_tag_region (void *ptr, size_t size)
>> +{
>> +  return ptr;
>> +}
>> +
>> +static void *
>> +__default_tag_nop (void *ptr)
>> +{
>> +  return ptr;
>> +}
> 
> Needed for mtag present but disabled, ok.
> 
>> +static int __mtag_mmap_flags = 0;
>> +static size_t __mtag_granule_mask = ~(size_t)0;
>> +
>> +static void *(*__tag_new_memset)(void *, int, size_t) = memset;
>> +static void *(*__tag_region)(void *, size_t) = __default_tag_region;
>> +static void *(*__tag_new_usable)(void *) = __default_tag_nop;
>> +static void *(*__tag_at)(void *) = __default_tag_nop;
>> +
>> +# define TAG_NEW_MEMSET(ptr, val, size) __tag_new_memset (ptr, val, size)
>> +# define TAG_REGION(ptr, size) __tag_region (ptr, size)
>> +# define TAG_NEW_USABLE(ptr) __tag_new_usable (ptr)
>> +# define TAG_AT(ptr) __tag_at (ptr)
> 
> Cases for mtag-supporting arches, ok.
> 
>> +#else
>> +# define TAG_NEW_MEMSET(ptr, val, size) memset (ptr, val, size)
>> +# define TAG_REGION(ptr, size) (ptr)
>> +# define TAG_NEW_USABLE(ptr) (ptr)
>> +# define TAG_AT(ptr) (ptr)
>> +#endif
> 
> All other arches, no-ops, ok.
> 
>> +/* When using tagged memory, we cannot share the end of the user block
>> +   with the header for the next chunk, so ensure that we allocate
>> +   blocks that are rounded up to the granule size.  Take care not to
>> +   overflow from close to MAX_SIZE_T to a small number.  */
>> +static inline size_t
>> +ROUND_UP_ALLOCATION_SIZE(size_t bytes)
>> +{
>> +#ifdef _LIBC_MTAG
>> +  return (bytes + ~__mtag_granule_mask) & __mtag_granule_mask;
>> +#else
>> +  return bytes;
>> +#endif
>> +}
>> +
> 
> This is duplicating the request2size() macro's purpose, and is only used
> when request2size is used anyway, so this logic should be within
> request2size.  request2size() is used in more places than
> checked_request2size.
> 

There's a problem with that.  request2size() has to be evaluated
statically at compile time, as it's used to size some arrays, so it
can't access __mtag_granule_mask, which is not a compile-time
constant.
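
A minimal illustration of the constraint (the names here are mine,
not the real definitions): a file-scope array bound must be an
integer constant expression, so the macro cannot read a variable that
is only set in ptmalloc_init:

#include <stddef.h>

static size_t granule_mask;  /* only known at runtime */

/* Integer constant expression: usable as an array bound.  */
#define REQ2SIZE_CONST(req) (((req) + 15) & ~(size_t) 15)

/* Reads a runtime variable: not a constant expression.  */
#define REQ2SIZE_RUNTIME(req) (((req) + ~granule_mask) & granule_mask)

static char buf1[REQ2SIZE_CONST (100)];     /* compiles */
/* static char buf2[REQ2SIZE_RUNTIME (100)];
   -- error: not a constant expression at file scope */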

>> -#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
>> -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
>> +#define chunk2mem(p) ((void*)TAG_AT (((char*)(p) + 2*SIZE_SZ)))
>> +#define chunk2rawmem(p) ((void*)((char*)(p) + 2*SIZE_SZ))
>> +#define mem2chunk(mem) ((mchunkptr)TAG_AT (((char*)(mem) - 2*SIZE_SZ)))
> 
> I think this warrants a comment about when to use chunk2mem and when to
> use chunk2rawmem.
> 

I'll write a major comment that will cover all this and the new internal
APIs I've introduced.
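
The short version of the distinction, which the comment will spell
out (draft wording):

  chunk2mem (p)      Convert a chunk address to a pointer for
                     application use: apply the 2*SIZE_SZ offset and,
                     via TAG_AT, give the result the tag that matches
                     the memory.

  chunk2rawmem (p)   The same offset with no tag adjustment.  For
                     internal use only: alignment checks and accesses
                     to malloc's own metadata, where the memory's
                     color is irrelevant.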

>> -      assert (aligned_OK (chunk2mem (p)));
>> +      assert (aligned_OK (chunk2rawmem (p)));
> 
> Should be ok; tagging shouldn't change alignment.
> 
>> -      assert (aligned_OK (chunk2mem (p)));
>> +      assert (aligned_OK (chunk2rawmem (p)));
> 
> Likewise.
> 
>> -  assert (aligned_OK (chunk2mem (p)));
>> +  assert (aligned_OK (chunk2rawmem (p)));
> 
> Likewise.
> 
>> -          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
>> +          mm = (char *) (MMAP (0, size,
>> +			       MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE, 0));
> 
> Ok.
> 
>> -                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
>> +                  /* For glibc, chunk2rawmem increases the address by 2*SIZE_SZ and
>>                       MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
>>                       aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
>> -                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
>> +                  assert (((INTERNAL_SIZE_T) chunk2rawmem (mm) & MALLOC_ALIGN_MASK) == 0);
> 
> Ok.
> 
>> -                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
>> +                front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (mm) & MALLOC_ALIGN_MASK;
> 
> Ok.
> 
>> -              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
>> +              char *mbrk = (char *) (MMAP (0, size,
>> +					   MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE,
>> +					   0));
> 
> Ok.
> 
>> -                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
>> +                  front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (brk) & MALLOC_ALIGN_MASK;
> 
> Ok.
> 
>> -                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
>> +                    assert (((unsigned long) chunk2rawmem (brk) & MALLOC_ALIGN_MASK) == 0);
> 
> Ok.
> 
>> -                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
>> +                      front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (brk) & MALLOC_ALIGN_MASK;
> 
> Ok.
> 
>> -  uintptr_t mem = (uintptr_t) chunk2mem (p);
>> +  uintptr_t mem = (uintptr_t) chunk2rawmem (p);
> 
> Ok.
> 
>> -  assert (aligned_OK (chunk2mem (p)));
>> +  assert (aligned_OK (chunk2rawmem (p)));
> 
> Ok.
> 
>> -      return tcache_get (tc_idx);
>> +      victim = tcache_get (tc_idx);
>> +      return TAG_NEW_USABLE (victim);
> 
> Ok.
> 
>> -      victim = _int_malloc (&main_arena, bytes);
>> +      victim = TAG_NEW_USABLE (_int_malloc (&main_arena, bytes));
> 
> Ok.
> 
>> +  victim = TAG_NEW_USABLE (victim);
>> +
> 
> Ok.
> 
>> +#ifdef _LIBC_MTAG
>> +  /* Quickly check that the freed pointer matches the tag for the memory.
>> +     This gives a useful double-free detection.  */
>> +  *(volatile char *)mem;
>> +#endif
> 
> Ok.
> 
>> +  /* Mark the chunk as belonging to the library again.  */
>> +  (void)TAG_REGION (chunk2rawmem (p), __malloc_usable_size (mem));
>> +
> 
> Does this untag or retag?  Is there a "malloc tag" or does the OS just
> randomly supply a new tag?
> 
>> +#ifdef _LIBC_MTAG
>> +  /* Perform a quick check to ensure that the pointer's tag matches the
>> +     memory's tag.  */
>> +  *(volatile char*) oldmem;
>> +#endif
> 
> Ok.
> 
>>  #if HAVE_MREMAP
>>        newp = mremap_chunk (oldp, nb);
>>        if (newp)
>> -        return chunk2mem (newp);
>> +	{
>> +	  void *newmem = chunk2rawmem (newp);
>> +	  /* Give the new block a different tag.  This helps to ensure
>> +	     that stale handles to the previous mapping are not
>> +	     reused.  There's a performance hit for both us and the
>> +	     caller for doing this, so we might want to
>> +	     reconsider.  */
>> +	  return TAG_NEW_USABLE (newmem);
>> +	}
>>  #endif
> 
> Ok.
> 
>> @@ -3308,7 +3388,6 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
>>        return 0;
>>      }
>>  
>> -
>>    /* Make sure alignment is power of 2.  */
>>    if (!powerof2 (alignment))
>>      {
> 
> Superfluous ;-)
> 
>> -
>> -      return p;
>> +      return TAG_NEW_USABLE (p);
>>      }
> 
> Ok.
> 
>>    arena_get (ar_ptr, bytes + alignment + MINSIZE);
>> -  return p;
>> +  return TAG_NEW_USABLE (p);
> 
> Ok.
> 
>> +  void *p;
>> +
> 
> Ok.
> 
> 
>> -  return _mid_memalign (pagesize, bytes, address);
>> +  p = _mid_memalign (pagesize, bytes, address);
>> +  return TAG_NEW_USABLE (p);
> 
> Ok.
> 
>> +  void *p;
>> +
>>    if (__malloc_initialized < 0)
>>      ptmalloc_init ();
>>  
>> @@ -3378,19 +3461,22 @@ __libc_pvalloc (size_t bytes)
>>      }
>>    rounded_bytes = rounded_bytes & -(pagesize - 1);
>>  
>> -  return _mid_memalign (pagesize, rounded_bytes, address);
>> +  p = _mid_memalign (pagesize, rounded_bytes, address);
>> +  return TAG_NEW_USABLE (p);
> 
> Ok.
> 
>>  }
>>  
>>  void *
>>  __libc_calloc (size_t n, size_t elem_size)
>>  {
>>    mstate av;
>> -  mchunkptr oldtop, p;
>> -  INTERNAL_SIZE_T sz, csz, oldtopsize;
>> +  mchunkptr oldtop;
>> +  INTERNAL_SIZE_T sz, oldtopsize;
>>    void *mem;
>> +#ifndef _LIBC_MTAG
>>    unsigned long clearsize;
>>    unsigned long nclears;
>>    INTERNAL_SIZE_T *d;
>> +#endif
>>    ptrdiff_t bytes;
>>  
>>    if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
>> @@ -3398,6 +3484,7 @@ __libc_calloc (size_t n, size_t elem_size)
>>         __set_errno (ENOMEM);
>>         return NULL;
>>      }
>> +
>>    sz = bytes;
>>  
>>    void *(*hook) (size_t, const void *) =
>> @@ -3467,7 +3554,14 @@ __libc_calloc (size_t n, size_t elem_size)
>>    if (mem == 0)
>>      return 0;
>>  
>> -  p = mem2chunk (mem);
>> +  /* If we are using memory tagging, then we need to set the tags
>> +     regardless of MORECORE_CLEARS, so we zero the whole block while
>> +     doing so.  */
>> +#ifdef _LIBC_MTAG
>> +  return TAG_NEW_MEMSET (mem, 0, __malloc_usable_size (mem));
>> +#else
>> +  mchunkptr p = mem2chunk (mem);
>> +  INTERNAL_SIZE_T csz = chunksize (p);
>>  
>>    /* Two optional cases in which clearing not necessary */
>>    if (chunk_is_mmapped (p))
>> @@ -3478,8 +3572,6 @@ __libc_calloc (size_t n, size_t elem_size)
>>        return mem;
>>      }
>>  
>> -  csz = chunksize (p);
>> -
>>  #if MORECORE_CLEARS
>>    if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
>>      {
>> @@ -3522,6 +3614,7 @@ __libc_calloc (size_t n, size_t elem_size)
>>      }
>>  
>>    return mem;
>> +#endif
>>  }
> 
> Ok.  There should be a section in the manual (manual/*) about tagging,
> with a list of performance (speed/size) impacts such as these.

Until I have real hardware, and have fully optimized the back-end
operations, I'm not sure what the overall impact is going to be.
Clearly it won't be zero: coloring blocks of memory on allocation or
freeing takes time proportional to the size of the object.
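
To give a feel for the shape of the cost: a generic tag-region
operation is necessarily linear in the region size.  On AArch64 the
loop body becomes a single tag-store (STG/ST2G) per granule, but it
is still O(size).  The names below are illustrative, not the real
back end:

#include <stddef.h>

#define GRANULE 16  /* AArch64 MTE granule size */

/* Stand-in for the architecture's tag-store operation.  */
static void
set_granule_tag (char *granule, unsigned tag)
{
  (void) granule;
  (void) tag;
}

/* Coloring touches every granule, so the cost is proportional to
   the size of the object being allocated or freed.  */
static void *
naive_tag_region (void *ptr, size_t size, unsigned tag)
{
  for (size_t off = 0; off < size; off += GRANULE)
    set_granule_tag ((char *) ptr + off, tag);
  return ptr;
}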

> 
>> -          return chunk2mem (oldp);
>> +          return TAG_NEW_USABLE (chunk2rawmem (oldp));
> 
> Ok.
> 
>> -	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
>> +	      void *oldmem = chunk2mem (oldp);
>> +	      newmem = TAG_NEW_USABLE (newmem);
>> +	      memcpy (newmem, oldmem, __malloc_usable_size (oldmem));
>> +	      (void) TAG_REGION (chunk2rawmem (oldp), oldsize);
> 
> Ok.
> 
>> +      /* Clear any user-space tags before writing the header.  */
>> +      remainder = TAG_REGION (remainder, remainder_size);
> 
> Ok.
> 
>> -  return chunk2mem (newp);
>> -}
>> +  return TAG_NEW_USABLE (chunk2rawmem (newp));
>> +    }
> 
> Ok, but spaces before the brace?
> 

Oops!

>>        assert (newsize >= nb &&
>> -              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
>> +              (((unsigned long) (chunk2rawmem (p))) % alignment) == 0);
> 
> Ok.
> 
>> -                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
>> +                assert ((char *) chunk2rawmem (p) + 4 * SIZE_SZ <= paligned_mem);
> 
> Ok.
> 
>> @@ -4877,20 +4975,30 @@ musable (void *mem)
>>    mchunkptr p;
>>    if (mem != 0)
>>      {
>> +      size_t result = 0;
>> +
>>        p = mem2chunk (mem);
>>  
>>        if (__builtin_expect (using_malloc_checking == 1, 0))
>> -        return malloc_check_get_size (p);
>> +	return malloc_check_get_size (p);
>>
>>        if (chunk_is_mmapped (p))
>>  	{
>>  	  if (DUMPED_MAIN_ARENA_CHUNK (p))
>> -	    return chunksize (p) - SIZE_SZ;
>> +	    result = chunksize (p) - SIZE_SZ;
>>  	  else
>> -	    return chunksize (p) - 2 * SIZE_SZ;
>> +	    result = chunksize (p) - 2 * SIZE_SZ;
>>  	}
>>        else if (inuse (p))
>> -        return chunksize (p) - SIZE_SZ;
>> +	result = chunksize (p) - SIZE_SZ;
>> +
>> +#ifdef _LIBC_MTAG
>> +      /* The usable space may be reduced if memory tagging is needed,
>> +	 since we cannot share the user-space data with malloc's internal
>> +	 data structure.  */
>> +      result &= __mtag_granule_mask;
>> +#endif
>> +      return result;
>>      }
> 
> Ok.
> 
>> +#ifdef _LIBC_MTAG
>> +extern int __mtag_mmap_flags;
>> +#define MTAG_MMAP_FLAGS __mtag_mmap_flags
>> +#else
>> +#define MTAG_MMAP_FLAGS 0
>> +#endif
> 
> Ok.
> 
>> diff --git a/sysdeps/generic/libc-mtag.h b/sysdeps/generic/libc-mtag.h
>> new file mode 100644
>> index 0000000000..3e9885451c
>> --- /dev/null
>> +++ b/sysdeps/generic/libc-mtag.h
>> @@ -0,0 +1,52 @@
>> +/* libc-internal interface for tagged (colored) memory support.
>> +   Copyright (C) 2019 Free Software Foundation, Inc.
> 
> It's 2020 now :-)


Really?  With all this lockdown I think I'd rather go back to 2019 :)

> 
>> +/* Generic bindings for systems that do not support memory tagging.  */
>> +
>> +/* Used to ensure additional alignment when objects need to have distinct
>> +   tags.  */
>> +#define __MTAG_GRANULE_SIZE 1
>> +
>> +/* Non-zero if memory obtained via morecore (sbrk) is not tagged.  */
>> +#define __MTAG_SBRK_UNTAGGED 0
>> +
>> +/* Extra flags to pass to mmap() to request a tagged region of memory.  */
>> +#define __MTAG_MMAP_FLAGS 0
>> +
>> +/* Set the tags for a region of memory, which must have size and alignment
>> +   that are multiples of __MTAG_GRANULE_SIZE.  Size cannot be zero.
>> +   void *__libc_mtag_tag_region (const void *, size_t)  */
>> +#define __libc_mtag_tag_region(p, s) (p)
>> +
>> +/* Optimized equivalent to __libc_mtag_tag_region followed by memset.  */
>> +#define __libc_mtag_memset_with_tag memset
>> +
>> +/* Convert address P to a pointer that is tagged correctly for that
>> +   location.
>> +   void *__libc_mtag_address_get_tag (void*)  */
>> +#define __libc_mtag_address_get_tag(p) (p)
>> +
>> +/* Assign a new (random) tag to a pointer P (does not adjust the tag on
>> +   the memory addressed).
>> +   void *__libc_mtag_new_tag (void*)  */
>> +#define __libc_mtag_new_tag(p) (p)
>> +
>> +#endif /* _GENERIC_LIBC_MTAG_H */
> 
> Ok.
> 


