[PATCH] malloc: Enable merging of remainders in memalign (bug 30723)

DJ Delorie dj@redhat.com
Wed Aug 9 21:58:57 GMT 2023


Florian Weimer <fweimer@redhat.com> writes:
> Previously, calling _int_free from _int_memalign could put remainders
> into the tcache or into fastbins, where they are invisible to the
> low-level allocator.  This results in missed merge opportunities
> because once these freed chunks become available to the low-level
> allocator, further memalign allocations (even of the same size are)
> likely obstructing merges.

I would note that everything else malloc does has this problem, too.
Without limits or checks on fastbin size, anything that puts a chunk on
a fastbin can "permanently" block consolidation somewhere.  This is why
tcache has a fixed upper limit.  If we had a more robust malloc
benchmark suite, I'd suggest revisiting the tcache vs. fastbins split
(possibly with a bigger tcache) to try to reduce fragmentation, since
memory footprint seems to be becoming more important relative to memory
speed these days.
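
To make that concrete, here is a toy model (made-up types, nothing like
the real chunk headers) of how a fastbin-held chunk, which still looks
in use, keeps its two free neighbors from being merged:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy model, not glibc: adjacent "chunks" with sizes and in-use bits.
   A chunk parked in a fastbin keeps its in-use bit set, so the
   coalescing pass below cannot merge its two free neighbors.  */
struct toy_chunk { size_t size; bool inuse; };

/* Largest block obtainable by merging adjacent free chunks.  */
static size_t
largest_free_run (const struct toy_chunk *c, size_t n)
{
  size_t best = 0, run = 0;
  for (size_t i = 0; i < n; i++)
    {
      run = c[i].inuse ? 0 : run + c[i].size;
      if (run > best)
        best = run;
    }
  return best;
}

int
main (void)
{
  /* Middle chunk was freed into a fastbin: still looks in use.  */
  struct toy_chunk heap[] = { { 4096, false }, { 32, true }, { 4096, false } };
  printf ("largest mergeable block: %zu bytes\n",
          largest_free_run (heap, 3));   /* prints 4096, not 8224 */
  return 0;
}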

LGTM.
Reviewed-by: DJ Delorie <dj@redhat.com>

> +static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T);
> +static INTERNAL_SIZE_T _int_free_create_chunk (mstate,
> +					       mchunkptr, INTERNAL_SIZE_T,
> +					       mchunkptr, INTERNAL_SIZE_T);
> +static void _int_free_maybe_consolidate (mstate, INTERNAL_SIZE_T);

So what we're doing is breaking out some of the functionality of
_int_free() into separately-callable functions, and these are the
prototypes for those new functions.  Ok.

I'll review the "diff -bw" for the remainder, to simplify it...

 
-    nextchunk = chunk_at_offset(p, size);
+    _int_free_merge_chunk (av, p, size);
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/* Try to merge chunk P of SIZE bytes with its neighbors.  Put the
+   resulting chunk on the appropriate bin list.  P must not be on a
+   bin list yet, and it can be in use.  */
+static void
+_int_free_merge_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
+{
+  mchunkptr nextchunk = chunk_at_offset(p, size);

Split off the end of _int_free into separate functions, and call them.  Ok.
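
As a mental model of the backward-consolidation part, here is a
simplified sketch (toy struct, chunks assumed back to back in one
buffer, not the real packed header bits):

#include <stdbool.h>
#include <stddef.h>

/* Toy sketch of the backward-consolidation step, assuming chunks sit
   back to back in one buffer and prev_size/prev_inuse are plain
   fields rather than packed header bits.  Not the real chunk layout.  */
struct toy_chunk { size_t prev_size; size_t size; bool prev_inuse; };

/* If the previous chunk is free, step P back over it and grow *SIZE.
   The real code also unlinks the previous chunk from its bin.  */
static struct toy_chunk *
toy_merge_backward (struct toy_chunk *p, size_t *size)
{
  if (!p->prev_inuse)
    {
      size_t prevsize = p->prev_size;
      *size += prevsize;
      p = (struct toy_chunk *) ((char *) p - prevsize);
    }
  return p;
}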

 
@@ -4652,16 +4677,17 @@ _int_free (mstate av, mchunkptr p, int h
     if (__glibc_unlikely (!prev_inuse(nextchunk)))
       malloc_printerr ("double free or corruption (!prev)");
 
-    nextsize = chunksize(nextchunk);
+  INTERNAL_SIZE_T nextsize = chunksize(nextchunk);

Declared here because it's the first use in this new function.  Ok.

-    /* consolidate backward */
-    if (!prev_inuse(p)) {
-      prevsize = prev_size (p);
+  /* Consolidate backward.  */
+  if (!prev_inuse(p))
+    {
+      INTERNAL_SIZE_T prevsize = prev_size (p);

Ok.

-    if (nextchunk != av->top) {
+  /* Write the chunk header, maybe after merging with the following chunk.  */
+  size = _int_free_create_chunk (av, p, size, nextchunk, nextsize);
+  _int_free_maybe_consolidate (av, size);
+}
+
+/* Create a chunk at P of SIZE bytes, with SIZE potentially increased
+   to cover the immediately following chunk NEXTCHUNK of NEXTSIZE
+   bytes (if NEXTCHUNK is unused).  The chunk at P is not actually
+   read and does not have to be initialized.  After creation, it is
+   placed on the appropriate bin list.  The function returns the size
+   of the new chunk.  */
+static INTERNAL_SIZE_T
+_int_free_create_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size,
+			mchunkptr nextchunk, INTERNAL_SIZE_T nextsize)
+{
+  if (nextchunk != av->top)
+    {

Again, breaking out this block of code into a separate function, and
calling it.  Ok.
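
Roughly, the new helper does this (a toy sketch with stand-in types for
the arena, top chunk and unsorted list, not glibc's actual layout):

#include <stdbool.h>
#include <stddef.h>

/* Toy stand-ins for the arena, top chunk and unsorted list, just to
   show the shape of _int_free_create_chunk.  Not glibc's data layout.  */
struct toy_chunk { size_t size; bool inuse; struct toy_chunk *fd; };
struct toy_arena { struct toy_chunk *top; size_t top_size;
                   struct toy_chunk *unsorted; };

/* P is not on any list yet.  Absorb the following chunk if it is
   free, then either bin the result or fold it into top.  Returns the
   final size, which the caller feeds to the consolidation check.  */
static size_t
toy_create_chunk (struct toy_arena *av, struct toy_chunk *p, size_t size,
                  struct toy_chunk *next, size_t nextsize)
{
  if (next != av->top)
    {
      if (!next->inuse)
        size += nextsize;        /* forward merge; real code unlinks NEXT */
      p->size = size;
      p->inuse = false;
      p->fd = av->unsorted;      /* push onto the unsorted list */
      av->unsorted = p;
    }
  else
    {
      av->top = p;               /* borders top: P becomes the new top */
      av->top_size += size;
    }
  return size;
}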

       /* get and clear inuse bit */
-      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+      bool nextinuse = inuse_bit_at_offset (nextchunk, nextsize);

Ok.
 
-      bck = unsorted_chunks(av);
-      fwd = bck->fd;
+      mchunkptr bck = unsorted_chunks (av);
+      mchunkptr fwd = bck->fd;

Ok.

-    /*
-      If the chunk borders the current high end of memory,
-      consolidate into top
-    */
-
-    else {
+  else
+    {
+      /* If the chunk borders the current high end of memory,
+	 consolidate into top.  */

Ok.

-    /*
-      If freeing a large space, consolidate possibly-surrounding
-      chunks. Then, if the total unused topmost memory exceeds trim
-      threshold, ask malloc_trim to reduce top.
-
-      Unless max_fast is 0, we don't know if there are fastbins
-      bordering top, so we cannot tell for sure whether threshold
-      has been reached unless fastbins are consolidated.  But we
-      don't want to consolidate on each free.  As a compromise,
-      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
-      is reached.
-    */
+  return size;
+}

Ok.
 
-    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+/* If freeing a large space, consolidate possibly-surrounding
+   chunks.  Then, if the total unused topmost memory exceeds trim
+   threshold, ask malloc_trim to reduce top.  */
+static void
+_int_free_maybe_consolidate (mstate av, INTERNAL_SIZE_T size)
+{
+  /* Unless max_fast is 0, we don't know if there are fastbins
+     bordering top, so we cannot tell for sure whether threshold has
+     been reached unless fastbins are consolidated.  But we don't want
+     to consolidate on each free.  As a compromise, consolidation is
+     performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached.  */
+  if (size >= FASTBIN_CONSOLIDATION_THRESHOLD)
+    {

Ok.  More breakout.
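
The shape of the decision, as a toy sketch (the helpers and the
threshold value here are stand-ins, not the real malloc_consolidate /
systrim / heap_trim):

#include <stddef.h>

/* Toy sketch of the decision in _int_free_maybe_consolidate.  The
   helpers and the threshold value are made up; they stand in for
   malloc_consolidate, sbrk-based trimming and heap_trim.  */
#define TOY_CONSOLIDATION_THRESHOLD  (64 * 1024)

struct toy_arena { int is_main; size_t top_size; size_t trim_threshold; };

static void toy_consolidate_fastbins (struct toy_arena *av) { (void) av; }
static void toy_trim_main_heap (struct toy_arena *av) { (void) av; }
static void toy_heap_trim (struct toy_arena *av) { (void) av; }

static void
toy_maybe_consolidate (struct toy_arena *av, size_t freed_size)
{
  /* Only bother when a large chunk was freed; small frees leave the
     fastbins alone and skip trimming entirely.  */
  if (freed_size >= TOY_CONSOLIDATION_THRESHOLD)
    {
      toy_consolidate_fastbins (av);
      if (av->is_main)
        {
          if (av->top_size >= av->trim_threshold)
            toy_trim_main_heap (av);   /* give memory back to the OS */
        }
      else
        /* Non-main arena: always try, the whole heap might go away.  */
        toy_heap_trim (av);
    }
}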

-      if (av == &main_arena) {
+      if (av == &main_arena)
+	{

Ok.

 #ifndef MORECORE_CANNOT_TRIM
-	if ((unsigned long)(chunksize(av->top)) >=
-	    (unsigned long)(mp_.trim_threshold))
+	  if (chunksize (av->top) >= mp_.trim_threshold)

Ok.

-      } else {
-	/* Always try heap_trim(), even if the top chunk is not
-	   large, because the corresponding heap might go away.  */
+	}
+      else
+	{
+	  /* Always try heap_trim, even if the top chunk is not large,
+	     because the corresponding heap might go away.  */

Ok.

-
-    if (!have_lock)
-      __libc_lock_unlock (av->mutex);
-  }
-  /*
-    If the chunk was allocated via mmap, release via munmap().
-  */
-
-  else {
-    munmap_chunk (p);
-  }

This was moved above.  Ok.

 
@@ -5221,7 +5254,7 @@ _int_memalign (mstate av, size_t alignme
                 (av != &main_arena ? NON_MAIN_ARENA : 0));
       set_inuse_bit_at_offset (newp, newsize);
       set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
-      _int_free (av, p, 1);
+      _int_free_merge_chunk (av, p, leadsize);

Ok.

@@ -5232,14 +5265,26 @@ _int_memalign (mstate av, size_t alignme
   if (!chunk_is_mmapped (p))
     {
       size = chunksize (p);
-      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+      mchunkptr nextchunk = chunk_at_offset(p, size);
+      INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
+      if (size > nb)

Ok.  The MINSIZE check is moved below.

           remainder_size = size - nb;
+	  if (remainder_size >= MINSIZE
+	      || nextchunk == av->top
+	      || !inuse_bit_at_offset (nextchunk, nextsize))
+	    {
+	      /* We can only give back the tail if it is larger than
+		 MINSIZE, or if the following chunk is unused (top
+		 chunk or unused in-heap chunk).  Otherwise we would
+		 create a chunk that is smaller than MINSIZE.  */

Ok.

           remainder = chunk_at_offset (p, nb);
-          set_head (remainder, remainder_size | PREV_INUSE |
-                    (av != &main_arena ? NON_MAIN_ARENA : 0));
           set_head_size (p, nb);
-          _int_free (av, remainder, 1);
+	      remainder_size = _int_free_create_chunk (av, remainder,
+						       remainder_size,
+						       nextchunk, nextsize);
+	      _int_free_maybe_consolidate (av, remainder_size);
+	    }

The conditional changes from "big enough" to "big enough, or mergeable",
so OK.
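
Spelled out as a toy predicate (illustrative MINSIZE value, simplified
chunk state):

#include <stdbool.h>
#include <stddef.h>

/* Toy version of the new tail check in _int_memalign.  The MINSIZE
   value here is illustrative, not glibc's actual constant.  */
#define TOY_MINSIZE 32

/* The tail can be returned if it is a valid chunk on its own, or if
   it will be merged with the top chunk / a free following chunk, so
   an undersized chunk is never created.  */
static bool
toy_can_return_tail (size_t remainder_size, bool next_is_top,
                     bool next_in_use)
{
  return remainder_size >= TOY_MINSIZE
         || next_is_top
         || !next_in_use;
}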


