[PATCH] malloc: Enable merging of remainders in memalign (bug 30723)
DJ Delorie
dj@redhat.com
Wed Aug 9 21:58:57 GMT 2023
Florian Weimer <fweimer@redhat.com> writes:
> Previously, calling _int_free from _int_memalign could put remainders
> into the tcache or into fastbins, where they are invisible to the
> low-level allocator. This results in missed merge opportunities
> because once these freed chunks become available to the low-level
> allocator, further memalign allocations (even of the same size) are
> likely obstructing merges.
I would note that everything else malloc does has this problem, too.
Without limits or checks on fastbin size, anything that puts a chunk on
the fastbin could "permanently" block consolidation somewhere. This is
why tcache has a fixed upper limit. If we had a more robust malloc
benchmark suite, I'd suggest revisiting (a potentially bigger) tcache
vs. fastbins to try to reduce fragmentation, as memory size seems to be
becoming more important relative to memory speed these days.
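As a concrete sketch of the failure mode Florian describes (the sizes
and counts below are invented for illustration, not taken from the
patch):

  /* Hypothetical reproducer shape: each aligned request may split a
     larger chunk, and before this patch the split-off remainders were
     freed through _int_free, where they could land in the tcache or
     fastbins, invisible to the low-level allocator's merge logic.  */
  #include <stdlib.h>

  int
  main (void)
  {
    enum { N = 32 };
    void *blocks[N];

    for (int i = 0; i < N; i++)
      blocks[i] = aligned_alloc (4096, 4096);

    for (int i = 0; i < N; i++)
      free (blocks[i]);

    /* Before the fix, the cached remainders interleaved with these
       blocks could keep the region from coalescing back into a
       single large free chunk.  */
    return 0;
  }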
LGTM.
Reviewed-by: DJ Delorie <dj@redhat.com>
> +static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T);
> +static INTERNAL_SIZE_T _int_free_create_chunk (mstate,
> +                                               mchunkptr, INTERNAL_SIZE_T,
> +                                               mchunkptr, INTERNAL_SIZE_T);
> +static void _int_free_maybe_consolidate (mstate, INTERNAL_SIZE_T);
So what we're doing is breaking out some of the functionality of
_int_free() into separately-callable functions, and these are the
prototypes for those new functions. Ok.
I'll review the "diff -bw" for the remainder, to simplify it...
-    nextchunk = chunk_at_offset(p, size);
+    _int_free_merge_chunk (av, p, size);
+
+    if (!have_lock)
+      __libc_lock_unlock (av->mutex);
+  }
+  /*
+    If the chunk was allocated via mmap, release via munmap().
+  */
+
+  else {
+    munmap_chunk (p);
+  }
+}
+
+/* Try to merge chunk P of SIZE bytes with its neighbors.  Put the
+   resulting chunk on the appropriate bin list.  P must not be on a
+   bin list yet, and it can be in use.  */
+static void
+_int_free_merge_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
+{
+  mchunkptr nextchunk = chunk_at_offset(p, size);
Split off the end of _int_free into separate functions, and call them. Ok.
@@ -4652,16 +4677,17 @@ _int_free (mstate av, mchunkptr p, int h
   if (__glibc_unlikely (!prev_inuse(nextchunk)))
     malloc_printerr ("double free or corruption (!prev)");

-    nextsize = chunksize(nextchunk);
+  INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
Because this is its first use in the new function, the declaration
moves here.  OK.
-    /* consolidate backward */
-    if (!prev_inuse(p)) {
-      prevsize = prev_size (p);
+  /* Consolidate backward.  */
+  if (!prev_inuse(p))
+    {
+      INTERNAL_SIZE_T prevsize = prev_size (p);
Ok.
-    if (nextchunk != av->top) {
+  /* Write the chunk header, maybe after merging with the following chunk. */
+  size = _int_free_create_chunk (av, p, size, nextchunk, nextsize);
+  _int_free_maybe_consolidate (av, size);
+}
+
+/* Create a chunk at P of SIZE bytes, with SIZE potentially increased
+   to cover the immediately following chunk NEXTCHUNK of NEXTSIZE
+   bytes (if NEXTCHUNK is unused).  The chunk at P is not actually
+   read and does not have to be initialized.  After creation, it is
+   placed on the appropriate bin list.  The function returns the size
+   of the new chunk.  */
+static INTERNAL_SIZE_T
+_int_free_create_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size,
+                        mchunkptr nextchunk, INTERNAL_SIZE_T nextsize)
+{
+  if (nextchunk != av->top)
+    {
Again, breaking out this block of code into a separate function, and
calling it. Ok.
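As a worked example of the new function's contract (the numbers are
hypothetical, chosen only to show the forward merge):

  #include <assert.h>
  #include <stddef.h>

  int
  main (void)
  {
    size_t size = 0x40;      /* chunk being created at P */
    size_t nextsize = 0x30;  /* chunk immediately following P */
    int nextinuse = 0;       /* the following chunk is not in use */

    if (!nextinuse)
      size += nextsize;      /* consolidate forward, as the patch does */

    assert (size == 0x70);   /* the size the caller gets back */
    return 0;
  }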
       /* get and clear inuse bit */
-      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+      bool nextinuse = inuse_bit_at_offset (nextchunk, nextsize);
Ok.
-      bck = unsorted_chunks(av);
-      fwd = bck->fd;
+      mchunkptr bck = unsorted_chunks (av);
+      mchunkptr fwd = bck->fd;
Ok.
-    /*
-      If the chunk borders the current high end of memory,
-      consolidate into top
-    */
-
-    else {
+  else
+    {
+      /* If the chunk borders the current high end of memory,
+         consolidate into top.  */
Ok.
-    /*
-      If freeing a large space, consolidate possibly-surrounding
-      chunks. Then, if the total unused topmost memory exceeds trim
-      threshold, ask malloc_trim to reduce top.
-
-      Unless max_fast is 0, we don't know if there are fastbins
-      bordering top, so we cannot tell for sure whether threshold
-      has been reached unless fastbins are consolidated.  But we
-      don't want to consolidate on each free.  As a compromise,
-      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
-      is reached.
-    */
+  return size;
+}
Ok.
-  if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+/* If freeing a large space, consolidate possibly-surrounding
+   chunks.  Then, if the total unused topmost memory exceeds trim
+   threshold, ask malloc_trim to reduce top.  */
+static void
+_int_free_maybe_consolidate (mstate av, INTERNAL_SIZE_T size)
+{
+  /* Unless max_fast is 0, we don't know if there are fastbins
+     bordering top, so we cannot tell for sure whether threshold has
+     been reached unless fastbins are consolidated.  But we don't want
+     to consolidate on each free.  As a compromise, consolidation is
+     performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached.  */
+  if (size >= FASTBIN_CONSOLIDATION_THRESHOLD)
+    {
Ok. More breakout.
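For scale, FASTBIN_CONSOLIDATION_THRESHOLD is 64 KiB (65536UL) in
malloc.c, so the policy amounts to roughly this sketch (a standalone
restatement, not the glibc code):

  #include <stdio.h>

  #define THRESHOLD 65536UL  /* mirrors FASTBIN_CONSOLIDATION_THRESHOLD */

  int
  main (void)
  {
    unsigned long sizes[] = { 4096UL, 65536UL, 1UL << 20 };
    for (int i = 0; i < 3; i++)
      printf ("freeing %7lu bytes: %s malloc_consolidate\n", sizes[i],
              sizes[i] >= THRESHOLD ? "triggers" : "skips");
    return 0;
  }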
-      if (av == &main_arena) {
+      if (av == &main_arena)
+        {
Ok.
 #ifndef MORECORE_CANNOT_TRIM
-          if ((unsigned long)(chunksize(av->top)) >=
-              (unsigned long)(mp_.trim_threshold))
+          if (chunksize (av->top) >= mp_.trim_threshold)
Ok.
-      } else {
-        /* Always try heap_trim(), even if the top chunk is not
-           large, because the corresponding heap might go away. */
+        }
+      else
+        {
+          /* Always try heap_trim, even if the top chunk is not large,
+             because the corresponding heap might go away.  */
Ok.
-
-    if (!have_lock)
-      __libc_lock_unlock (av->mutex);
-  }
-  /*
-    If the chunk was allocated via mmap, release via munmap().
-  */
-
-  else {
-    munmap_chunk (p);
-  }
This was moved above.  Ok.
@@ -5221,7 +5254,7 @@ _int_memalign (mstate av, size_t alignme
                 (av != &main_arena ? NON_MAIN_ARENA : 0));
       set_inuse_bit_at_offset (newp, newsize);
       set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
-      _int_free (av, p, 1);
+      _int_free_merge_chunk (av, p, leadsize);
Ok.
@@ -5232,14 +5265,26 @@ _int_memalign (mstate av, size_t alignme
   if (!chunk_is_mmapped (p))
     {
       size = chunksize (p);
-      if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+      mchunkptr nextchunk = chunk_at_offset(p, size);
+      INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
+      if (size > nb)
Ok.  The MINSIZE check is moved below.
           remainder_size = size - nb;
+          if (remainder_size >= MINSIZE
+              || nextchunk == av->top
+              || !inuse_bit_at_offset (nextchunk, nextsize))
+            {
+              /* We can only give back the tail if it is larger than
+                 MINSIZE, or if the following chunk is unused (top
+                 chunk or unused in-heap chunk).  Otherwise we would
+                 create a chunk that is smaller than MINSIZE.  */
Ok.
               remainder = chunk_at_offset (p, nb);
-          set_head (remainder, remainder_size | PREV_INUSE |
-                    (av != &main_arena ? NON_MAIN_ARENA : 0));
               set_head_size (p, nb);
-          _int_free (av, remainder, 1);
+              remainder_size = _int_free_create_chunk (av, remainder,
+                                                       remainder_size,
+                                                       nextchunk, nextsize);
+              _int_free_maybe_consolidate (av, remainder_size);
+            }
The conditional changes from "big enough" to "big enough, or
mergeable", so OK.
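A standalone restatement of that give-back test (the helper name is
invented, and the MINSIZE value assumes a typical 64-bit build):

  #include <stdbool.h>
  #include <stddef.h>

  #define MINSIZE_ASSUMED 0x20  /* MINSIZE on common 64-bit builds */

  static bool
  can_give_back_tail (size_t remainder_size, bool next_is_top,
                      bool next_in_use)
  {
    /* A tail smaller than MINSIZE cannot stand alone as a chunk; it
       can only be given back by merging it into the top chunk or an
       unused following chunk.  */
    return remainder_size >= MINSIZE_ASSUMED
           || next_is_top
           || !next_in_use;
  }

  int
  main (void)
  {
    /* A 16-byte tail with a busy neighbor must stay attached to P,
       so this returns 0 (cannot give back).  */
    return can_give_back_tail (16, false, true) ? 1 : 0;
  }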