[PATCH v1 1/1] memalign: Support scanning for aligned chunks.
Carlos O'Donell
carlos@redhat.com
Tue Jul 19 02:54:41 GMT 2022
On 7/13/22 23:58, DJ Delorie via Libc-alpha wrote:
> This patch adds a chunk scanning algorithm to the _int_memalign code
> path that reduces heap fragmentation by reusing already aligned chunks
> instead of always looking for chunks of larger sizes and splitting
> them.
>
> The goal is to fix the pathological use cases where heaps grow
> continuously in workloads that are heavy users of memalign.
Fails pre-commit CI. Please review :-)
https://www.delorie.com/trybots/32bit/10936/
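For anyone reviewing without the background from the bug reports: the pattern
being targeted looks roughly like the sketch below. This is my own
construction, not taken from the reported workloads; the sizes, alignment,
and the mallinfo2 reporting (a glibc extension) are arbitrary, and it is only
meant to show the shape of a memalign-heavy loop whose short-lived aligned
blocks sit between long-lived small ones.

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

#define ITERS 100000

int
main (void)
{
  /* Long-lived small allocations interleaved with short-lived aligned
     ones, so freed aligned chunks cannot coalesce with their
     neighbours and new requests keep splitting fresh chunks.  */
  static void *keep[ITERS];

  for (int i = 0; i < ITERS; i++)
    {
      void *p = memalign (64, 1024 + (i % 5) * 48);
      keep[i] = malloc (32);
      if (p == NULL || keep[i] == NULL)
        abort ();
      free (p);
    }

  /* mallinfo2: arena is roughly how much non-mmapped memory the main
     arena holds from the system, fordblks how much of that is free
     but not returned.  */
  struct mallinfo2 mi = mallinfo2 ();
  printf ("arena=%zu fordblks=%zu\n", mi.arena, mi.fordblks);

  for (int i = 0; i < ITERS; i++)
    free (keep[i]);
  return 0;
}

Whether a given glibc shows arena growth with this exact loop depends on the
sizes chosen; the point is that without the patch every memalign call goes
down the split-a-larger-chunk path even when a suitably aligned free chunk
already exists.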
> diff --git a/malloc/Makefile b/malloc/Makefile
> index 4e32de2a0b..7a25f2d781 100644
> --- a/malloc/Makefile
> +++ b/malloc/Makefile
> @@ -43,6 +43,7 @@ tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \
> tst-tcfree1 tst-tcfree2 tst-tcfree3 \
> tst-safe-linking \
> tst-mallocalign1 \
> + tst-memalign-2
>
> tests-static := \
> tst-interpose-static-nothread \
> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index 12908b8f97..219707ebec 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -3557,6 +3557,32 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
> alignment = a;
> }
>
> +#if USE_TCACHE
> + {
> + size_t tbytes;
> + tbytes = checked_request2size (bytes);
> + if (tbytes == 0)
> + {
> + __set_errno (ENOMEM);
> + return NULL;
> + }
> + size_t tc_idx = csize2tidx (tbytes);
> +
> + MAYBE_INIT_TCACHE ();
> +
> + if (tc_idx < mp_.tcache_bins
> + && tcache
> + && tcache->counts[tc_idx] > 0
> + && ((intptr_t)tcache->entries[tc_idx] & (alignment - 1)) == 0)
> + {
> + void *victim = tcache_get (tc_idx);
> + if (__glibc_unlikely (misaligned_chunk (victim)))
> + malloc_printerr ("_mid_memalign(): unaligned tcache chunk detected");
> + return tag_new_usable (victim);
> + }
> + }
> +#endif
> +
> if (SINGLE_THREAD_P)
> {
> p = _int_memalign (&main_arena, alignment, bytes);
> @@ -4937,6 +4963,43 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
> ------------------------------ memalign ------------------------------
> */
>
> +/* Returns 0 if the chunk neither is, nor contains, a sub-chunk
> + meeting the requested alignment; otherwise returns the amount of
> + "waste" that trimming it would leave.  BYTES is the *user* byte
> + size, not the chunk byte size.  */
> +static int
> +chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t bytes)
> +{
> + void *m = chunk2mem (p);
> + INTERNAL_SIZE_T size = memsize (p);
> + void *aligned_m = m;
> +
> + if (__glibc_unlikely (misaligned_chunk (p)))
> + malloc_printerr ("_int_memalign(): unaligned chunk detected");
> +
> + aligned_m = PTR_ALIGN_UP (m, alignment);
> +
> + INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m;
> +
> + /* A leading gap smaller than MINSIZE can't be split off as a free
> + chunk, so such a chunk is unusable. */
> + if (front_extra > 0 && front_extra < MINSIZE)
> + return 0;
> +
> + /* If it's a perfect fit, it's an exception to the return value rule
> + (we would return zero waste, which looks like "not usable"), so
> + handle it here by returning a small non-zero value instead. */
> + if (size == bytes && front_extra == 0)
> + return 1;
> +
> + /* If the block we need fits in the chunk, calculate total waste. */
> + if (size > bytes + front_extra)
> + return size - bytes;
> +
> + /* Can't use this chunk. */
> + return 0;
> +}
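Working the return-value convention through with assumed numbers (alignment
64, MINSIZE of 32 as on typical 64-bit targets): a chunk whose user pointer
sits 48 bytes past an alignment boundary only needs front_extra = 16 to reach
the next boundary, which is below MINSIZE, so it is rejected with 0. One
sitting 16 bytes past a boundary has front_extra = 48; with memsize 200 and a
100-byte request, 200 > 100 + 48 holds and the chunk is accepted with a
reported waste of 200 - 100 = 100. An exactly fitting, already aligned chunk
reports 1 rather than 0 so that it still reads as usable.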
> +
> +/* BYTES is user requested bytes, not requested chunksize bytes. */
> static void *
> _int_memalign (mstate av, size_t alignment, size_t bytes)
> {
> @@ -4950,8 +5013,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
> mchunkptr remainder; /* spare room at end to split off */
> unsigned long remainder_size; /* its size */
> INTERNAL_SIZE_T size;
> -
> -
> + mchunkptr victim;
>
> nb = checked_request2size (bytes);
> if (nb == 0)
> @@ -4960,29 +5022,142 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
> return NULL;
> }
>
> - /*
> - Strategy: find a spot within that chunk that meets the alignment
> + /* We can't check tcache here because we hold the arena lock, which
> + tcache does not expect.  The tcache check is expected to have been
> + done already, in _mid_memalign. */
> +
> + /* Strategy: search the bins looking for an existing block that
> + meets our needs. We scan a range of bins from "exact size" to
> + "just under 2x", spanning the small/large barrier if needed. If
> + we don't find anything in those bins, the common malloc code will
> + scan starting at 2x. */
> +
> + /* This will be set if we found a candidate chunk. */
> + victim = NULL;
> +
> + /* Fast bins are singly-linked, so removing a chunk from the middle
> + is awkward, and their chunks are unlikely to meet our alignment
> + requirements anyway.  We have not experimented with searching the
> + fast bins for aligned chunks. */
> +
> + int first_bin_index;
> + int first_largebin_index;
> + int last_bin_index;
> +
> + if (in_smallbin_range (nb))
> + first_bin_index = smallbin_index (nb);
> + else
> + first_bin_index = largebin_index (nb);
> +
> + if (in_smallbin_range (nb * 2))
> + last_bin_index = smallbin_index (nb * 2);
> + else
> + last_bin_index = largebin_index (nb * 2);
> +
> + first_largebin_index = largebin_index (MIN_LARGE_SIZE);
> +
> + int victim_index; /* its bin index */
> +
> + for (victim_index = first_bin_index;
> + victim_index < last_bin_index;
> + victim_index ++)
> + {
> + victim = NULL;
> +
> + if (victim_index < first_largebin_index)
> + {
> + /* Check small bins.  All chunks in a small bin are the same size,
> + but they are doubly-linked, so we can unlink one from the middle
> + of the list. */
> +
> + mchunkptr fwd; /* misc temp for linking */
> + mchunkptr bck; /* misc temp for linking */
> +
> + bck = bin_at (av, victim_index);
> + fwd = bck->fd;
> + while (fwd != bck)
> + {
> + if (chunk_ok_for_memalign (fwd, alignment, bytes) > 0)
> + {
> + victim = fwd;
> +
> + /* Unlink it */
> + victim->fd->bk = victim->bk;
> + victim->bk->fd = victim->fd;
> + break;
> + }
> +
> + fwd = fwd->fd;
> + }
> + }
> + else
> + {
> + /* Check large bins. */
> + mchunkptr fwd; /* misc temp for linking */
> + mchunkptr bck; /* misc temp for linking */
> + mchunkptr best = NULL;
> + size_t best_size = 0;
> +
> + bck = bin_at (av, victim_index);
> + fwd = bck->fd;
> +
> + while (fwd != bck)
> + {
> + int extra;
> +
> + if (chunksize (fwd) < nb)
> + break;
> + extra = chunk_ok_for_memalign (fwd, alignment, bytes);
> + if (extra > 0
> + && (extra <= best_size || best == NULL))
> + {
> + best = fwd;
> + best_size = extra;
> + }
> +
> + fwd = fwd->fd;
> + }
> + victim = best;
> +
> + if (victim != NULL)
> + {
> + unlink_chunk (av, victim);
> + break;
> + }
> + }
> +
> + if (victim != NULL)
> + break;
> + }
> +
> + /* Strategy: find a spot within that chunk that meets the alignment
> request, and then possibly free the leading and trailing space.
> - */
> + This fallback over-allocates (by alignment + MINSIZE) and can
> + cause external fragmentation if the leading and trailing remainder
> + chunks end up unused. */
>
> - /* Call malloc with worst case padding to hit alignment. */
> + if (victim != NULL)
> + {
> + p = victim;
> + m = chunk2mem (p);
> + set_inuse (p);
> + }
> + else
> + {
> + /* Call malloc with worst case padding to hit alignment. */
>
> - m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
> + m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
>
> - if (m == 0)
> - return 0; /* propagate failure */
> + if (m == 0)
> + return 0; /* propagate failure */
>
> - p = mem2chunk (m);
> + p = mem2chunk (m);
> + }
>
> if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */
> -
> - { /*
> - Find an aligned spot inside chunk. Since we need to give back
> - leading space in a chunk of at least MINSIZE, if the first
> - calculation places us at a spot with less than MINSIZE leader,
> - we can move to the next aligned spot -- we've allocated enough
> - total room so that this is always possible.
> - */
> + {
> + /* Find an aligned spot inside chunk. Since we need to give back
> + leading space in a chunk of at least MINSIZE, if the first
> + calculation places us at a spot with less than MINSIZE leader,
> + we can move to the next aligned spot -- we've allocated enough
> + total room so that this is always possible. */
> brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
> - ((signed long) alignment));
> if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
> diff --git a/malloc/tst-memalign-2.c b/malloc/tst-memalign-2.c
> new file mode 100644
> index 0000000000..04d42a2da2
> --- /dev/null
> +++ b/malloc/tst-memalign-2.c
> @@ -0,0 +1,115 @@
> +/* Test for memalign chunk reuse.
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <errno.h>
> +#include <malloc.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <array_length.h>
> +
> +#include <support/check.h>
> +
> +typedef struct TestCase {
> + size_t size;
> + size_t alignment;
> + void *ptr1;
> + void *ptr2;
> +} TestCase;
> +
> +static TestCase tcache_allocs[] = {
> + { 24, 8, NULL, NULL },
> + { 24, 16, NULL, NULL },
> + { 128, 32, NULL, NULL }
> +};
> +#define TN array_length (tcache_allocs)
> +
> +static TestCase large_allocs[] = {
> + { 23450, 64, NULL, NULL },
> + { 23450, 64, NULL, NULL },
> + { 23550, 64, NULL, NULL },
> + { 23550, 64, NULL, NULL },
> + { 23650, 64, NULL, NULL },
> + { 23650, 64, NULL, NULL },
> + { 33650, 64, NULL, NULL },
> + { 33650, 64, NULL, NULL }
> +};
> +#define LN array_length (large_allocs)
> +
> +void *p;
> +
> +static int
> +do_test (void)
> +{
> + int i, j;
> + int count;
> +
> + /* TCache test. */
> +
> + for (i = 0; i < TN; ++ i)
> + {
> + tcache_allocs[i].ptr1 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
> + free (tcache_allocs[i].ptr1);
> + /* This should return the same chunk as was just freed. */
> + tcache_allocs[i].ptr2 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
> + free (tcache_allocs[i].ptr2);
> +
> + TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2);
> + }
> +
> + /* Large bins test. */
> +
> + for (i = 0; i < LN; ++ i)
> + {
> + large_allocs[i].ptr1 = memalign (large_allocs[i].alignment, large_allocs[i].size);
> + /* Keep chunks from combining by fragmenting the heap. */
> + p = malloc (512);
> + }
> +
> + for (i = 0; i < LN; ++ i)
> + free (large_allocs[i].ptr1);
> +
> + /* Force the unsorted bins to be scanned and moved to small/large
> + bins. */
> + p = malloc (60000);
> +
> + for (i = 0; i < LN; ++ i)
> + large_allocs[i].ptr2 = memalign (large_allocs[i].alignment, large_allocs[i].size);
> +
> + count = 0;
> + for (i = 0; i < LN; ++ i)
> + {
> + int ok = 0;
> + for (j = 0; j < LN; ++ j)
> + if (large_allocs[i].ptr1 == large_allocs[j].ptr2)
> + ok = 1;
> + if (ok == 1)
> + count ++;
> + }
> +
> + /* The allocation algorithm is complicated outside of the memalign
> + logic, so just make sure it's working for most of the
> + allocations. This avoids possible boundary conditions with
> + empty/full heaps. */
> + TEST_VERIFY (count > LN / 2);
> +
> + return 0;
> +}
> +
> +#define TEST_FUNCTION do_test ()
> +#include "../test-skeleton.c"
>
--
Cheers,
Carlos.