This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH][malloc] Improve malloc initialization sequence
- From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- To: DJ Delorie <dj at redhat dot com>
- Cc: "fweimer at redhat dot com" <fweimer at redhat dot com>, "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>, nd <nd at arm dot com>
- Date: Tue, 3 Oct 2017 17:18:51 +0000
- Subject: Re: [PATCH][malloc] Improve malloc initialization sequence
- Authentication-results: sourceware.org; auth=none
- Authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco dot Dijkstra at arm dot com;
- Nodisclaimer: True
- References: <HE1PR0801MB2058129B80D52F8CCF0F5370837D0@HE1PR0801MB2058.eurprd08.prod.outlook.com> (message from Wilco Dijkstra on Mon, 2 Oct 2017 14:24:49 +0000),<xny3otp5uk.fsf@greed.delorie.com>
- Spamdiagnosticmetadata: NSPM
- Spamdiagnosticoutput: 1:99
DJ Delorie wrote:
> __libc_mallopt() calls malloc_consolidate, with a comment that says
> "Ensure initialization/consolidation" - do we need to consolidate here,
> or is it another remnant? Either way, at least the comment is wrong
> now.
>
> The comment for malloc_init_state() is also wrong now.
>
> _int_malloc calls malloc_consolidate to initialize arenas, although it
> also needs the consolidation feature.
>
> comment in mtrim() is wrong now, or it needs to call init? (ok, pretty
> much every call to malloc_consolidate needs to be checked to see if they
> really are initializing the arenas; there are a lot of public APIs that
> might be called before malloc/realloc/calloc, which is when
> ptmalloc_init gets called)
>
> I wonder if an assert in malloc_consolidate would be prudent...
I've gone over all the calls to consolidate and removed all redundant
intitialization. mtrim, mallopt all call consolidate directly after calling
ptmalloc_init so these are not needed for initialization. There was also
a redundant one in _int_malloc, and an incorrect check for arena->top != 0
in do_check_malloc_state (malloc debugging now builds and works).
I've updated comments and reindented where needed.
Updated patch:
The current malloc initialization is quite convoluted. Instead of
sometimes calling malloc_consolidate from ptmalloc_init, call
malloc_init_state early so that the main_arena is always initialized.
The special initialization can now be removed from malloc_consolidate.
This also fixes BZ #22159.
Check all calls to malloc_consolidate and remove calls that are
redundant initialization after ptmalloc_init, like in int_mallinfo
and __libc_mallopt (but keep the latter as consolidation is required for
set_max_fast). Update comments to improve clarity.
Remove impossible initialization check from _int_malloc, fix assert
in do_check_malloc_state to ensure arena->top != 0. Fix the obvious bugs
in do_check_free_chunk and do_check_remalloced_chunk to enable single
threaded malloc debugging (do_check_malloc_state is not thread safe!).
GLIBC builds and tests pass, OK for commit?
ChangeLog:
2017-10-03 Wilco Dijkstra <wdijkstr@arm.com>
[BZ #22159]
* malloc/arena.c (ptmalloc_init): Call malloc_init_state.
* malloc/malloc.c (do_check_free_chunk): Fix build bug.
(do_check_remalloced_chunk): Fix build bug.
(do_check_malloc_state): Add assert that checks arena->top.
(malloc_consolidate): Remove initialization.
(int_mallinfo): Remove call to malloc_consolidate.
(__libc_mallopt): Clarify why malloc_consolidate is needed.
--
diff --git a/malloc/arena.c b/malloc/arena.c
index 9e5a62d260bf2f5e6d76da4ccaf7b7dcb388c296..85b985e193d513b633bd148b275515a29a710584 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -307,13 +307,9 @@ ptmalloc_init (void)
thread_arena = &main_arena;
-#if HAVE_TUNABLES
- /* Ensure initialization/consolidation and do it under a lock so that a
- thread attempting to use the arena in parallel waits on us till we
- finish. */
- __libc_lock_lock (main_arena.mutex);
- malloc_consolidate (&main_arena);
+ malloc_init_state (&main_arena);
+#if HAVE_TUNABLES
TUNABLE_GET (check, int32_t, TUNABLE_CALLBACK (set_mallopt_check));
TUNABLE_GET (top_pad, size_t, TUNABLE_CALLBACK (set_top_pad));
TUNABLE_GET (perturb, int32_t, TUNABLE_CALLBACK (set_perturb_byte));
@@ -322,13 +318,12 @@ ptmalloc_init (void)
TUNABLE_GET (mmap_max, int32_t, TUNABLE_CALLBACK (set_mmaps_max));
TUNABLE_GET (arena_max, size_t, TUNABLE_CALLBACK (set_arena_max));
TUNABLE_GET (arena_test, size_t, TUNABLE_CALLBACK (set_arena_test));
-#if USE_TCACHE
+# if USE_TCACHE
TUNABLE_GET (tcache_max, size_t, TUNABLE_CALLBACK (set_tcache_max));
TUNABLE_GET (tcache_count, size_t, TUNABLE_CALLBACK (set_tcache_count));
TUNABLE_GET (tcache_unsorted_limit, size_t,
TUNABLE_CALLBACK (set_tcache_unsorted_limit));
-#endif
- __libc_lock_unlock (main_arena.mutex);
+# endif
#else
const char *s = NULL;
if (__glibc_likely (_environ != NULL))
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 88cfd25766eba6787faeb7195d95b73d7a4637ab..9e8cef1a9de220d94dd1edbd7cda98bacacd4270 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1629,8 +1629,10 @@ static INTERNAL_SIZE_T global_max_fast;
/*
Set value of max_fast.
Use impossibly small value if 0.
- Precondition: there are no existing fastbin chunks.
- Setting the value clears fastchunk bit but preserves noncontiguous bit.
+ Precondition: there are no existing fastbin chunks in the main arena.
+ Since do_check_malloc_state () checks this, we call malloc_consolidate ()
+ before changing max_fast. Note other arenas will leak their fast bin
+ entries if max_fast is reduced.
*/
#define set_max_fast(s) \
@@ -1794,11 +1796,8 @@ static struct malloc_par mp_ =
/*
Initialize a malloc_state struct.
- This is called only from within malloc_consolidate, which needs
- be called in the same contexts anyway. It is never called directly
- outside of malloc_consolidate because some optimizing compilers try
- to inline it at all call points, which turns out not to be an
- optimization at all. (Inlining it in malloc_consolidate is fine though.)
+ This is called from ptmalloc_init () or from _int_new_arena ()
+ when creating a new arena.
*/
static void
@@ -1976,7 +1975,7 @@ do_check_chunk (mstate av, mchunkptr p)
static void
do_check_free_chunk (mstate av, mchunkptr p)
{
- INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+ INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
mchunkptr next = chunk_at_offset (p, sz);
do_check_chunk (av, p);
@@ -1991,7 +1990,7 @@ do_check_free_chunk (mstate av, mchunkptr p)
assert ((sz & MALLOC_ALIGN_MASK) == 0);
assert (aligned_OK (chunk2mem (p)));
/* ... matching footer field */
- assert (prev_size (p) == sz);
+ assert (prev_size (next_chunk (p)) == sz);
/* ... and is fully consolidated */
assert (prev_inuse (p));
assert (next == av->top || inuse (next));
@@ -2051,7 +2050,7 @@ do_check_inuse_chunk (mstate av, mchunkptr p)
static void
do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
- INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+ INTERNAL_SIZE_T sz = chunksize_nomask (p) & ~(PREV_INUSE | NON_MAIN_ARENA);
if (!chunk_is_mmapped (p))
{
@@ -2127,8 +2126,11 @@ do_check_malloc_state (mstate av)
/* alignment is a power of 2 */
assert ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - 1)) == 0);
- /* cannot run remaining checks until fully initialized */
- if (av->top == 0 || av->top == initial_top (av))
+ /* Check the arena is initialized. */
+ assert (av->top != 0);
+
+ /* No memory has been allocated yet, so doing more tests is not possible. */
+ if (av->top == initial_top (av))
return;
/* pagesize is a power of 2 */
@@ -3632,21 +3634,16 @@ _int_malloc (mstate av, size_t bytes)
if ((victim = last (bin)) != bin)
{
- if (victim == 0) /* initialization check */
- malloc_consolidate (av);
- else
- {
- bck = victim->bk;
- if (__glibc_unlikely (bck->fd != victim))
- malloc_printerr
- ("malloc(): smallbin double linked list corrupted");
- set_inuse_bit_at_offset (victim, nb);
- bin->bk = bck;
- bck->fd = bin;
-
- if (av != &main_arena)
- set_non_main_arena (victim);
- check_malloced_chunk (av, victim, nb);
+ bck = victim->bk;
+ if (__glibc_unlikely (bck->fd != victim))
+ malloc_printerr ("malloc(): smallbin double linked list corrupted");
+ set_inuse_bit_at_offset (victim, nb);
+ bin->bk = bck;
+ bck->fd = bin;
+
+ if (av != &main_arena)
+ set_non_main_arena (victim);
+ check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
@@ -3673,10 +3670,9 @@ _int_malloc (mstate av, size_t bytes)
}
}
#endif
- void *p = chunk2mem (victim);
- alloc_perturb (p, bytes);
- return p;
- }
+ void *p = chunk2mem (victim);
+ alloc_perturb (p, bytes);
+ return p;
}
}
@@ -4386,10 +4382,6 @@ _int_free (mstate av, mchunkptr p, int have_lock)
purpose since, among other things, it might place chunks back onto
fastbins. So, instead, we need to use a minor variant of the same
code.
-
- Also, because this routine needs to be called the first time through
- malloc anyway, it turns out to be the perfect place to trigger
- initialization code.
*/
static void malloc_consolidate(mstate av)
@@ -4410,84 +4402,73 @@ static void malloc_consolidate(mstate av)
mchunkptr bck;
mchunkptr fwd;
- /*
- If max_fast is 0, we know that av hasn't
- yet been initialized, in which case do so below
- */
-
- if (get_max_fast () != 0) {
- atomic_store_relaxed (&av->have_fastchunks, false);
-
- unsorted_bin = unsorted_chunks(av);
+ atomic_store_relaxed (&av->have_fastchunks, false);
- /*
- Remove each chunk from fast bin and consolidate it, placing it
- then in unsorted bin. Among other reasons for doing this,
- placing in unsorted bin avoids needing to calculate actual bins
- until malloc is sure that chunks aren't immediately going to be
- reused anyway.
- */
+ unsorted_bin = unsorted_chunks(av);
- maxfb = &fastbin (av, NFASTBINS - 1);
- fb = &fastbin (av, 0);
- do {
- p = atomic_exchange_acq (fb, NULL);
- if (p != 0) {
- do {
- check_inuse_chunk(av, p);
- nextp = p->fd;
-
- /* Slightly streamlined version of consolidation code in free() */
- size = chunksize (p);
- nextchunk = chunk_at_offset(p, size);
- nextsize = chunksize(nextchunk);
-
- if (!prev_inuse(p)) {
- prevsize = prev_size (p);
- size += prevsize;
- p = chunk_at_offset(p, -((long) prevsize));
- unlink(av, p, bck, fwd);
- }
+ /*
+ Remove each chunk from fast bin and consolidate it, placing it
+ then in unsorted bin. Among other reasons for doing this,
+ placing in unsorted bin avoids needing to calculate actual bins
+ until malloc is sure that chunks aren't immediately going to be
+ reused anyway.
+ */
- if (nextchunk != av->top) {
- nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+ maxfb = &fastbin (av, NFASTBINS - 1);
+ fb = &fastbin (av, 0);
+ do {
+ p = atomic_exchange_acq (fb, NULL);
+ if (p != 0) {
+ do {
+ check_inuse_chunk(av, p);
+ nextp = p->fd;
+
+ /* Slightly streamlined version of consolidation code in free() */
+ size = chunksize (p);
+ nextchunk = chunk_at_offset(p, size);
+ nextsize = chunksize(nextchunk);
+
+ if (!prev_inuse(p)) {
+ prevsize = prev_size (p);
+ size += prevsize;
+ p = chunk_at_offset(p, -((long) prevsize));
+ unlink(av, p, bck, fwd);
+ }
- if (!nextinuse) {
- size += nextsize;
- unlink(av, nextchunk, bck, fwd);
- } else
- clear_inuse_bit_at_offset(nextchunk, 0);
+ if (nextchunk != av->top) {
+ nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
- first_unsorted = unsorted_bin->fd;
- unsorted_bin->fd = p;
- first_unsorted->bk = p;
+ if (!nextinuse) {
+ size += nextsize;
+ unlink(av, nextchunk, bck, fwd);
+ } else
+ clear_inuse_bit_at_offset(nextchunk, 0);
- if (!in_smallbin_range (size)) {
- p->fd_nextsize = NULL;
- p->bk_nextsize = NULL;
- }
+ first_unsorted = unsorted_bin->fd;
+ unsorted_bin->fd = p;
+ first_unsorted->bk = p;
- set_head(p, size | PREV_INUSE);
- p->bk = unsorted_bin;
- p->fd = first_unsorted;
- set_foot(p, size);
+ if (!in_smallbin_range (size)) {
+ p->fd_nextsize = NULL;
+ p->bk_nextsize = NULL;
}
- else {
- size += nextsize;
- set_head(p, size | PREV_INUSE);
- av->top = p;
- }
+ set_head(p, size | PREV_INUSE);
+ p->bk = unsorted_bin;
+ p->fd = first_unsorted;
+ set_foot(p, size);
+ }
- } while ( (p = nextp) != 0);
+ else {
+ size += nextsize;
+ set_head(p, size | PREV_INUSE);
+ av->top = p;
+ }
- }
- } while (fb++ != maxfb);
- }
- else {
- malloc_init_state(av);
- check_malloc_state(av);
- }
+ } while ( (p = nextp) != 0);
+
+ }
+ } while (fb++ != maxfb);
}
/*
@@ -4754,7 +4735,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
static int
mtrim (mstate av, size_t pad)
{
- /* Ensure initialization/consolidation */
+ /* Ensure all blocks are consolidated. */
malloc_consolidate (av);
const size_t ps = GLRO (dl_pagesize);
@@ -4885,10 +4866,6 @@ int_mallinfo (mstate av, struct mallinfo *m)
int nblocks;
int nfastblocks;
- /* Ensure initialization */
- if (av->top == 0)
- malloc_consolidate (av);
-
check_malloc_state (av);
/* Account for top */
@@ -5137,11 +5114,13 @@ __libc_mallopt (int param_number, int value)
if (__malloc_initialized < 0)
ptmalloc_init ();
__libc_lock_lock (av->mutex);
- /* Ensure initialization/consolidation */
- malloc_consolidate (av);
LIBC_PROBE (memory_mallopt, 2, param_number, value);
+ /* We must consolidate main arena before changing max_fast
+ (see definition of set_max_fast). */
+ malloc_consolidate (av);
+
switch (param_number)
{
case M_MXFAST: