This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] malloc: Prevent arena free_list from turning cyclic [BZ #19048]
- From: Florian Weimer <fweimer at redhat dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Fri, 9 Oct 2015 18:16:03 +0200
- Subject: [PATCH] malloc: Prevent arena free_list from turning cyclic [BZ #19048]
- Authentication-results: sourceware.org; auth=none
This is a preview of a fix for bug 19048. As explained in the
description of that bug, the problem is that a certain sequence of
events causes free_list to turn cyclic. If this happens, the
reused_arena fallback code is never used because get_free_list will
never return NULL again. If the cycle in free_list is short, this can
lead to significant contention, and we will stop using arenas not on the
free list or already attached to a thread.
This patch introduces a counter to keep track to how many threads are
attached to an arena, and put it on the free list on thread exit if and
only if the exiting thread was the last one to use the arena.
There is a ??? marker on deattach_arena. I did not want to change the
arena management algorithm in a fundamental way, but the reference count
can reach zero here, and we could put the arena on the free_list. This
can happen if the other thread that caused contention for the current
thread (so that we switched arenas) exited. This may not even be that
unlikely if a thread frees a lot of memory before exit.
Regarding algorithm changes: If applications create and destroy threads
continuously, it seems likely they will hit this bug. After this fix
(with or with the deattach_arena enhancement mentioned above), the arena
selection behavior will be *very* different. This is unavoidable, but
it is unclear if it will result in a performance gain across the board.
Regarding synchronization, I tried to make sure that all updates to the
attached_threads member happen under list_lock. (There are some
existing data races, but that's a different issue.) Only in
reused_arena, I had to take list_lock briefly to maintain the reference
count, but that's clearly not on the fast path. In the other places, I
could reuse the existing list_lock synchronization.
Florian
2015-10-09 Florian Weimer <fweimer@redhat.com>
[BZ# 19048]
* malloc/malloc.c (struct malloc_state): Update comment. Add
attached_threads member.
(main_arena): Initialize attached_threads.
* malloc/arena.c (list_lock): Update comment.
(ptmalloc_lock_all, ptmalloc_unlock_all): Likewise.
(ptmalloc_unlock_all2): Reinitialize arena reference counts.
(deattach_arena): New function.
(_int_new_arena): Initialize arena reference count and deattach
replaced arena.
(get_free_list, reused_arena): Update reference count and deattach
replaced arena.
(arena_thread_freeres): Update arena reference count and only put
unreferenced arenas on the free list.
diff --git a/malloc/arena.c b/malloc/arena.c
index cb45a04..9266d96 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -64,7 +64,10 @@ extern int sanity_check_heap_info_alignment[(sizeof (heap_info)
+ 2 * SIZE_SZ) % MALLOC_ALIGNMENT
? -1 : 1];
-/* Thread specific data */
+/* Thread specific data. list_lock protects the free_list variable
+ below, and the next_free and attached_threads members of the mstate
+ objects. No other (malloc) locks must be taken while list_lock is
+ active, otherwise deadlocks may occur. */
static tsd_key_t arena_key;
static mutex_t list_lock = MUTEX_INITIALIZER;
@@ -233,7 +236,10 @@ ptmalloc_lock_all (void)
save_free_hook = __free_hook;
__malloc_hook = malloc_atfork;
__free_hook = free_atfork;
- /* Only the current thread may perform malloc/free calls now. */
+ /* Only the current thread may perform malloc/free calls now.
+ save_arena will be reattached to the current thread, in
+ ptmalloc_lock_all, so save_arena->attached_threads is not
+ updated. */
tsd_getspecific (arena_key, save_arena);
tsd_setspecific (arena_key, ATFORK_ARENA_PTR);
out:
@@ -251,6 +257,9 @@ ptmalloc_unlock_all (void)
if (--atfork_recursive_cntr != 0)
return;
+ /* Replace ATFORK_ARENA_PTR with save_arena.
+ save_arena->attached_threads was not changed in ptmalloc_lock_all
+ and is still correct. */
tsd_setspecific (arena_key, save_arena);
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
@@ -282,12 +291,19 @@ ptmalloc_unlock_all2 (void)
tsd_setspecific (arena_key, save_arena);
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
+
+ /* Push all arenas to the free list, except save_arena, which is
+ attached to the current thread. */
+ if (save_arena != NULL)
+ ((mstate) save_arena)->attached_threads = 1;
free_list = NULL;
for (ar_ptr = &main_arena;; )
{
mutex_init (&ar_ptr->mutex);
if (ar_ptr != save_arena)
{
+ /* This arena is no longer attached to any thread. */
+ ar_ptr->attached_threads = 0;
ar_ptr->next_free = free_list;
free_list = ar_ptr;
}
@@ -727,6 +743,21 @@ heap_trim (heap_info *heap, size_t pad)
/* Create a new arena with initial size "size". */
+/* If REPLACED_ARENA is not NULL, deattach it from this thread.
+ Must be called while list_lock is held.
+
+ ??? Why do we fail to put the arena on the free list if the
+ reference count reaches 0? */
+static void
+deattach_arena (mstate replaced_arena)
+{
+ if (replaced_arena != NULL)
+ {
+ assert (replaced_arena->attached_threads > 0);
+ --replaced_arena->attached_threads;
+ }
+}
+
static mstate
_int_new_arena (size_t size)
{
@@ -748,6 +779,7 @@ _int_new_arena (size_t size)
}
a = h->ar_ptr = (mstate) (h + 1);
malloc_init_state (a);
+ a->attached_threads = 1;
/*a->next = NULL;*/
a->system_mem = a->max_system_mem = h->size;
arena_mem += h->size;
@@ -761,12 +793,16 @@ _int_new_arena (size_t size)
set_head (top (a), (((char *) h + h->size) - ptr) | PREV_INUSE);
LIBC_PROBE (memory_arena_new, 2, a, size);
+ void *vptr;
+ mstate replaced_arena = tsd_getspecific (arena_key, vptr);
tsd_setspecific (arena_key, (void *) a);
mutex_init (&a->mutex);
(void) mutex_lock (&a->mutex);
(void) mutex_lock (&list_lock);
+ deattach_arena (replaced_arena);
+
/* Add the new arena to the global list. */
a->next = main_arena.next;
atomic_write_barrier ();
@@ -781,13 +817,25 @@ _int_new_arena (size_t size)
static mstate
get_free_list (void)
{
+ void *vptr;
+ mstate replaced_arena = tsd_getspecific (arena_key, vptr);
mstate result = free_list;
if (result != NULL)
{
(void) mutex_lock (&list_lock);
result = free_list;
if (result != NULL)
- free_list = result->next_free;
+ {
+ free_list = result->next_free;
+
+ /* Arenas on the free list are not attached to any thread. */
+ assert (result->attached_threads == 0);
+ /* But the arena will now be attached to this thread. */
+ result->attached_threads = 1;
+
+ if (result != NULL)
+ deattach_arena (replaced_arena);
+ }
(void) mutex_unlock (&list_lock);
if (result != NULL)
@@ -846,6 +894,15 @@ reused_arena (mstate avoid_arena)
(void) mutex_lock (&result->mutex);
out:
+ {
+ void *vptr;
+ mstate replaced_arena = tsd_getspecific (arena_key, vptr);
+ (void) mutex_lock (&list_lock);
+ deattach_arena (replaced_arena);
+ ++result->attached_threads;
+ (void) mutex_unlock (&list_lock);
+ }
+
LIBC_PROBE (memory_arena_reuse, 2, result, avoid_arena);
tsd_setspecific (arena_key, (void *) result);
next_to_use = result->next;
@@ -941,8 +998,14 @@ arena_thread_freeres (void)
if (a != NULL)
{
(void) mutex_lock (&list_lock);
- a->next_free = free_list;
- free_list = a;
+ /* If this was the last attached thread for this arena, put the
+ arena on the free list. */
+ assert (a->attached_threads > 0);
+ if (--a->attached_threads == 0)
+ {
+ a->next_free = free_list;
+ free_list = a;
+ }
(void) mutex_unlock (&list_lock);
}
}
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 0eca9ce..051c463 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1709,9 +1709,15 @@ struct malloc_state
/* Linked list */
struct malloc_state *next;
- /* Linked list for free arenas. */
+ /* Linked list for free arenas. Access to this field is serialized
+ by list_lock in arena.c. */
struct malloc_state *next_free;
+ /* Number of threads attached to this arena. 0 if the arena is on
+ the free list. Access to this field is serialized by list_lock
+ in arena.c. */
+ INTERNAL_SIZE_T attached_threads;
+
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
@@ -1755,7 +1761,8 @@ struct malloc_par
static struct malloc_state main_arena =
{
.mutex = MUTEX_INITIALIZER,
- .next = &main_arena
+ .next = &main_arena,
+ .attached_threads = 1
};
/* There is only one instance of the malloc parameters. */
--
2.4.3