[PATCH] Remove atomic operations from malloc.c
- From: Leonhard Holz <leonhard.holz@web.de>
- To: libc-alpha@sourceware.org
- Date: Wed, 11 Feb 2015 09:30:07 +0100
- Subject: [PATCH] Remove atomic operations from malloc.c
The fastbin path of malloc/free uses atomic operations, probably as an attempt
at lock-free code. But there is an ABA problem in malloc's

  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
         != victim);

because even if *fb == victim, victim->fd might have changed in the meantime.
There is no easy way to fix this; an old comment mentions the need for a CAS2
operation with tagged pointers. Consequently, malloc is currently wrapped by a
lock to avoid the problem.
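To make the ABA window concrete, here is a minimal stand-alone illustration;
the node/stack names and the sequential replay of the two threads' steps are
mine, not glibc code:

/* Minimal sketch of the ABA race on a fastbin-style LIFO list.  The two
   threads' steps are replayed sequentially in one thread; cas() stands in
   for catomic_compare_and_exchange_val_acq.  Illustrative only.  */
#include <stdio.h>
#include <stdbool.h>

struct node { struct node *fd; const char *name; };

static bool
cas (struct node **loc, struct node *oldval, struct node *newval)
{
  if (*loc != oldval)
    return false;
  *loc = newval;
  return true;
}

int
main (void)
{
  struct node c = { NULL, "C" }, b = { &c, "B" }, a = { &b, "A" };
  struct node *top = &a;           /* bin: A -> B -> C */

  /* Thread 1 reads the top and its fd, then is preempted.  */
  struct node *victim = top;       /* victim = A */
  struct node *next = victim->fd;  /* next = B; becomes stale below */

  /* Thread 2 pops A, pops B (B now belongs to another allocation),
     then pushes A back, so the bin is A -> C again.  */
  top = top->fd;                   /* pop A */
  top = top->fd;                   /* pop B */
  a.fd = top;                      /* push A */
  top = &a;

  /* Thread 1 resumes.  The CAS succeeds because top == victim again,
     but it installs the stale B, corrupting the bin.  */
  if (cas (&top, victim, next))
    printf ("CAS succeeded, top is now %s -- list corrupted\n", top->name);
  return 0;
}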
Anyhow, while the atomic ops avoid taking a lock in free, they are costly too:
under high contention the compare_and_exchange may fail and retry multiple
times (see
http://blog.memsql.com/common-pitfalls-in-writing-lock-free-algorithms/ for a
discussion). So I measured how removing the atomics and installing a lock
around free affects performance, and it turns out (at least in my case, with
2 cores) that it has no effect:
Current implementation

  threads   time per iteration
        1              116.709
        8              705.080
       16             1394.74
       32             2923.03

Without atomics

  threads   time per iteration
        1              112.541
        8              715.897
       16             1403.67
       32             2881.30
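A stress loop of the following shape is enough to reproduce this kind of
measurement; the thread count, iteration count, and 64-byte allocation size
are arbitrary choices here, not necessarily the exact setup behind the
numbers above:

/* Hypothetical malloc/free contention benchmark; parameters are arbitrary
   and the timing method is simplistic.  Build with: gcc -O2 -pthread  */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define ITERS 1000000
#define SIZE  64   /* small enough to stay on the fastbin path */

static void *
worker (void *arg)
{
  (void) arg;
  for (long i = 0; i < ITERS; i++)
    {
      void *p = malloc (SIZE);
      if (p == NULL)
        abort ();
      free (p);
    }
  return NULL;
}

int
main (int argc, char **argv)
{
  int nthreads = argc > 1 ? atoi (argv[1]) : 8;
  pthread_t thr[nthreads];
  struct timespec t0, t1;

  clock_gettime (CLOCK_MONOTONIC, &t0);
  for (int i = 0; i < nthreads; i++)
    pthread_create (&thr[i], NULL, worker, NULL);
  for (int i = 0; i < nthreads; i++)
    pthread_join (thr[i], NULL);
  clock_gettime (CLOCK_MONOTONIC, &t1);

  double ns = (t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec);
  printf ("%d threads: %.3f ns per iteration\n",
          nthreads, ns / ((double) ITERS * nthreads));
  return 0;
}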
So I propose to remove the atomic ops: it simplifies the code and reactivates
a security check (the size check on the top fastbin chunk in free, which the
old code could only perform when the lock happened to be held).
Remove atomic operations from malloc's fastbin path.

	* malloc/malloc.c (_int_malloc): Remove compare_and_exchange.
	(_int_free): Remove compare_and_exchange and lock arena if needed.
	(malloc_consolidate): Remove atomic_exchange.
diff --git a/malloc/malloc.c b/malloc/malloc.c
index aa7edbf..f43c14c 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -219,7 +219,6 @@

 #include <malloc-machine.h>
 #include <malloc-sysdep.h>
-#include <atomic.h>
 #include <_itoa.h>
 #include <bits/wordsize.h>
 #include <sys/sysinfo.h>
@@ -3332,15 +3331,7 @@ _int_malloc (mstate av, size_t bytes)
     {
       idx = fastbin_index (nb);
       mfastbinptr *fb = &fastbin (av, idx);
-      mchunkptr pp = *fb;
-      do
-        {
-          victim = pp;
-          if (victim == NULL)
-            break;
-        }
-      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
-             != victim);
+      victim = *fb;
       if (victim != 0)
         {
           if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
@@ -3350,6 +3341,7 @@ _int_malloc (mstate av, size_t bytes)
               malloc_printerr (check_action, errstr, chunk2mem (victim));
               return NULL;
             }
+          *fb = victim->fd;
           check_remalloced_chunk (av, victim, nb);
           void *p = chunk2mem (victim);
           alloc_perturb (p, bytes);
@@ -3857,29 +3849,18 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 #endif
       ) {

+    if (!have_lock)
+      {
+        (void) mutex_lock (&av->mutex);
+        locked = 1;
+      }
+
     if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
         || __builtin_expect (chunksize (chunk_at_offset (p, size))
                              >= av->system_mem, 0))
       {
-        /* We might not have a lock at this point and concurrent modifications
-           of system_mem might have let to a false positive.  Redo the test
-           after getting the lock.  */
-        if (have_lock
-            || ({ assert (locked == 0);
-                  mutex_lock(&av->mutex);
-                  locked = 1;
-                  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
-                    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
-              }))
-          {
-            errstr = "free(): invalid next size (fast)";
-            goto errout;
-          }
-        if (! have_lock)
-          {
-            (void)mutex_unlock(&av->mutex);
-            locked = 0;
-          }
+        errstr = "free(): invalid next size (fast)";
+        goto errout;
       }

     free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
@@ -3887,35 +3868,34 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     set_fastchunks(av);
     unsigned int idx = fastbin_index(size);
     fb = &fastbin (av, idx);
+    mchunkptr old = *fb;

-    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
-    mchunkptr old = *fb, old2;
-    unsigned int old_idx = ~0u;
-    do
+    /* Check that the top of the bin is not the record we are going to add
+       (i.e., double free).  */
+    if (__builtin_expect (old == p, 0))
       {
-        /* Check that the top of the bin is not the record we are going to add
-           (i.e., double free).  */
-        if (__builtin_expect (old == p, 0))
-          {
-            errstr = "double free or corruption (fasttop)";
-            goto errout;
-          }
-        /* Check that size of fastbin chunk at the top is the same as
-           size of the chunk that we are adding.  We can dereference OLD
-           only if we have the lock, otherwise it might have already been
-           deallocated.  See use of OLD_IDX below for the actual check.  */
-        if (have_lock && old != NULL)
-          old_idx = fastbin_index(chunksize(old));
-        p->fd = old2 = old;
+        errstr = "double free or corruption (fasttop)";
+        goto errout;
       }
-    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
-
-    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  */
+    if (old != NULL && __builtin_expect (fastbin_index(chunksize(old)) != idx, 0))
       {
         errstr = "invalid fastbin entry (free)";
         goto errout;
       }
-  }
+
+    /* Link P to its fastbin: P->FD = *FB; *FB = P;  */
+    p->fd = old;
+    *fb = p;
+
+    if (!have_lock)
+      {
+        (void) mutex_unlock(&av->mutex);
+        locked = 0;
+      }
+  }

   /*
     Consolidate other non-mmapped chunks as they arrive.
@@ -4121,7 +4101,8 @@ static void malloc_consolidate(mstate av)
     maxfb = &fastbin (av, NFASTBINS - 1);
     fb = &fastbin (av, 0);
     do {
-      p = atomic_exchange_acq (fb, 0);
+      p = *fb;
+      *fb = NULL;
       if (p != 0) {
         do {
           check_inuse_chunk(av, p);