Re: [PATCH] Remove atomic operations from malloc.c


On 11 February 2015 at 08:30, Leonhard Holz <leonhard.holz@web.de> wrote:
> The fastbin path of malloc/free features atomic operations, which were
> probably an attempt at lock-free code. But there is an ABA problem in
> malloc's
>
> while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
>         != victim);
>
> because even if *fb == victim, victim->fd might have changed in the
> meantime. There is no easy way to fix this; an old comment mentions the
> need for a CAS2 operation with tagged pointers. Consequently, malloc is
> currently wrapped by a lock to avoid problems.
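
A minimal standalone sketch of that ABA hazard in a lock-free LIFO pop
(using the GCC __sync builtin in place of glibc's internal catomic_*
macros; the struct and function names here are illustrative, not glibc's):

    /* Lock-free pop with an ABA window.  */
    struct chunk { struct chunk *fd; };

    static struct chunk *
    pop (struct chunk **fb)
    {
      struct chunk *victim, *pp = *fb;
      do
        {
          victim = pp;
          if (victim == NULL)
            break;
          /* Window: another thread can pop VICTIM, also pop (and
             reuse) VICTIM->fd, then push VICTIM back.  *FB equals
             VICTIM again, so the CAS below succeeds, but the
             VICTIM->fd it installs is stale.  */
        }
      while ((pp = __sync_val_compare_and_swap (fb, victim, victim->fd))
             != victim);
      return victim;
    }
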
>
> Anyhow, while the atomic ops avoid a lock in free, they are costly too.
> Especially under high contention, the compare_and_exchange may fail
> multiple times (see
> http://blog.memsql.com/common-pitfalls-in-writing-lock-free-algorithms/
> for a discussion). So I measured how removing the atomics and installing
> a lock around free affects performance, and it turns out (at least in my
> case, with 2 cores) that it has no effect:
>
> Current implementation
> threads time per iteration
> 1       116.709
> 8       705.080
> 16      1394.74
> 32      2923.03
>
> Without atomics
> threads time per iteration
> 1       112.541
> 8       715.897
> 16      1403.67
> 32      2881.30
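
To make "time per iteration" concrete: a hypothetical sketch of the kind
of multithreaded malloc/free loop that could produce such numbers (the
author's actual benchmark is not shown in this thread; the thread count,
iteration count, and 16-byte fastbin-sized request are assumptions):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define ITERS    1000000L
    #define NTHREADS 8

    static void *
    worker (void *arg)
    {
      for (long i = 0; i < ITERS; i++)
        {
          void *p = malloc (16);  /* small enough to take the fastbin path */
          free (p);
        }
      return NULL;
    }

    int
    main (void)
    {
      pthread_t t[NTHREADS];
      struct timespec b, e;
      clock_gettime (CLOCK_MONOTONIC, &b);
      for (int i = 0; i < NTHREADS; i++)
        pthread_create (&t[i], NULL, worker, NULL);
      for (int i = 0; i < NTHREADS; i++)
        pthread_join (t[i], NULL);
      clock_gettime (CLOCK_MONOTONIC, &e);
      double ns = (e.tv_sec - b.tv_sec) * 1e9 + (e.tv_nsec - b.tv_nsec);
      /* Average wall-clock time per malloc/free pair across all threads.  */
      printf ("%.3f ns per iteration\n", ns / ((double) ITERS * NTHREADS));
      return 0;
    }
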

Did you try running the malloc benchtest?
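
(For reference: glibc's microbenchmarks live under benchtests/ in the
source tree; assuming a configured build directory, the suite is run with
"make bench". A threaded malloc benchmark, bench-malloc-thread, was added
around this time, though the available targets depend on the glibc
version.)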

> So I propose removing the atomic ops, because doing so simplifies the
> code and reactivates a security check (the size check on the top fastbin
> chunk in free).
>
>
>         Remove atomic operations from malloc's fastbin path.
>         * malloc/malloc.c (_int_malloc): Remove compare_and_exchange.
>         (_int_free): Remove compare_and_exchange and lock arena if needed.
>         (malloc_consolidate): Remove atomic_exchange.
>
>
> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index aa7edbf..f43c14c 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -219,7 +219,6 @@
>  #include <malloc-machine.h>
>  #include <malloc-sysdep.h>
>
> -#include <atomic.h>
>  #include <_itoa.h>
>  #include <bits/wordsize.h>
>  #include <sys/sysinfo.h>
> @@ -3332,15 +3331,7 @@ _int_malloc (mstate av, size_t bytes)
>      {
>        idx = fastbin_index (nb);
>        mfastbinptr *fb = &fastbin (av, idx);
> -      mchunkptr pp = *fb;
> -      do
> -        {
> -          victim = pp;
> -          if (victim == NULL)
> -            break;
> -        }
> -      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
> -             != victim);
> +      victim = *fb;
>        if (victim != 0)
>          {
>            if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
> @@ -3350,6 +3341,7 @@ _int_malloc (mstate av, size_t bytes)
>                malloc_printerr (check_action, errstr, chunk2mem (victim));
>                return NULL;
>              }
> +         *fb = victim->fd;
>            check_remalloced_chunk (av, victim, nb);
>            void *p = chunk2mem (victim);
>            alloc_perturb (p, bytes);
> @@ -3857,29 +3849,18 @@ _int_free (mstate av, mchunkptr p, int have_lock)
>  #endif
>        ) {
>
> +    if (!have_lock)
> +      {
> +       (void) mutex_lock (&av->mutex);
> +       locked = 1;
> +      }
> +
>      if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
>         || __builtin_expect (chunksize (chunk_at_offset (p, size))
>                              >= av->system_mem, 0))
>        {
> -       /* We might not have a lock at this point and concurrent modifications
> -          of system_mem might have let to a false positive.  Redo the test
> -          after getting the lock.  */
> -       if (have_lock
> -           || ({ assert (locked == 0);
> -                 mutex_lock(&av->mutex);
> -                 locked = 1;
> -                 chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
> -                   || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
> -             }))
> -         {
> -           errstr = "free(): invalid next size (fast)";
> -           goto errout;
> -         }
> -       if (! have_lock)
> -         {
> -           (void)mutex_unlock(&av->mutex);
> -           locked = 0;
> -         }
> +       errstr = "free(): invalid next size (fast)";
> +       goto errout;
>        }
>
>      free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
> @@ -3887,35 +3868,34 @@ _int_free (mstate av, mchunkptr p, int have_lock)
>      set_fastchunks(av);
>      unsigned int idx = fastbin_index(size);
>      fb = &fastbin (av, idx);
> +    mchunkptr old = *fb;
>
> -    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
> -    mchunkptr old = *fb, old2;
> -    unsigned int old_idx = ~0u;
> -    do
> +    /* Check that the top of the bin is not the record we are going to add
> +       (i.e., double free).  */
> +    if (__builtin_expect (old == p, 0))
>        {
> -       /* Check that the top of the bin is not the record we are going to add
> -          (i.e., double free).  */
> -       if (__builtin_expect (old == p, 0))
> -         {
> -           errstr = "double free or corruption (fasttop)";
> -           goto errout;
> -         }
> -       /* Check that size of fastbin chunk at the top is the same as
> -          size of the chunk that we are adding.  We can dereference OLD
> -          only if we have the lock, otherwise it might have already been
> -          deallocated.  See use of OLD_IDX below for the actual check.  */
> -       if (have_lock && old != NULL)
> -         old_idx = fastbin_index(chunksize(old));
> -       p->fd = old2 = old;
> +       errstr = "double free or corruption (fasttop)";
> +       goto errout;
>        }
> -    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
>
> -    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
> +    /* Check that size of fastbin chunk at the top is the same as
> +       size of the chunk that we are adding.  */
> +    if (old != NULL && __builtin_expect (fastbin_index(chunksize(old)) != idx, 0))
>        {
>         errstr = "invalid fastbin entry (free)";
>         goto errout;
>        }
> -  }
> +
> +    /* Link P to its fastbin: P->FD = *FB; *FB = P;  */
> +    p->fd = old;
> +    *fb = p;
> +
> +    if (!have_lock)
> +      {
> +       (void) mutex_unlock(&av->mutex);
> +       locked = 0;
> +      }
> +   }
>
>    /*
>      Consolidate other non-mmapped chunks as they arrive.
> @@ -4121,7 +4101,8 @@ static void malloc_consolidate(mstate av)
>      maxfb = &fastbin (av, NFASTBINS - 1);
>      fb = &fastbin (av, 0);
>      do {
> -      p = atomic_exchange_acq (fb, 0);
> +      p = *fb;
> +      *fb = NULL;
>        if (p != 0) {
>         do {
>           check_inuse_chunk(av, p);



-- 
Will Newton
Toolchain Working Group, Linaro

