This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.



[PATCH] Remove atomic operations from malloc.c


The fastbin path of malloc/free uses atomic operations, probably as an attempt at lock-free code. But there is an ABA problem in malloc's

while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
        != victim);

because even if *fb == victim, victim->fd might have changed in the meantime. There is no easy way to fix this; an old comment mentions the need for a CAS2 operation with tagged pointers. Consequently, malloc is currently wrapped by a lock to avoid problems.
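
To make the race concrete, here is a minimal sketch of the same pop loop, rewritten with C11 <stdatomic.h> instead of glibc's catomic_* macros (the names fastbin_top and pop are made up for illustration, not the actual malloc.c code):

    #include <stdatomic.h>
    #include <stddef.h>

    struct chunk
    {
      struct chunk *fd;               /* Next chunk in the bin.  */
    };

    static _Atomic (struct chunk *) fastbin_top;

    /* Pop the top chunk, mirroring the loop in _int_malloc.  */
    static struct chunk *
    pop (void)
    {
      struct chunk *victim = atomic_load (&fastbin_top);
      while (victim != NULL)
        {
          /* RACE: between this read of victim->fd and the exchange
             below, another thread may pop VICTIM, also pop VICTIM->FD,
             and then push VICTIM back.  The exchange still succeeds
             because FASTBIN_TOP == VICTIM again, but the NEXT we
             install is stale and no longer belongs in the bin.  */
          struct chunk *next = victim->fd;
          if (atomic_compare_exchange_weak (&fastbin_top, &victim, next))
            break;
          /* The failed exchange reloaded VICTIM; retry.  */
        }
      return victim;
    }

The CAS2 fix the old comment refers to would pair the bin pointer with a generation counter and swap both with a double-width CAS, so the stale exchange above would fail instead of silently succeeding.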

Anyhow, while the atomic ops avoid a lock in free, they are costly too. Especially under high contention, the compare_and_exchange may fail multiple times (see http://blog.memsql.com/common-pitfalls-in-writing-lock-free-algorithms/ for a discussion). So I measured how removing the atomics and installing a lock around free affects performance, and it turns out (at least in my case with 2 cores) that it has no effect:

Current implementation
threads time per iteration
1       116.709
8       705.080
16      1394.74
32      2923.03

Without atomics
threads time per iteration
1       112.541
8       715.897
16      1403.67
32      2881.30
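
The benchmark harness is not included in this mail; the following is a rough sketch of the kind of measurement, assuming each thread runs a tight malloc/free loop on a fastbin-sized block and the reported figure is wall time divided by the per-thread iteration count (ITERS, SIZE, and the clock choice are made-up details):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define ITERS 1000000
    #define SIZE  64            /* Small enough for a fastbin.  */

    static void *
    worker (void *arg)
    {
      (void) arg;
      for (int i = 0; i < ITERS; i++)
        free (malloc (SIZE));
      return NULL;
    }

    int
    main (int argc, char **argv)
    {
      int nthreads = argc > 1 ? atoi (argv[1]) : 8;
      pthread_t tids[nthreads];
      struct timespec t0, t1;

      clock_gettime (CLOCK_MONOTONIC, &t0);
      for (int i = 0; i < nthreads; i++)
        pthread_create (&tids[i], NULL, worker, NULL);
      for (int i = 0; i < nthreads; i++)
        pthread_join (tids[i], NULL);
      clock_gettime (CLOCK_MONOTONIC, &t1);

      double ns = (t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec);
      printf ("%.3f per iteration\n", ns / ITERS);
      return 0;
    }

Build with -pthread and pass the thread count as the first argument.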

So I propose to remove the atomic ops: doing so simplifies the code and reactivates a security check (the size check of the top fastbin chunk in free) that previously ran only when the lock was already held.


	Remove atomic operations from malloc's fastbin path.
	* malloc/malloc.c (_int_malloc): Remove compare_and_exchange.
	(_int_free): Remove compare_and_exchange and lock arena if needed.
	(malloc_consolidate): Remove atomic_exchange.


diff --git a/malloc/malloc.c b/malloc/malloc.c
index aa7edbf..f43c14c 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -219,7 +219,6 @@
 #include <malloc-machine.h>
 #include <malloc-sysdep.h>

-#include <atomic.h>
 #include <_itoa.h>
 #include <bits/wordsize.h>
 #include <sys/sysinfo.h>
@@ -3332,15 +3331,7 @@ _int_malloc (mstate av, size_t bytes)
     {
       idx = fastbin_index (nb);
       mfastbinptr *fb = &fastbin (av, idx);
-      mchunkptr pp = *fb;
-      do
-        {
-          victim = pp;
-          if (victim == NULL)
-            break;
-        }
-      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
-             != victim);
+      victim = *fb;
       if (victim != 0)
         {
           if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
@@ -3350,6 +3341,7 @@ _int_malloc (mstate av, size_t bytes)
               malloc_printerr (check_action, errstr, chunk2mem (victim));
               return NULL;
             }
+	  *fb = victim->fd;
           check_remalloced_chunk (av, victim, nb);
           void *p = chunk2mem (victim);
           alloc_perturb (p, bytes);
@@ -3857,29 +3849,18 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 #endif
       ) {

+    if (!have_lock)
+      {
+	(void) mutex_lock (&av->mutex);
+	locked = 1;
+      }
+
     if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
 	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
 			     >= av->system_mem, 0))
       {
-	/* We might not have a lock at this point and concurrent modifications
-	   of system_mem might have let to a false positive.  Redo the test
-	   after getting the lock.  */
-	if (have_lock
-	    || ({ assert (locked == 0);
-		  mutex_lock(&av->mutex);
-		  locked = 1;
-		  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
-		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
-	      }))
-	  {
-	    errstr = "free(): invalid next size (fast)";
-	    goto errout;
-	  }
-	if (! have_lock)
-	  {
-	    (void)mutex_unlock(&av->mutex);
-	    locked = 0;
-	  }
+	errstr = "free(): invalid next size (fast)";
+	goto errout;
       }

     free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
@@ -3887,35 +3868,34 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     set_fastchunks(av);
     unsigned int idx = fastbin_index(size);
     fb = &fastbin (av, idx);
+    mchunkptr old = *fb;

-    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
-    mchunkptr old = *fb, old2;
-    unsigned int old_idx = ~0u;
-    do
+    /* Check that the top of the bin is not the record we are going to add
+       (i.e., double free).  */
+    if (__builtin_expect (old == p, 0))
       {
-	/* Check that the top of the bin is not the record we are going to add
-	   (i.e., double free).  */
-	if (__builtin_expect (old == p, 0))
-	  {
-	    errstr = "double free or corruption (fasttop)";
-	    goto errout;
-	  }
-	/* Check that size of fastbin chunk at the top is the same as
-	   size of the chunk that we are adding.  We can dereference OLD
-	   only if we have the lock, otherwise it might have already been
-	   deallocated.  See use of OLD_IDX below for the actual check.  */
-	if (have_lock && old != NULL)
-	  old_idx = fastbin_index(chunksize(old));
-	p->fd = old2 = old;
+	errstr = "double free or corruption (fasttop)";
+	goto errout;
       }
-    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

-    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
+    /* Check that size of fastbin chunk at the top is the same as
+       size of the chunk that we are adding.  */
+    if (old != NULL && __builtin_expect (fastbin_index(chunksize(old)) != idx, 0))
       {
 	errstr = "invalid fastbin entry (free)";
 	goto errout;
       }
-  }
+
+    /* Link P to its fastbin: P->FD = *FB; *FB = P;  */
+    p->fd = old;
+    *fb = p;
+
+    if (!have_lock)
+      {
+	(void) mutex_unlock(&av->mutex);
+	locked = 0;
+      }
+   }

   /*
     Consolidate other non-mmapped chunks as they arrive.
@@ -4121,7 +4101,8 @@ static void malloc_consolidate(mstate av)
     maxfb = &fastbin (av, NFASTBINS - 1);
     fb = &fastbin (av, 0);
     do {
-      p = atomic_exchange_acq (fb, 0);
+      p = *fb;
+      *fb = NULL;
       if (p != 0) {
 	do {
 	  check_inuse_chunk(av, p);

