This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


GNU C Library master sources branch dj/malloc updated. glibc-2.23-112-g46b4437


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, dj/malloc has been updated
       via  46b44370f430c1de1da835903b96c77a55ff1d63 (commit)
      from  5bd0885c9ddf14ddb959742fcb0d4b2368d7b44f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=46b44370f430c1de1da835903b96c77a55ff1d63

commit 46b44370f430c1de1da835903b96c77a55ff1d63
Author: DJ Delorie <dj@delorie.com>
Date:   Fri Apr 29 14:58:34 2016 -0400

    changes to per-thread cache algorithms
    
    Core algorithm changes:
    
    * Per-thread cache is refilled from existing fastbins and smallbins
      instead of always needing a bigger chunk.
    
    * Caches are linked together, and a thread's cache is cleaned up when
      the thread exits (incomplete for now; it provides the framework
      needed for the chunk scanner).
    
    * Fixes to mutex placement - needed to sync chunk headers across
      threads.
    
    Enabling the per-thread cache (tcache) gives about a 20-30% speedup at
    a 20-30% memory cost (due to fragmentation).  Still working on that :-)
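
    A minimal sketch of what the cache reduces to (simplified from the
    malloc.c hunks below; TCACHE_IDX and TCACHE_FILL_COUNT here are
    stand-in values, and the real code also tracks init state, the
    cache list, and arena fallback):

        #include <stddef.h>

        #define TCACHE_IDX        64   /* stand-in size-class count */
        #define TCACHE_FILL_COUNT 8    /* stand-in per-class limit */

        typedef struct TCacheEntry {
          struct TCacheEntry *next;          /* chains free chunks of one size class */
        } TCacheEntry;

        typedef struct TCache {
          struct TCache *prev, *next;        /* all thread caches linked for the scanner */
          char initted;                      /* 0 = unused, 1 = live, 2 = shutting down */
          char counts[TCACHE_IDX];           /* cached entries per size class */
          TCacheEntry *entries[TCACHE_IDX];  /* list head per size class */
        } TCache;

        static __thread TCache tcache;

        /* malloc fast path: pop the head entry for this size class;
           no arena lock is taken on a hit.  */
        static void *
        tcache_get (int tc_idx)
        {
          TCacheEntry *e = tcache.entries[tc_idx];
          if (e == NULL)
            return NULL;                     /* miss: fall back to _int_malloc */
          tcache.entries[tc_idx] = e->next;
          tcache.counts[tc_idx]--;
          return (void *) e;
        }

        /* free fast path: push onto the list while this class still has room.  */
        static int
        tcache_put (void *mem, int tc_idx)
        {
          TCacheEntry *e = mem;
          if (tcache.counts[tc_idx] >= TCACHE_FILL_COUNT)
            return 0;                        /* full: give the chunk back to the arena */
          e->next = tcache.entries[tc_idx];
          tcache.entries[tc_idx] = e;
          tcache.counts[tc_idx]++;
          return 1;
        }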
    
    Debugging helpers (temporary):
    
    * __malloc_scan_chunks() calls back to the app for each chunk in each
      heap.
    
    * _m_printf() helper for "safe" printing from within malloc.
    
    * Lots of calls to the above, commented out, in case you need them.
    
    * trace_run scans leftover chunks too.
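
    The scanner is meant to be driven from the application.  A minimal
    sketch of a caller, modeled on the trace_run.c hunk below, assuming
    a libc built from this branch and the malloc/mtrace.h header for the
    MSCAN_* values and the __malloc_scan_chunks prototype (a stock glibc
    has no such entry point):

        #include <stdio.h>
        #include <stdlib.h>
        #include "mtrace.h"              /* MSCAN_* and __malloc_scan_chunks */

        static void
        scan_cb (void *ptr, size_t length, int type)
        {
          /* Called once per arena, heap, and chunk; TYPE is an MSCAN_* value.  */
          printf ("type %d: ptr %p length %#zx\n", type, ptr, length);
        }

        int
        main (void)
        {
          void *p = malloc (100);            /* leave something live in the heap */
          __malloc_scan_chunks (scan_cb);    /* walks tcaches, fastbins, bins, top */
          free (p);
          return 0;
        }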

diff --git a/malloc/Versions b/malloc/Versions
index 43cb909..6f03a46 100644
--- a/malloc/Versions
+++ b/malloc/Versions
@@ -76,5 +76,7 @@ libc {
     # malloc trace hooks for mtrace-ctl
     __malloc_set_trace_buffer;
     __malloc_get_trace_buffer;
+
+    __malloc_scan_chunks;
   }
 }
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 453311d..acd7170 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -301,6 +301,22 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line,
 }
 #endif
 
+static void
+_m_printf(const char *fmt, ...)
+{
+  char buf[1024];
+  va_list argp;
+  int tid = (unsigned int)syscall(__NR_gettid);
+
+  snprintf(buf, sizeof(buf)-1, "\033[%dm%5x: ", (tid % 6) + 31, tid & 0xfffff);
+
+  va_start (argp, fmt);
+  vsnprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), fmt, argp);
+  va_end (argp);
+
+  strcat(buf+strlen(buf), "\033[0m");
+  write(2, buf, strlen(buf));
+}
 
 /*
   INTERNAL_SIZE_T is the word-size used for internal bookkeeping
@@ -1487,7 +1503,10 @@ typedef struct malloc_chunk *mbinptr;
     FD = P->fd;								      \
     BK = P->bk;								      \
     if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
+      {\
+	_m_printf("%p->%p %p %p->%p\n", FD, FD->bk, P, BK, BK->fd); \
       malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
+      }									\
     else {								      \
         FD->bk = BK;							      \
         BK->fd = FD;							      \
@@ -1999,7 +2018,9 @@ free_perturb (char *p, size_t n)
 static void
 do_check_chunk (mstate av, mchunkptr p)
 {
-  unsigned long sz = chunksize (p);
+  unsigned long sz  __attribute__((unused)) = chunksize (p);
+  if (!av)
+    return;
   /* min and max possible addresses assuming contiguous allocation */
   char *max_address = (char *) (av->top) + chunksize (av->top);
   char *min_address = max_address - av->system_mem;
@@ -2119,7 +2140,7 @@ do_check_inuse_chunk (mstate av, mchunkptr p)
 static void
 do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
 {
-  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
+  INTERNAL_SIZE_T sz  __attribute__((unused)) = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
 
   if (!chunk_is_mmapped (p))
     {
@@ -2366,6 +2387,9 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
      rather than expanding top.
    */
 
+#if 0
+  _m_printf("\033[35mdj: av %p size %lx, top %p %d\033[0m\n", av, nb, av->top, chunksize(av->top));
+#endif
   if (av == NULL
       || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
 	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
@@ -2458,6 +2482,12 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
      at least MINSIZE and to have prev_inuse set.
    */
 
+#if 0
+  _m_printf("%p %p %ld, %p %p\n",
+	    old_top, initial_top(av), old_size,
+	    prev_inuse(old_top), old_end);
+#endif
+
   assert ((old_top == initial_top (av) && old_size == 0) ||
           ((unsigned long) (old_size) >= MINSIZE &&
            prev_inuse (old_top) &&
@@ -2900,6 +2930,9 @@ munmap_chunk (mchunkptr p)
 
   uintptr_t block = (uintptr_t) p - p->prev_size;
   size_t total_size = p->prev_size + size;
+#if 0
+  fprintf(stderr, "DJ: p  %p sz %5lx ps %lx s %lx\n", p, (int64_t)p->size, (int64_t)p->prev_size, (int64_t)size);
+#endif
   /* Unfortunately we have to do the compilers job by hand here.  Normally
      we would test BLOCK and TOTAL-SIZE separately for compliance with the
      page size.  But gcc does not recognize the optimization possibility
@@ -2987,12 +3020,35 @@ typedef struct TCacheEntry {
   struct TCacheEntry *next;
 } TCacheEntry;
 
-typedef struct {
+typedef struct TCache {
+  struct TCache *prev, *next;
+  char initted; /* 0 = uninitted, 1 = normal, anything else = shutting down */
   char counts[TCACHE_IDX];
   TCacheEntry *entries[TCACHE_IDX];
 } TCache;
 
-static __thread TCache tcache = {{0},{0}};
+static TCache *tcache_list = NULL;
+static mutex_t tcache_mutex = _LIBC_LOCK_INITIALIZER;
+
+static __thread TCache tcache = {0,0,0,{0},{0}};
+
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+tcache_thread_freeres (void)
+{
+  if (tcache.initted == 1)
+    {
+      (void) mutex_lock (&tcache_mutex);
+      tcache.initted = 2;
+      if (tcache.next)
+	tcache.next->prev = tcache.prev;
+      if (tcache.prev)
+	tcache.prev->next = tcache.next;
+      else
+	tcache_list = tcache.next;
+      (void) mutex_unlock (&tcache_mutex);
+    }
+}
+text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
 
 #endif
 
@@ -3001,15 +3057,29 @@ __libc_malloc (size_t bytes)
 {
   mstate ar_ptr;
   void *victim;
+
 #if USE_TCACHE
+  bytes = request2size(bytes);
   int tc_idx = size2tidx (bytes);
+
+  if (tcache.initted == 0)
+    {
+      tcache.initted = 1;
+      (void) mutex_lock (&tcache_mutex);
+      tcache.next = tcache_list;
+      if (tcache.next)
+	tcache.next->prev = &tcache;
+      tcache_list = &tcache;
+      (void) mutex_unlock (&tcache_mutex);
+    }
 #endif
 
   __MTB_TRACE_ENTRY (MALLOC,bytes,NULL);
 
 #if USE_TCACHE
   if (bytes < MAX_TCACHE_SIZE
-      && tcache.entries[tc_idx] != NULL)
+      && tcache.entries[tc_idx] != NULL
+      && tcache.initted == 1)
     {
       TCacheEntry *e = tcache.entries[tc_idx];
       tcache.entries[tc_idx] = e->next;
@@ -3028,22 +3098,34 @@ __libc_malloc (size_t bytes)
       return (*hook)(bytes, RETURN_ADDRESS (0));
     }
 
-#if USE_TCACHE
-  if (bytes < MAX_TCACHE_SIZE)
+#if 0 && USE_TCACHE
+  /* This is fast but causes internal fragmentation, as it always
+     pulls large chunks but puts small chunks, leading to a large
+     backlog of small chunks.  */
+  if (bytes < MAX_TCACHE_SIZE
+      && tcache.initted == 1)
     {
       void *ent;
-      int tc_bytes = tc_idx << TCACHE_SHIFT;
-      int tc_ibytes = tc_bytes + 2*SIZE_SZ;
-      int total_bytes;
+      size_t tc_bytes = tc_idx << TCACHE_SHIFT;
+      size_t tc_ibytes;
+      size_t total_bytes;
       int i;
 
-      total_bytes = tc_bytes + tc_ibytes * TCACHE_FILL_COUNT;
+      assert (tc_bytes >= bytes);
+
+      if (tc_bytes < 2 * SIZE_SZ)
+	tc_bytes = 2 * SIZE_SZ;
+      tc_ibytes = tc_bytes + 2*SIZE_SZ;
+
+      total_bytes = tc_bytes + tc_ibytes * TCACHE_FILL_COUNT;
 
       __MTB_TRACE_PATH (thread_cache);
       __MTB_TRACE_PATH (cpu_cache);
 
       arena_get (ar_ptr, total_bytes);
 
+      if (ar_ptr)
+	{
       ent = _int_malloc (ar_ptr, total_bytes);
       /* Retry with another arena only if we were able to find a usable arena
 	 before.  */
@@ -3053,17 +3135,26 @@ __libc_malloc (size_t bytes)
 	  LIBC_PROBE (memory_malloc_retry, 1, total_bytes);
 	  ar_ptr = arena_get_retry (ar_ptr, total_bytes);
 	  ent = _int_malloc (ar_ptr, total_bytes);
+	  //_m_printf("tc2: av %p sz %lx rv %p\n", ar_ptr, total_bytes, ent);
 	}
-      if (ar_ptr != NULL)
-	(void) mutex_unlock (&ar_ptr->mutex);
 
       if (ent)
 	{
 	  mchunkptr m = mem2chunk (ent);
 	  TCacheEntry *e;
 	  int flags = m->size & SIZE_BITS;
-	  int old_size = m->size & ~SIZE_BITS;
-	  int extra = old_size - total_bytes - 2*SIZE_SZ;
+	  size_t old_size = m->size & ~SIZE_BITS;
+	  size_t extra = old_size - total_bytes - 2*SIZE_SZ;
+
+#if 0
+	  tid = syscall(__NR_gettid);
+	  _m_printf("%04x tc: av %p sz %5lx.%5lx.%2d rv %p %16lx %16lx %d\n",
+	    tid, ar_ptr, m->size, total_bytes, (int)extra, ent, (int64_t)m->prev_size, (int64_t)m->size, bytes);
+#endif
+	  if (flags & IS_MMAPPED)
+	    {
+	      write (2, "\033[31mMMAPPED CACHE BLOCK\033[0m\n", 29);
+	    }
 
 	  m->size = tc_ibytes | flags;
 	  flags |= PREV_INUSE;
@@ -3073,8 +3164,8 @@ __libc_malloc (size_t bytes)
 	      m =     (mchunkptr) (ent + i * tc_ibytes + tc_bytes);
 	      e = (TCacheEntry *) (ent + i * tc_ibytes + tc_ibytes);
 
-	      /* Not needed because the previious chunk is "in use".  */
-	      /*m->prev_size = tc_ibytes;*/
+	      //	      _m_printf("%04x \t%p %d\n", tid, m, tc_ibytes);
+	      /* Not needed because the previous chunk is "in use".  */
 	      m->size = tc_ibytes | flags;
 	      e->next = tcache.entries[tc_idx];
 	      tcache.entries[tc_idx] = e;
@@ -3085,8 +3176,16 @@ __libc_malloc (size_t bytes)
 	  /*m = (mchunkptr) (ent + total_bytes);
 	    m->prev_size = tc_ibytes + extra;*/
 	}
+
+      /* This must go after the above code to ensure that other
+	 threads see our changes, even though we're sending this chunk
+	 up to the app.  */
+      if (ar_ptr != NULL)
+	(void) mutex_unlock (&ar_ptr->mutex);
+
       __MTB_TRACE_SET(ptr2, ent);
       return ent;
+	}
     }
 #endif
 
@@ -3452,6 +3551,7 @@ __libc_calloc (size_t n, size_t elem_size)
   mem = _int_malloc (av, sz);
 
 
+  //_m_printf("mem = %p av %p afc %p\n", mem, av, arena_for_chunk (mem2chunk (mem)));
   assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
           av == arena_for_chunk (mem2chunk (mem)));
 
@@ -3532,6 +3632,65 @@ __libc_calloc (size_t n, size_t elem_size)
    ------------------------------ malloc ------------------------------
  */
 
+#if 0 && USE_TCACHE
+/* This will be re-used later when we re-add a chunk splitting algorithm.  */
+/* If a chunk that is some multiple of the desired size is found, use
+   this routine to split it up, fill the cache, and return it.  */
+static mchunkptr
+_tcache_fill (mstate av, size_t original_nb, mchunkptr chunk)
+{
+  int i;
+  int n = chunksize(chunk) / original_nb;
+  mchunkptr m;
+  TCacheEntry *e;
+  int tc_idx = size2tidx (original_nb - SIZE_SZ);
+  int bits = chunk->size & SIZE_BITS;
+
+  if (original_nb-SIZE_SZ > MAX_TCACHE_SIZE)
+    return chunk;
+
+  //_m_printf("_tcache_fill %p %x %d %d %p\n", chunk, (unsigned int)original_nb, n, MALLOC_ALIGNMENT,
+  //	    arena_for_chunk(chunk));
+
+  if (n < original_nb*2)
+    return chunk;
+
+  for (i = 1; i < n; i++)
+    {
+      m =     (mchunkptr) ((char *)chunk + i * original_nb);
+      e = (TCacheEntry *) chunk2mem (m);
+
+      set_head (m, original_nb | PREV_INUSE | bits);
+      //_m_printf("  cache[%d] %p %p\n", i, m, arena_for_chunk(m));
+      e->next = tcache.entries[tc_idx];
+      tcache.entries[tc_idx] = e;
+      tcache.counts[tc_idx] ++;
+    }
+
+  assert ((chunksize(chunk) % original_nb) == 0);
+  //_m_printf(" returns %p %x %x $p\n", chunk, chunksize(chunk), original_nb, arena_for_chunk (chunk));
+  set_head_size (chunk, original_nb);
+
+  return chunk;
+}
+
+#if 0
+/* Given a chunk of size ACTUAL_SIZE and a user request of size
+   DESIRED_SIZE, compute the largest ACTUAL_SIZE that would fill the
+   tcache.  */
+static int
+_tcache_maxsize (INTERNAL_SIZE_T desired_size, INTERNAL_SIZE_T actual_size)
+{
+  if (desired_size-SIZE_SZ > MAX_TCACHE_SIZE)
+    return desired_size;
+  actual_size -= actual_size % desired_size;
+  if (actual_size > desired_size * TCACHE_FILL_COUNT)
+    actual_size = desired_size * TCACHE_FILL_COUNT;
+  return actual_size;
+}
+#endif
+#endif
+
 static void *
 _int_malloc (mstate av, size_t bytes)
 {
@@ -3576,6 +3735,13 @@ _int_malloc (mstate av, size_t bytes)
       return p;
     }
 
+#if 0
+  if (av && av->top)
+    _m_printf("_int_malloc (%ld), %p %d\n", nb, av->top, chunksize(av->top));
+  else
+    _m_printf("_int_malloc (%ld)\n", nb);
+#endif
+
   /*
      If the size qualifies as a fastbin, first check corresponding bin.
      This code is safe to execute even if av is not yet initialized, so we
@@ -3584,6 +3750,7 @@ _int_malloc (mstate av, size_t bytes)
 
   if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
     {
+
       idx = fastbin_index (nb);
       mfastbinptr *fb = &fastbin (av, idx);
       mchunkptr pp = *fb;
@@ -3605,6 +3772,38 @@ _int_malloc (mstate av, size_t bytes)
               return NULL;
             }
           check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  if (nb-SIZE_SZ < MAX_TCACHE_SIZE)
+	    {
+	      int tc_idx = size2tidx (bytes);
+	      mchunkptr tc_victim;
+	      int found = 0;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache.counts[tc_idx] < TCACHE_FILL_COUNT
+		     && (pp = *fb) != NULL)
+		{
+		  do
+		    {
+	              tc_victim = pp;
+	              if (tc_victim == NULL)
+	                break;
+	            }
+	          while ((pp = catomic_compare_and_exchange_val_acq (fb, tc_victim->fd, tc_victim))
+	                 != tc_victim);
+		  if (tc_victim != 0)
+		    {
+		      TCacheEntry *e = (TCacheEntry *) chunk2mem(tc_victim);
+		      e->next = tcache.entries[tc_idx];
+		      tcache.entries[tc_idx] = e;
+		      tcache.counts[tc_idx] ++;
+		      found ++;
+	            }
+		}
+	    }
+#endif
           void *p = chunk2mem (victim);
           alloc_perturb (p, bytes);
           return p;
@@ -3631,7 +3830,7 @@ _int_malloc (mstate av, size_t bytes)
           else
             {
               bck = victim->bk;
-	if (__glibc_unlikely (bck->fd != victim))
+	      if (__glibc_unlikely (bck->fd != victim))
                 {
                   errstr = "malloc(): smallbin double linked list corrupted";
                   goto errout;
@@ -3643,6 +3842,41 @@ _int_malloc (mstate av, size_t bytes)
               if (av != &main_arena)
                 victim->size |= NON_MAIN_ARENA;
               check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	  /* While we're here, if we see other chunks of the same size,
+	     stash them in the tcache.  */
+	  if (nb-SIZE_SZ < MAX_TCACHE_SIZE)
+	    {
+	      int tc_idx = size2tidx (nb-SIZE_SZ);
+	      mchunkptr tc_victim;
+	      int found = 0;
+
+	      /* While bin not empty and tcache not full, copy chunks over.  */
+	      while (tcache.counts[tc_idx] < TCACHE_FILL_COUNT
+		     && (tc_victim = last(bin)) != bin)
+		{
+		  if (tc_victim != 0)
+		    {
+		      bck = tc_victim->bk;
+		      set_inuse_bit_at_offset (tc_victim, nb);
+		      if (av != &main_arena)
+			tc_victim->size |= NON_MAIN_ARENA;
+		      bin->bk = bck;
+		      bck->fd = bin;
+
+		      TCacheEntry *e = (TCacheEntry *) chunk2mem(tc_victim);
+		      e->next = tcache.entries[tc_idx];
+		      tcache.entries[tc_idx] = e;
+		      tcache.counts[tc_idx] ++;
+		      found ++;
+		      //_m_printf("snarf chunk %p %lx %p %lx\n", tc_victim, nb,
+		      //	chunk_at_offset(tc_victim, nb), chunk_at_offset(tc_victim, nb)->size);
+	            }
+		}
+	      //_m_printf("%d chunks found in smallbin\n", found);
+	    }
+#endif
+	  //_m_printf("%d: return %p\n", __LINE__, victim);
               void *p = chunk2mem (victim);
               alloc_perturb (p, bytes);
               return p;
@@ -3665,7 +3899,7 @@ _int_malloc (mstate av, size_t bytes)
     {
       idx = largebin_index (nb);
       if (have_fastchunks (av))
-        malloc_consolidate (av);
+	malloc_consolidate (av);
     }
 
   /*
@@ -3681,8 +3915,23 @@ _int_malloc (mstate av, size_t bytes)
      otherwise need to expand memory to service a "small" request.
    */
 
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  //INTERNAL_SIZE_T tcache_max = 0;
+  if (nb-SIZE_SZ <= MAX_TCACHE_SIZE)
+    {
+      //int tc_idx = size2tidx (bytes);
+      tcache_nb = nb;
+      //tcache_max = nb * (TCACHE_FILL_COUNT - tcache.counts[tc_idx]);
+    }
+  int tc_idx = size2tidx (nb-SIZE_SZ);
+  int return_cached = 0;
+#endif
+
   for (;; )
     {
+      //_m_printf ("top of loop: %x, %p %ld %p\n", nb, av->top, chunksize(av->top), av);
+
       int iters = 0;
       while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
         {
@@ -3724,6 +3973,11 @@ _int_malloc (mstate av, size_t bytes)
               set_foot (remainder, remainder_size);
 
               check_malloced_chunk (av, victim, nb);
+#if 0&&USE_TCACHE
+	      // chunk splitting - disabled for the moment
+	      victim = _tcache_fill (av, original_nb, victim);
+#endif
+	      //_m_printf("%d: return %p\n", __LINE__, victim);
               void *p = chunk2mem (victim);
               alloc_perturb (p, bytes);
               return p;
@@ -3735,15 +3989,43 @@ _int_malloc (mstate av, size_t bytes)
 
           /* Take now instead of binning if exact fit */
 
-          if (size == nb)
+          if (size == nb
+#if 0 && USE_TCACHE
+	      /* This forces us to split up bigger chunks later */
+	      && tcache_nb
+		  && tcache.counts[tc_idx] < TCACHE_FILL_COUNT
+#endif
+	      )
             {
               set_inuse_bit_at_offset (victim, size);
               if (av != &main_arena)
                 victim->size |= NON_MAIN_ARENA;
+
+#if USE_TCACHE
+	      /* Fill cache first, return to user only if cache fills.
+		 We may return one of these chunks later.  */
+	      if (tcache_nb
+		  && tcache.counts[tc_idx] < TCACHE_FILL_COUNT)
+		{
+		  TCacheEntry *e = (TCacheEntry *) chunk2mem(victim);
+		  e->next = tcache.entries[tc_idx];
+		  tcache.entries[tc_idx] = e;
+		  tcache.counts[tc_idx] ++;
+		  return_cached = 1;
+		  continue;
+		}
+	      else
+		{
+#endif
+
               check_malloced_chunk (av, victim, nb);
+	      //_m_printf("%d: return %p\n", __LINE__, victim);
               void *p = chunk2mem (victim);
               alloc_perturb (p, bytes);
               return p;
+#if USE_TCACHE
+		}
+#endif
             }
 
           /* place chunk in bin */
@@ -3813,6 +4095,17 @@ _int_malloc (mstate av, size_t bytes)
             break;
         }
 
+#if USE_TCACHE
+      /* If all the small chunks we found ended up cached, return one now.  */
+      if (return_cached)
+	{
+	  TCacheEntry *e = tcache.entries[tc_idx];
+	  tcache.entries[tc_idx] = e->next;
+	  tcache.counts[tc_idx] --;
+	  return (void *) e;
+	}
+#endif
+
       /*
          If a large request, scan through the chunks of current bin in
          sorted order to find smallest that fits.  Use the skip list for this.
@@ -3874,6 +4167,11 @@ _int_malloc (mstate av, size_t bytes)
                   set_foot (remainder, remainder_size);
                 }
               check_malloced_chunk (av, victim, nb);
+#if 0 && USE_TCACHE
+              /* chunk splitting, disabled for now.  */
+	      victim = _tcache_fill (av, original_nb, victim);
+#endif
+	      //_m_printf("%d: return %p\n", __LINE__, victim);
               void *p = chunk2mem (victim);
               alloc_perturb (p, bytes);
               return p;
@@ -3985,6 +4283,10 @@ _int_malloc (mstate av, size_t bytes)
                   set_foot (remainder, remainder_size);
                 }
               check_malloced_chunk (av, victim, nb);
+#if 0&&USE_TCACHE
+	      victim = _tcache_fill (av, original_nb, victim);
+#endif
+	      //_m_printf("%d: return %p %d, top %p %d\n", __LINE__, victim, chunksize(victim), av->top, chunksize(av->top));
               void *p = chunk2mem (victim);
               alloc_perturb (p, bytes);
               return p;
@@ -4020,6 +4322,10 @@ _int_malloc (mstate av, size_t bytes)
           set_head (remainder, remainder_size | PREV_INUSE);
 
           check_malloced_chunk (av, victim, nb);
+#if 0&&USE_TCACHE
+	  victim = _tcache_fill (av, original_nb, victim);
+#endif
+	  //_m_printf("%d: return %p %d, top %p %d\n", __LINE__, victim, chunksize(victim), av->top, chunksize(av->top));
           void *p = chunk2mem (victim);
           alloc_perturb (p, bytes);
           return p;
@@ -4042,11 +4348,23 @@ _int_malloc (mstate av, size_t bytes)
        */
       else
         {
+#if 0&&USE_TCACHE
+	  if (nb == original_nb)
+	    {
+#endif
           void *p = sysmalloc (nb, av);
           if (p != NULL)
             alloc_perturb (p, bytes);
+	  //_m_printf("%d: return %p (sysmalloc)\n", __LINE__, victim);
           return p;
+#if 0&&USE_TCACHE
+	    }
+#endif
         }
+
+#if 0&&USE_TCACHE
+      nb = original_nb;
+#endif
     }
 }
 
@@ -4100,7 +4418,8 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     int tc_idx = size2tidx (size - SIZE_SZ*2);
 
     if (size < MAX_TCACHE_SIZE
-	&& tcache.counts[tc_idx] < TCACHE_FILL_COUNT)
+	&& tcache.counts[tc_idx] < TCACHE_FILL_COUNT
+	&& tcache.initted == 1)
       {
 	TCacheEntry *e = (TCacheEntry *) chunk2mem (p);
 	e->next = tcache.entries[tc_idx];
@@ -4141,6 +4460,8 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
 	      }))
 	  {
+	    _m_printf("%p %p %lx vs %lx %lx\n", p, chunk_at_offset (p, size), chunk_at_offset (p, size)->size,
+		      2 * SIZE_SZ, av->system_mem);
 	    errstr = "free(): invalid next size (fast)";
 	    goto errout;
 	  }
@@ -5238,6 +5559,7 @@ malloc_printerr (int action, const char *str, void *ptr, mstate ar_ptr)
 
       __libc_message (action & 2, "*** Error in `%s': %s: 0x%s ***\n",
                       __libc_argv[0] ? : "<unknown>", str, cp);
+      abort();
     }
   else if (action & 2)
     abort ();
@@ -5299,7 +5621,7 @@ __malloc_info (int options, FILE *fp)
   mstate ar_ptr = &main_arena;
   do
     {
-      fprintf (fp, "<heap nr=\"%d\">\n<sizes>\n", n++);
+      fprintf (fp, "<heap nr=\"%d\" addr=\"%p\">\n<sizes>\n", n++, ar_ptr);
 
       size_t nblocks = 0;
       size_t nfastblocks = 0;
@@ -5447,6 +5769,74 @@ __malloc_info (int options, FILE *fp)
 }
 weak_alias (__malloc_info, malloc_info)
 
+void
+__malloc_scan_chunks (void (*cb)(void *,size_t,int))
+{
+#if USE_TCACHE
+  TCache *tc = tcache_list;
+  while (tc)
+    {
+      cb(tc, 0, MSCAN_TCACHE);
+      for (size_t i = 0; i < TCACHE_IDX; ++i)
+	{
+	  TCacheEntry *te = tc->entries[i];
+	  for (int j = 0; j < tc->counts[i]; j++)
+	    {
+	      cb(mem2chunk(te), chunksize(mem2chunk(te)), MSCAN_TCACHE);
+	      te = te->next;
+	    }
+	}
+      tc = tc->next;
+    }
+#endif
+
+  mstate ar_ptr = &main_arena;
+  do
+    {
+      cb(ar_ptr, 0, MSCAN_ARENA);
+
+      if (ar_ptr != &main_arena)
+	{
+	  heap_info *heap = heap_for_ptr (top (ar_ptr));
+	  while (heap)
+	    {
+	      cb(heap, heap->size, MSCAN_HEAP);
+
+	      heap = heap->prev;
+	    }
+	};
+
+      for (size_t i = 0; i < NFASTBINS; ++i)
+	{
+	  mchunkptr p = fastbin (ar_ptr, i);
+	  while (p != NULL)
+	    {
+	      cb(p, chunksize(p), MSCAN_FASTBIN_FREE);
+	      p = p->fd;
+	    }
+	}
+
+      mbinptr bin;
+      struct malloc_chunk *r;
+      for (size_t i = 1; i < NBINS; ++i)
+	{
+	  bin = bin_at (ar_ptr, i);
+	  r = bin->fd;
+	  if (r != NULL)
+	    while (r != bin)
+	      {
+		cb(r, chunksize(r), (i == 1) ? MSCAN_UNSORTED : MSCAN_CHUNK_FREE);
+		r = r->fd;
+	      }
+	}
+
+      cb(ar_ptr->top, chunksize(ar_ptr->top), MSCAN_TOP);
+
+      ar_ptr = ar_ptr->next;
+    }
+  while (ar_ptr != &main_arena);
+}
+
 
 strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
 strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
diff --git a/malloc/mtrace.h b/malloc/mtrace.h
index d6ca8c1..8dea72c 100644
--- a/malloc/mtrace.h
+++ b/malloc/mtrace.h
@@ -89,3 +89,18 @@ void * __malloc_get_trace_buffer (size_t *bufcount, size_t *bufhead);
 
 /* ptr2 = pvalloc (size) */
 #define __MTB_TYPE_PVALLOC	7
+
+
+typedef enum {
+  MSCAN_UNUSED,
+  MSCAN_ARENA,
+  MSCAN_HEAP,
+  MSCAN_CHUNK_USED,
+  MSCAN_CHUNK_FREE,
+  MSCAN_FASTBIN_FREE,
+  MSCAN_UNSORTED,
+  MSCAN_TOP,
+  MSCAN_TCACHE
+} MSCAN_Types;
+
+void __malloc_scan_chunks (void (*callback)(void * /*ptr*/, size_t /*length*/, int /*type*/));
diff --git a/malloc/trace2dat b/malloc/trace2dat
index 1d8029e..d09ca6c 100755
--- a/malloc/trace2dat
+++ b/malloc/trace2dat
@@ -5,6 +5,8 @@ $outfile = shift @ARGV;
 
 $outfile = "trace2c.dat" unless $outfile;
 
+die("$outfile already exists") if -f $outfile;
+
 # Arrays starting with c_ are data code to be emitted later
 
 
diff --git a/malloc/trace_run.c b/malloc/trace_run.c
index 986c3dd..52d5487 100644
--- a/malloc/trace_run.c
+++ b/malloc/trace_run.c
@@ -9,6 +9,8 @@
 #include <sys/resource.h>
 #include <fcntl.h>
 
+#include "malloc.h"
+
 /* These must stay in sync with trace2dat */
 #define C_NOP 0
 #define C_DONE 1
@@ -49,6 +51,15 @@ static int64_t diff_timeval (struct timeval e, struct timeval s)
   return usec;
 }
 
+#if 1
+#define Q1
+#define Q2
+#else
+pthread_mutex_t genmutex = PTHREAD_MUTEX_INITIALIZER;
+#define Q1   pthread_mutex_lock(&genmutex)
+#define Q2   pthread_mutex_unlock(&genmutex)
+#endif
+
 pthread_mutex_t cmutex = PTHREAD_MUTEX_INITIALIZER;
 #define NCBUF 10
 static char cbuf[NCBUF][30];
@@ -95,9 +106,15 @@ int64_t calloc_time = 0, calloc_count = 0;
 int64_t realloc_time = 0, realloc_count = 0;
 int64_t free_time = 0, free_count = 0;
 
+pthread_mutex_t stop_mutex = PTHREAD_MUTEX_INITIALIZER;
+int threads_done = 0;
+
 //#define dprintf printf
 #define dprintf(...) 1
 
+//#define mprintf printf
+#define mprintf(...) 1
+
 #define myabort() my_abort_2(me, __LINE__)
 my_abort_2 (pthread_t me, int line)
 {
@@ -110,10 +127,20 @@ wmem (volatile void *ptr, int count)
 {
   char *p = (char *)ptr;
   int i;
+  size_t sz;
+
+  if (!p)
+    return;
+
+  //  sz = *((size_t *)ptr-1) & ~7;
+  //  fprintf(stderr, "wmem: %p size %x csize %x\n", ptr,
+  //	  count, sz);
+  //  if (sz < 4*sizeof(size_t))
+  //    abort();
   for (i=0; i<count; i+=8)
-    p[i] = 0;
+    p[i] = 0x11;
 }
-#define wmem(a,b)
+#define xwmem(a,b)
 
 static size_t get_int (unsigned char **ptr)
 {
@@ -140,6 +167,7 @@ thread_common (void *my_data_v)
   int64_t my_realloc_time = 0, my_realloc_count = 0;
   int64_t my_free_time = 0, my_free_count = 0;
   int64_t stime;
+  volatile void *tmp;
 
   while (1)
     {
@@ -162,7 +190,10 @@ thread_common (void *my_data_v)
 	  calloc_count += my_calloc_count;
 	  realloc_count += my_realloc_count;
 	  free_count += my_free_count;
+	  threads_done ++;
 	  pthread_mutex_unlock (&stat_mutex);
+	  pthread_mutex_lock(&stop_mutex);
+	  pthread_mutex_unlock(&stop_mutex);
 	  return NULL;
 
 	case C_MALLOC:
@@ -172,7 +203,12 @@ thread_common (void *my_data_v)
 	  if (p2 > n_ptrs)
 	    myabort();
 	  stime = rdtsc_s();
+	  Q1;
+	  if (ptrs[p2])
+	    free ((void *)ptrs[p2]);
 	  ptrs[p2] = malloc (sz);
+	  mprintf("%p = malloc(%lx)\n", ptrs[p2], sz);
+	  Q2;
 	  my_malloc_time += rdtsc_e() - stime;
 	  my_malloc_count ++;
 	  wmem(ptrs[p2], sz);
@@ -185,7 +221,12 @@ thread_common (void *my_data_v)
 	  if (p2 > n_ptrs)
 	    myabort();
 	  stime = rdtsc_s();
+	  Q1;
+	  if (ptrs[p2])
+	    free ((void *)ptrs[p2]);
 	  ptrs[p2] = calloc (sz, 1);
+	  mprintf("%p = calloc(%lx)\n", ptrs[p2], sz);
+	  Q2;
 	  my_calloc_time += rdtsc_e() - stime;
 	  my_calloc_count ++;
 	  wmem(ptrs[p2], sz);
@@ -196,22 +237,33 @@ thread_common (void *my_data_v)
 	  p1 = get_int (&cp);
 	  sz = get_int (&cp);
 	  dprintf("op %d:%d %d = REALLOC %d %d\n", (int)me, cp-data, p2, p1, sz);
+	  if (p1 > n_ptrs)
+	    myabort();
 	  if (p2 > n_ptrs)
 	    myabort();
 	  stime = rdtsc_s();
+	  Q1;
+	  tmp = ptrs[p1];
 	  ptrs[p2] = realloc ((void *)ptrs[p1], sz);
+	  mprintf("%p = realloc(%p,%lx)\n", ptrs[p2], tmp, sz);
+	  Q2;
 	  my_realloc_time += rdtsc_e() - stime;
 	  my_realloc_count ++;
 	  wmem(ptrs[p2], sz);
+	  if (p1 != p2)
+	    ptrs[p1] = 0;
 	  break;
 
 	case C_FREE:
 	  p1 = get_int (&cp);
-	  if (p2 > n_ptrs)
+	  if (p1 > n_ptrs)
 	    myabort();
 	  dprintf("op %d:%d FREE %d\n", (int)me, cp-data, p1);
 	  stime = rdtsc_s();
+	  Q1;
+	  mprintf("free(%p)\n", ptrs[p1]);
 	  free ((void *)ptrs[p1]);
+	  Q2;
 	  my_free_time += rdtsc_e() - stime;
 	  my_free_count ++;
 	  ptrs[p1] = 0;
@@ -276,6 +328,25 @@ my_malloc (char *msg, int size, unsigned char **cp, size_t *psz, size_t count)
   return rv;
 }
 
+static const char * const scan_names[] = {
+  "UNUSED",
+  "ARENA",
+  "HEAP",
+  "CHUNK_USED",
+  "CHUNK_FREE",
+  "FASTBIN_FREE",
+  "UNSORTED",
+  "TOP",
+  "TCACHE",
+  "USED"
+};
+
+void
+malloc_scan_callback (void *ptr, size_t length, int type)
+{
+  printf("%s: ptr %p length %llx\n", scan_names[type], ptr, length);
+}
+
 #define MY_ALLOC(T, psz)				\
   (typeof (T)) my_malloc (#T, sizeof(*T), &cp, psz, 0)
 #define MY_ALLOCN(T, count)				\
@@ -317,6 +388,8 @@ main(int argc, char **argv)
   for (i=0; i<n_data; i+=512)
     asm volatile ("# forced read %0" :: "r" (data[i]));
 
+  pthread_mutex_lock(&stop_mutex);
+
   cp = data;
   while (cp)
     {
@@ -353,11 +426,12 @@ main(int argc, char **argv)
 	  thread_idx ++;
 	  break;
 	case C_DONE:
-	  for (i=0; i<thread_idx; i++)
+	  do
 	    {
-	      dprintf("Joining thread %lld\n", (long)thread_ids[i]);
-	      pthread_join (thread_ids[i], NULL);
-	    }
+	      pthread_mutex_lock (&stat_mutex);
+	      i = threads_done;
+	      pthread_mutex_unlock (&stat_mutex);
+	    } while (i < thread_idx);
 	  cp = NULL;
 	  break;
 	}
@@ -388,5 +462,40 @@ main(int argc, char **argv)
   printf("Avg free time: %8s in %10s calls\n", comma(free_time/free_count), comma(free_count));
   printf("Total call time: %s cycles\n", comma(malloc_time+calloc_time+realloc_time+free_time));
   printf("\n");
+
+#if 0
+  /* Free any still-held chunks of memory.  */
+  for (idx=0; idx<n_ptrs; idx++)
+    if (ptrs[idx])
+      {
+	free((void *)ptrs[idx]);
+	ptrs[idx] = 0;
+      }
+#endif
+
+  /* This will fail (crash) for system glibc but that's OK.  */
+  __malloc_scan_chunks(malloc_scan_callback);
+
+  malloc_info (0, stdout);
+
+#if 1
+  /* ...or report them as used.  */
+  for (idx=0; idx<n_ptrs; idx++)
+    if (ptrs[idx])
+      {
+	char *p = (char *)ptrs[idx] - 2*sizeof(size_t);
+	size_t *sp = (size_t *)p;
+	size_t size = sp[1] & ~7;
+	malloc_scan_callback (sp, size, 9);
+      }
+#endif
+
+  /* Now that we've scanned all the per-thread caches, it's safe to
+     let them exit and clean up.  */
+  pthread_mutex_unlock(&stop_mutex);
+
+  for (i=0; i<thread_idx; i++)
+    pthread_join (thread_ids[i], NULL);
+
   return 0;
 }

-----------------------------------------------------------------------

Summary of changes:
 malloc/Versions    |    2 +
 malloc/malloc.c    |  434 +++++++++++++++++++++++++++++++++++++++++++++++++---
 malloc/mtrace.h    |   15 ++
 malloc/trace2dat   |    2 +
 malloc/trace_run.c |  123 ++++++++++++++-
 5 files changed, 547 insertions(+), 29 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources

