The current implementation of free() returns system memory to the kernel only if the end of the heap is freed. No matter how many times free() is called, unnecessary system memory is kept in the heap as long as the last chunk of the heap has not been freed. Please check the two attached programs, malloc-test-a and malloc-test-b. They are almost the same: they call malloc()/free() for the same amount of memory, the same number of times. The only difference is which chunk of memory they leave allocated, and it is only a 120kB chunk. However, only malloc-test-b keeps over 1GB of system memory. $ ps -eo "comm pid vsz rss" | grep malloc malloc-test-a 18741 4084 708 $ ps -eo "comm pid vsz rss" | grep malloc malloc-test-b 18673 1204240 1200736 I think the big difference between the system memory a program needs and the memory actually allocated is a problem. So I made a solution: an optional feature that returns memory to the kernel, which runs when free() is called for more than a certain size of memory. It is disabled by default, because it might affect performance. A new environment variable, MALLOC_FREQUENTLY_TRIM, is the switch that enables it. It is intended for systems and/or applications that want to manage system memory usage strictly. I'd like to hear your opinion about this problem and this patch.
diff -Nur a/malloc/arena.c b/malloc/arena.c --- a/malloc/arena.c 2011-06-30 18:22:36.000000000 +0900 +++ b/malloc/arena.c 2011-07-11 17:23:41.000000000 +0900 @@ -596,6 +596,8 @@ mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16])); else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0) mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16])); + else if (memcmp (envline, "FREQUENTLY_TRIM", 15) == 0) + mALLOPt(M_FREQUENTLY_TRIM, atoi(&envline[16])); } break; default: diff -Nur a/malloc/malloc.c b/malloc/malloc.c --- a/malloc/malloc.c 2011-06-30 18:22:36.000000000 +0900 +++ b/malloc/malloc.c 2011-07-13 09:47:19.000000000 +0900 @@ -2451,6 +2451,16 @@ #define M_ARENA_MAX -8 #endif +/* + M_FREQUENTLY_TRIM is a function to return the system the system + memory when the system memory which temporarily became unnecessary + exceeds the value of environment MALLOC_FREQUENTLY_TRIM in free(). + The function operates only when MALLOC_FREQUENTLY_TRIM is set. +*/ + +#define M_FREQUENTLY_TRIM -9 +#define FREQUENTLY_TRIM_THRESHOLD_MIN FASTBIN_CONSOLIDATION_THRESHOLD + /* Maximum size of memory handled in fastbins. */ static INTERNAL_SIZE_T global_max_fast; @@ -2562,6 +2572,7 @@ #define alloc_perturb(p, n) memset (p, (perturb_byte ^ 0xff) & 0xff, n) #define free_perturb(p, n) memset (p, perturb_byte & 0xff, n) +static unsigned long frequently_trim; /* ------------------- Support for multiple arenas -------------------- */ #include "arena.c" @@ -4962,7 +4973,8 @@ size += prevsize; p = chunk_at_offset(p, -((long) prevsize)); unlink(p, bck, fwd); - } + } else + prevsize = 0; if (nextchunk != av->top) { /* get and clear inuse bit */ @@ -4972,9 +4984,10 @@ if (!nextinuse) { unlink(nextchunk, bck, fwd); size += nextsize; - } else + } else { clear_inuse_bit_at_offset(nextchunk, 0); - + nextsize = 0; + } /* Place the chunk in unsorted chunk list. 
Chunks are not placed into regular bins until after they have @@ -5033,6 +5046,29 @@ if (have_fastchunks(av)) malloc_consolidate(av); + if (__builtin_expect (frequently_trim, 0) + && (unsigned long)(size) >= frequently_trim + && av->top != p) { + const size_t pagemask = mp_.pagesize - 1; + char * dontneed_mem; + + if (nextsize >= frequently_trim) + size -= (nextsize - sizeof (struct malloc_chunk)); + + if (prevsize >= frequently_trim) + dontneed_mem = (char *)(((uintptr_t)p + + prevsize) & ~pagemask); + else + dontneed_mem = (char *)(((uintptr_t)p + + sizeof (struct malloc_chunk) + + pagemask) & ~pagemask); + + size -= dontneed_mem - (char *)p; + + if ((long)(size - pagemask) > 0) + madvise (dontneed_mem, size & ~pagemask, MADV_DONTNEED); + } + if (av == &main_arena) { #ifndef MORECORE_CANNOT_TRIM if ((unsigned long)(chunksize(av->top)) >= @@ -6117,6 +6153,13 @@ mp_.arena_max = value; break; #endif + + case M_FREQUENTLY_TRIM: + if (value > FREQUENTLY_TRIM_THRESHOLD_MIN) + frequently_trim = value; + else + frequently_trim = FREQUENTLY_TRIM_THRESHOLD_MIN; + break; } (void)mutex_unlock(&av->mutex); return res; diff -Nur a/malloc/malloc.h b/malloc/malloc.h --- a/malloc/malloc.h 2011-06-30 18:22:36.000000000 +0900 +++ b/malloc/malloc.h 2011-07-11 17:23:41.000000000 +0900 @@ -139,6 +139,7 @@ #define M_PERTURB -6 #define M_ARENA_TEST -7 #define M_ARENA_MAX -8 +#define M_FREQUENTLY_TRIM -9 /* General SVID/XPG interface to tunable parameters. */ extern int mallopt (int __param, int __val) __THROW; Regards, Naoki Yanagimoto
Created attachment 5845 [details] malloc test program A (malloc-test-a.c)
Created attachment 5846 [details] malloc test program B (malloc-test-b.c)
Created attachment 5847 [details] malloc_frequently_trim.patch
I don't like this at all. There is no single behavior that everyone likes, and as soon as one workaround is added, other people will complain. Just wrap your free() calls and force compression this way. free() is deliberately written to allow interception.