Bug 12993 - Big difference of system memory between an executable program needs and actually allocated.
Summary: Big difference of system memory between an executable program needs and actua...
Status: RESOLVED WONTFIX
Alias: None
Product: glibc
Classification: Unclassified
Component: libc (show other bugs)
Version: unspecified
Importance: P2 enhancement
Target Milestone: ---
Assignee: Ulrich Drepper
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2011-07-13 01:55 UTC by Naoki Yanagimoto
Modified: 2014-06-27 12:55 UTC (History)
1 user (show)

See Also:
Host:
Target:
Build:
Last reconfirmed:
fweimer: security-


Attachments
malloc test program A (malloc-test-a.c) (250 bytes, application/octet-stream)
2011-07-13 01:57 UTC, Naoki Yanagimoto
Details
malloc test program B (malloc-test-b.c) (252 bytes, application/octet-stream)
2011-07-13 01:58 UTC, Naoki Yanagimoto
Details
malloc_frequently_trim.patch (1.47 KB, patch)
2011-07-13 02:00 UTC, Naoki Yanagimoto
Details | Diff

Note You need to log in before you can comment on or make changes to this bug.
Description Naoki Yanagimoto 2011-07-13 01:55:39 UTC
The current implementation of free() returns system memory to the kernel
only if the end of the heap is freed. No matter how many times free() is
called, unneeded system memory is kept in the heap as long as the last
chunk of the heap has not been freed.

Please check the two attached programs, malloc-test-a and malloc-test-b.
They are almost the same: they call malloc()/free() for the same amount
of memory, the same number of times. The only difference is which chunk
of memory they leave allocated, just one 120kB chunk. However, only
malloc-test-b keeps over 1GB of system memory.


   $ ps -eo "comm pid vsz rss" | grep malloc
   malloc-test-a   18741   4084   708

   $ ps -eo "comm pid vsz rss" | grep malloc
   malloc-test-b   18673 1204240 1200736


I think this large gap between the system memory a program needs and
the memory actually allocated to it is a problem.

So I made a solution: an optional feature that returns the memory to
the kernel when free() is called for more than a certain size of
memory. It is disabled by default, because it might affect
performance. A new environment variable, MALLOC_FREQUENTLY_TRIM, is
the switch that enables it.
It is for systems and/or applications that want to manage system
memory usage strictly.

I'd like to hear your opinion about this problem and this patch.


diff -Nur a/malloc/arena.c b/malloc/arena.c
--- a/malloc/arena.c	2011-06-30 18:22:36.000000000 +0900
+++ b/malloc/arena.c	2011-07-11 17:23:41.000000000 +0900
@@ -596,6 +596,8 @@
 		    mALLOPt(M_TRIM_THRESHOLD, atoi(&envline[16]));
 		  else if (memcmp (envline, "MMAP_THRESHOLD_", 15) == 0)
 		    mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16]));
+		  else if (memcmp (envline, "FREQUENTLY_TRIM", 15) == 0)
+		    mALLOPt(M_FREQUENTLY_TRIM, atoi(&envline[16]));
 		}
 	      break;
 	    default:
diff -Nur a/malloc/malloc.c b/malloc/malloc.c
--- a/malloc/malloc.c	2011-06-30 18:22:36.000000000 +0900
+++ b/malloc/malloc.c	2011-07-13 09:47:19.000000000 +0900
@@ -2451,6 +2451,16 @@
 #define M_ARENA_MAX  -8
 #endif
 
+/*
+  M_FREQUENTLY_TRIM is a function to return to the system the system
+  memory which temporarily became unnecessary, when its size reaches or
+  exceeds the value of environment MALLOC_FREQUENTLY_TRIM in free().
+  The function operates only when MALLOC_FREQUENTLY_TRIM is set.
+*/
+
+#define M_FREQUENTLY_TRIM -9
+#define FREQUENTLY_TRIM_THRESHOLD_MIN FASTBIN_CONSOLIDATION_THRESHOLD
+
 
 /* Maximum size of memory handled in fastbins.  */
 static INTERNAL_SIZE_T global_max_fast;
@@ -2562,6 +2572,7 @@
 #define alloc_perturb(p, n) memset (p, (perturb_byte ^ 0xff) & 0xff, n)
 #define free_perturb(p, n) memset (p, perturb_byte & 0xff, n)
 
+static unsigned long frequently_trim;
 
 /* ------------------- Support for multiple arenas -------------------- */
 #include "arena.c"
@@ -4962,7 +4973,8 @@
       size += prevsize;
       p = chunk_at_offset(p, -((long) prevsize));
       unlink(p, bck, fwd);
-    }
+    } else
+      prevsize = 0;
 
     if (nextchunk != av->top) {
       /* get and clear inuse bit */
@@ -4972,9 +4984,10 @@
       if (!nextinuse) {
 	unlink(nextchunk, bck, fwd);
 	size += nextsize;
-      } else
+      } else {
 	clear_inuse_bit_at_offset(nextchunk, 0);
-
+	nextsize = 0;
+      }
       /*
 	Place the chunk in unsorted chunk list. Chunks are
 	not placed into regular bins until after they have
@@ -5033,6 +5046,29 @@
       if (have_fastchunks(av))
 	malloc_consolidate(av);
 
+      if (__builtin_expect (frequently_trim, 0) 
+           && (unsigned long)(size) >= frequently_trim
+           && av->top != p) {
+        const size_t pagemask = mp_.pagesize - 1;
+        char * dontneed_mem;
+
+        if (nextsize >= frequently_trim)
+          size -= (nextsize - sizeof (struct malloc_chunk));
+
+        if (prevsize >= frequently_trim)
+          dontneed_mem = (char *)(((uintptr_t)p
+                                  + prevsize) & ~pagemask);
+        else
+          dontneed_mem = (char *)(((uintptr_t)p
+                                  + sizeof (struct malloc_chunk)
+                                  + pagemask) & ~pagemask);
+
+        size -= dontneed_mem - (char *)p;
+
+        if ((long)(size - pagemask) > 0)
+          madvise (dontneed_mem, size & ~pagemask, MADV_DONTNEED);
+      }
+
       if (av == &main_arena) {
 #ifndef MORECORE_CANNOT_TRIM
 	if ((unsigned long)(chunksize(av->top)) >=
@@ -6117,6 +6153,13 @@
       mp_.arena_max = value;
     break;
 #endif
+
+  case M_FREQUENTLY_TRIM:
+    if (value > FREQUENTLY_TRIM_THRESHOLD_MIN)
+      frequently_trim = value;
+    else
+      frequently_trim = FREQUENTLY_TRIM_THRESHOLD_MIN;
+    break;  
   }
   (void)mutex_unlock(&av->mutex);
   return res;
diff -Nur a/malloc/malloc.h b/malloc/malloc.h
--- a/malloc/malloc.h	2011-06-30 18:22:36.000000000 +0900
+++ b/malloc/malloc.h	2011-07-11 17:23:41.000000000 +0900
@@ -139,6 +139,7 @@
 #define M_PERTURB	    -6
 #define M_ARENA_TEST	    -7
 #define M_ARENA_MAX	    -8
+#define M_FREQUENTLY_TRIM   -9
 
 /* General SVID/XPG interface to tunable parameters. */
 extern int mallopt (int __param, int __val) __THROW;


Regards,
Naoki Yanagimoto
Comment 1 Naoki Yanagimoto 2011-07-13 01:57:23 UTC
Created attachment 5845 [details]
malloc test program A (malloc-test-a.c)
Comment 2 Naoki Yanagimoto 2011-07-13 01:58:19 UTC
Created attachment 5846 [details]
malloc test program B (malloc-test-b.c)
Comment 3 Naoki Yanagimoto 2011-07-13 02:00:17 UTC
Created attachment 5847 [details]
malloc_frequently_trim.patch
Comment 4 Ulrich Drepper 2011-07-20 00:36:48 UTC
I don't like this at all.  There is no single behavior everyone likes, and as soon as one workaround is added other people will complain.

Just wrap your free() calls and force compression this way.  free() is deliberately written to allow interception.