[PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc

Adhemerval Zanella adhemerval.zanella@linaro.org
Wed Aug 18 14:20:00 GMT 2021


A new tunable, 'glibc.malloc.mmap_hugetlb', adds support for using Huge
Pages directly with mmap() calls.  The supported sizes and required
flags for mmap() are provided by an arch-specific internal hook,
__malloc_hugepage_config().

Currently it first tries mmap() using the huge page size and falls back
to the default page size and sbrk() call if the kernel returns MAP_FAILED.

The default __malloc_hugepage_config() implementation does not enable it
even if the tunable is set.

Checked on x86_64-linux-gnu.
---
 NEWS                                       |   4 +
 elf/dl-tunables.list                       |   4 +
 elf/tst-rtld-list-tunables.exp             |   1 +
 malloc/arena.c                             |   2 +
 malloc/malloc.c                            |  35 +++++-
 manual/tunables.texi                       |  14 +++
 sysdeps/generic/malloc-hugepages.c         |   6 +
 sysdeps/generic/malloc-hugepages.h         |  12 ++
 sysdeps/unix/sysv/linux/malloc-hugepages.c | 125 +++++++++++++++++++++
 9 files changed, 200 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index 9b2345d08c..412bf3e6f8 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,10 @@ Major new features:
   It might improve performance with Transparent Huge Pages madvise mode
   depending of the workload.
 
+* On Linux, a new tunable, glibc.malloc.mmap_hugetlb, can be used to
+  instruct malloc to try to use Huge Pages when allocating memory with
+  mmap() calls (through the use of MAP_HUGETLB).
+
 Deprecated and removed features, and other changes affecting compatibility:
 
   [Add deprecations, removals and changes affecting compatibility here]
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 67df6dbc2c..209c2d8592 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -97,6 +97,10 @@ glibc {
       minval: 0
       maxval: 1
     }
+    mmap_hugetlb {
+      type: SIZE_T
+      minval: 0
+    }
   }
   cpu {
     hwcap_mask {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index d8109fa31c..49f033ce91 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -1,6 +1,7 @@
 glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.check: 0 (min: 0, max: 3)
+glibc.malloc.mmap_hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
 glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
diff --git a/malloc/arena.c b/malloc/arena.c
index 81bff54303..4efb5581c1 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -232,6 +232,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
 #endif
 TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
 TUNABLE_CALLBACK_FNDECL (set_thp_madvise, int32_t)
+TUNABLE_CALLBACK_FNDECL (set_mmap_hugetlb, size_t)
 #else
 /* Initialization routine. */
 #include <string.h>
@@ -333,6 +334,7 @@ ptmalloc_init (void)
 # endif
   TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
   TUNABLE_GET (thp_madvise, int32_t, TUNABLE_CALLBACK (set_thp_madvise));
+  TUNABLE_GET (mmap_hugetlb, size_t, TUNABLE_CALLBACK (set_mmap_hugetlb));
 #else
   if (__glibc_likely (_environ != NULL))
     {
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 4bfcea286f..8cf2d6855e 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1884,6 +1884,10 @@ struct malloc_par
 #if HAVE_TUNABLES
   /* Transparent Large Page support.  */
   INTERNAL_SIZE_T thp_pagesize;
+  /* A value different from 0 means to align mmap allocation to hp_pagesize
+     and to add hp_flags to the mmap flags.  */
+  INTERNAL_SIZE_T hp_pagesize;
+  int hp_flags;
 #endif
 
   /* Memory map support */
@@ -2415,7 +2419,8 @@ do_check_malloc_state (mstate av)
  */
 
 static void *
-sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
+sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av,
+		bool set_thp)
 {
   long int size;
 
@@ -2442,7 +2447,8 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
   if (mm == MAP_FAILED)
     return mm;
 
-  sysmadvise_thp (mm, size);
+  if (set_thp)
+    sysmadvise_thp (mm, size);
 
   /*
     The offset to the start of the mmapped region is stored in the prev_size
@@ -2531,7 +2537,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
     {
     try_mmap:
-      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
+      char *mm;
+#if HAVE_TUNABLES
+      if (mp_.hp_pagesize > 0)
+	{
+	  /* There is no need to issue the THP madvise call if Huge Pages are
+	     used directly.  */
+	  mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av, false);
+	  if (mm != MAP_FAILED)
+	    return mm;
+	}
+#endif
+      mm = sysmalloc_mmap (nb, pagesize, 0, av, true);
       if (mm != MAP_FAILED)
 	return mm;
       tried_mmap = true;
@@ -5405,6 +5422,18 @@ do_set_thp_madvise (int32_t value)
     }
   return 0;
 }
+
+static __always_inline int
+do_set_mmap_hugetlb (size_t value)
+{
+  if (value > 0)
+    {
+      struct malloc_hugepage_config_t cfg = __malloc_hugepage_config (value);
+      mp_.hp_pagesize = cfg.pagesize;
+      mp_.hp_flags = cfg.flags;
+    }
+  return 0;
+}
 #endif
 
 int
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 93c46807f9..4da6a02778 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -279,6 +279,20 @@ The default value of this tunable is @code{0}, which disable its usage.
 Setting to a positive value enable the @code{madvise} call.
 @end deftp
 
+@deftp Tunable glibc.malloc.mmap_hugetlb
+This tunable enables the use of Huge Pages when the system supports it
+(currently only Linux).  It is done by aligning the memory size and passing
+the required flags (@code{MAP_HUGETLB} on Linux) when issuing the
+@code{mmap} call to allocate memory from the system.
+
+The default value of this tunable is @code{0}, which disables its usage.
+The special value @code{1} will try to gather the system default huge page
+size, while a value larger than @code{1} will try to match it with the
+supported system huge page sizes.  If either no default huge page size
+could be obtained or the requested size does not match a supported one,
+huge page support will be disabled.
+@end deftp
+
 @node Dynamic Linking Tunables
 @section Dynamic Linking Tunables
 @cindex dynamic linking tunables
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
index 262bcdbeb8..e5f5c1ec98 100644
--- a/sysdeps/generic/malloc-hugepages.c
+++ b/sysdeps/generic/malloc-hugepages.c
@@ -29,3 +29,9 @@ __malloc_thp_mode (void)
 {
   return malloc_thp_mode_not_supported;
 }
+
+/* The generic implementation disables huge page support.  */
+struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
+{
+  return (struct malloc_hugepage_config_t) { 0, 0 };
+}
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
index 664cda9b67..27f7adfea5 100644
--- a/sysdeps/generic/malloc-hugepages.h
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -34,4 +34,16 @@ enum malloc_thp_mode_t
 
 enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
 
+struct malloc_hugepage_config_t
+{
+  size_t pagesize;
+  int flags;
+};
+
+/* Return the supported huge page size from the requested size REQUESTED
+   along with the required extra mmap flags.  Returning a 0 value for
+   pagesize disables its usage.  */
+struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
+     attribute_hidden;
+
 #endif /* _MALLOC_HUGEPAGES_H */
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
index 66589127cd..0eb0c764ad 100644
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -17,8 +17,10 @@
    not, see <https://www.gnu.org/licenses/>.  */
 
 #include <intprops.h>
+#include <dirent.h>
 #include <malloc-hugepages.h>
 #include <not-cancel.h>
+#include <sys/mman.h>
 
 size_t
 __malloc_default_thp_pagesize (void)
@@ -74,3 +76,126 @@ __malloc_thp_mode (void)
     }
   return malloc_thp_mode_not_supported;
 }
+
+static size_t
+malloc_default_hugepage_size (void)
+{
+  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
+  if (fd == -1)
+    return 0;
+
+  char buf[512];
+  off64_t off = 0;
+  while (1)
+    {
+      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
+      if (r < 0)
+	break;
+      buf[r - 1] = '\0';
+
+      const char *s = strstr (buf, "Hugepagesize:");
+      if (s == NULL)
+	{
+	  char *nl = strrchr (buf, '\n');
+	  if (nl == NULL)
+	    break;
+	  off += (nl + 1) - buf;
+	  continue;
+	}
+
+      /* The default huge page size is in the form:
+	 Hugepagesize:       NUMBER kB  */
+      size_t hpsize = 0;
+      s += sizeof ("Hugepagesize: ") - 1;
+      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
+	{
+	  if (s[i] == ' ')
+	    continue;
+	  hpsize *= 10;
+	  hpsize += s[i] - '0';
+	}
+      return hpsize * 1024;
+    }
+
+  __close_nocancel (fd);
+
+  return 0;
+}
+
+static inline struct malloc_hugepage_config_t
+make_malloc_hugepage_config (size_t pagesize)
+{
+  int flags = MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
+  return (struct malloc_hugepage_config_t) { pagesize, flags };
+}
+
+struct malloc_hugepage_config_t
+__malloc_hugepage_config (size_t requested)
+{
+  if (requested == 1)
+    {
+      size_t pagesize = malloc_default_hugepage_size ();
+      if (pagesize != 0)
+	return make_malloc_hugepage_config (pagesize);
+    }
+
+  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
+				 O_RDONLY | O_DIRECTORY, 0);
+  if (dirfd == -1)
+    return (struct malloc_hugepage_config_t) { 0, 0 };
+
+  bool found = false;
+
+  char buffer[1024];
+  while (true)
+    {
+#if !IS_IN(libc)
+# define __getdents64 getdents64
+#endif
+      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
+      if (ret == -1)
+	break;
+      else if (ret == 0)
+        break;
+
+      char *begin = buffer, *end = buffer + ret;
+      while (begin != end)
+        {
+          unsigned short int d_reclen;
+          memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
+                  sizeof (d_reclen));
+          const char *dname = begin + offsetof (struct dirent64, d_name);
+          begin += d_reclen;
+
+          if (dname[0] == '.'
+	      || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
+            continue;
+
+	  /* Each entry represents a supported huge page in the form of:
+	     hugepages-<size>kB.  */
+	  size_t hpsize = 0;
+	  const char *sizestr = dname + sizeof ("hugepages-") - 1;
+	  for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
+	    {
+	      hpsize *= 10;
+	      hpsize += sizestr[i] - '0';
+	    }
+	  hpsize *= 1024;
+
+	  if (hpsize == requested)
+	    {
+	      found = true;
+	      break;
+	    }
+        }
+      if (found)
+	break;
+    }
+
+  __close_nocancel (dirfd);
+
+  if (found)
+    return make_malloc_hugepage_config (requested);
+
+  return (struct malloc_hugepage_config_t) { 0, 0 };
+}
-- 
2.30.2



More information about the Libc-alpha mailing list