[PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc
Adhemerval Zanella
adhemerval.zanella@linaro.org
Wed Aug 18 14:20:00 GMT 2021
A new tunable, 'glibc.malloc.mmap_hugetlb', adds support for using Huge
Pages directly with mmap() calls. The supported page sizes and the
required mmap() flags are provided by a system-specific internal hook,
__malloc_hugepage_config().
Currently it first tries mmap() using the huge page size and falls back to
the default page size and the sbrk() call if the kernel returns MAP_FAILED.
The default __malloc_hugepage_config() implementation does not enable it
even if the tunable is set.
Checked on x86_64-linux-gnu.
---
NEWS | 4 +
elf/dl-tunables.list | 4 +
elf/tst-rtld-list-tunables.exp | 1 +
malloc/arena.c | 2 +
malloc/malloc.c | 35 +++++-
manual/tunables.texi | 14 +++
sysdeps/generic/malloc-hugepages.c | 6 +
sysdeps/generic/malloc-hugepages.h | 12 ++
sysdeps/unix/sysv/linux/malloc-hugepages.c | 125 +++++++++++++++++++++
9 files changed, 200 insertions(+), 3 deletions(-)
diff --git a/NEWS b/NEWS
index 9b2345d08c..412bf3e6f8 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,10 @@ Major new features:
It might improve performance with Transparent Huge Pages madvise mode
depending of the workload.
+* On Linux, a new tunable, glibc.malloc.mmap_hugetlb, can be used to
+ instruct malloc to try to use Huge Pages when allocating memory with
+ mmap() calls (through the use of MAP_HUGETLB).
+
Deprecated and removed features, and other changes affecting compatibility:
[Add deprecations, removals and changes affecting compatibility here]
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 67df6dbc2c..209c2d8592 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -97,6 +97,10 @@ glibc {
minval: 0
maxval: 1
}
+ mmap_hugetlb {
+ type: SIZE_T
+ minval: 0
+ }
}
cpu {
hwcap_mask {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index d8109fa31c..49f033ce91 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -1,6 +1,7 @@
glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
glibc.malloc.check: 0 (min: 0, max: 3)
+glibc.malloc.mmap_hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
diff --git a/malloc/arena.c b/malloc/arena.c
index 81bff54303..4efb5581c1 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -232,6 +232,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
#endif
TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
TUNABLE_CALLBACK_FNDECL (set_thp_madvise, int32_t)
+TUNABLE_CALLBACK_FNDECL (set_mmap_hugetlb, size_t)
#else
/* Initialization routine. */
#include <string.h>
@@ -333,6 +334,7 @@ ptmalloc_init (void)
# endif
TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
TUNABLE_GET (thp_madvise, int32_t, TUNABLE_CALLBACK (set_thp_madvise));
+ TUNABLE_GET (mmap_hugetlb, size_t, TUNABLE_CALLBACK (set_mmap_hugetlb));
#else
if (__glibc_likely (_environ != NULL))
{
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 4bfcea286f..8cf2d6855e 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1884,6 +1884,10 @@ struct malloc_par
#if HAVE_TUNABLES
/* Transparent Large Page support. */
INTERNAL_SIZE_T thp_pagesize;
+ /* A value different than 0 means to align mmap allocations to hp_pagesize
+ and add hp_flags to the mmap flags. */
+ INTERNAL_SIZE_T hp_pagesize;
+ int hp_flags;
#endif
/* Memory map support */
@@ -2415,7 +2419,8 @@ do_check_malloc_state (mstate av)
*/
static void *
-sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
+sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av,
+ bool set_thp)
{
long int size;
@@ -2442,7 +2447,8 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
if (mm == MAP_FAILED)
return mm;
- sysmadvise_thp (mm, size);
+ if (set_thp)
+ sysmadvise_thp (mm, size);
/*
The offset to the start of the mmapped region is stored in the prev_size
@@ -2531,7 +2537,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
&& (mp_.n_mmaps < mp_.n_mmaps_max)))
{
try_mmap:
- char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
+ char *mm;
+#if HAVE_TUNABLES
+ if (mp_.hp_pagesize > 0)
+ {
+ /* There is no need to issue the THP madvise call if Huge Pages are
+ used directly. */
+ mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av, false);
+ if (mm != MAP_FAILED)
+ return mm;
+ }
+#endif
+ mm = sysmalloc_mmap (nb, pagesize, 0, av, true);
if (mm != MAP_FAILED)
return mm;
tried_mmap = true;
@@ -5405,6 +5422,18 @@ do_set_thp_madvise (int32_t value)
}
return 0;
}
+
+/* Tunable callback for glibc.malloc.mmap_hugetlb.  A VALUE of 0 leaves the
+   huge page settings in mp_ untouched; any other value is handed to
+   __malloc_hugepage_config and the resulting page size and mmap flags are
+   stored in mp_.  Always returns 0.  */
+static __always_inline int
+do_set_mmap_hugetlb (size_t value)
+{
+  if (value == 0)
+    return 0;
+
+  struct malloc_hugepage_config_t hp = __malloc_hugepage_config (value);
+  mp_.hp_pagesize = hp.pagesize;
+  mp_.hp_flags = hp.flags;
+  return 0;
+}
#endif
int
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 93c46807f9..4da6a02778 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -279,6 +279,20 @@ The default value of this tunable is @code{0}, which disable its usage.
Setting to a positive value enable the @code{madvise} call.
@end deftp
+@deftp Tunable glibc.malloc.mmap_hugetlb
+This tunable enables the use of Huge Pages when the system supports it (currently
+only Linux). It is done by aligning the memory size and passing the required
+flags (@code{MAP_HUGETLB} on Linux) when issuing the @code{mmap} call to allocate
+memory from the system.
+
+The default value of this tunable is @code{0}, which disables its usage.
+The special value @code{1} will try to gather the system default huge page size,
+while a value larger than @code{1} will try to match it with the supported system
+huge page sizes. If no default huge page size could be obtained, or if the
+requested size does not match any of the supported ones, huge page support will
+be disabled.
+@end deftp
+
@node Dynamic Linking Tunables
@section Dynamic Linking Tunables
@cindex dynamic linking tunables
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
index 262bcdbeb8..e5f5c1ec98 100644
--- a/sysdeps/generic/malloc-hugepages.c
+++ b/sysdeps/generic/malloc-hugepages.c
@@ -29,3 +29,9 @@ __malloc_thp_mode (void)
{
return malloc_thp_mode_not_supported;
}
+
+/* Generic fallback: regardless of REQUESTED, return a configuration with a
+   zero page size and no extra flags.  */
+struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
+{
+ return (struct malloc_hugepage_config_t) { 0, 0 };
+}
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
index 664cda9b67..27f7adfea5 100644
--- a/sysdeps/generic/malloc-hugepages.h
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -34,4 +34,16 @@ enum malloc_thp_mode_t
enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
+/* Huge page configuration as returned by __malloc_hugepage_config.  */
+struct malloc_hugepage_config_t
+{
+ /* Huge page size; a value of 0 disables huge page usage.  */
+ size_t pagesize;
+ /* Extra flags required for the mmap call.  */
+ int flags;
+};
+
+/* Return the supported huge page size matching the REQUESTED size, along
+ with the extra mmap flags required to use it. Returning a 0 value for
+ pagesize disables its usage. */
+struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
+ attribute_hidden;
+
#endif /* _MALLOC_HUGEPAGES_H */
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
index 66589127cd..0eb0c764ad 100644
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -17,8 +17,10 @@
not, see <https://www.gnu.org/licenses/>. */
#include <intprops.h>
+#include <dirent.h>
#include <malloc-hugepages.h>
#include <not-cancel.h>
+#include <sys/mman.h>
size_t
__malloc_default_thp_pagesize (void)
@@ -74,3 +76,126 @@ __malloc_thp_mode (void)
}
return malloc_thp_mode_not_supported;
}
+
+/* Return the system default huge page size in bytes, taken from the
+   "Hugepagesize:" entry of /proc/meminfo.  Return 0 if the file cannot be
+   opened or read, or if no such entry is found.  */
+static size_t
+malloc_default_hugepage_size (void)
+{
+  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
+  if (fd == -1)
+    return 0;
+
+  size_t hpsize = 0;
+
+  char buf[512];
+  off64_t off = 0;
+  while (1)
+    {
+      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
+      /* Stop on error and also on end of file: a zero-length read would
+         leave stale data in BUF and the loop would never terminate.  */
+      if (r <= 0)
+        break;
+      buf[r] = '\0';
+
+      /* Only search complete lines, so an entry cut short by the end of
+         the buffer is read again in full on the next iteration instead of
+         being parsed as a truncated number.  */
+      char *nl = strrchr (buf, '\n');
+      if (nl == NULL)
+        break;
+      nl[1] = '\0';
+
+      const char *s = strstr (buf, "Hugepagesize:");
+      if (s == NULL)
+        {
+          /* Not in this chunk: resume right after the last full line.  */
+          off += (nl + 1) - buf;
+          continue;
+        }
+
+      /* The default huge page size is in the form:
+         Hugepagesize:       NUMBER kB
+         The padding blanks are skipped by the loop below.  */
+      s += sizeof ("Hugepagesize:") - 1;
+      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
+        {
+          if (s[i] == ' ')
+            continue;
+          hpsize *= 10;
+          hpsize += s[i] - '0';
+        }
+      hpsize *= 1024;
+      break;
+    }
+
+  /* Single exit point so the descriptor is closed on every path.  */
+  __close_nocancel (fd);
+
+  return hpsize;
+}
+
+/* Build the huge page configuration for PAGESIZE: the page size itself plus
+   the mmap flags selecting it, which are MAP_HUGETLB combined with the
+   number of trailing zero bits of PAGESIZE (its base-2 logarithm when it is
+   a power of two) shifted left by MAP_HUGE_SHIFT.  A PAGESIZE of 0 yields
+   the disabled configuration { 0, 0 }.  */
+static inline struct malloc_hugepage_config_t
+make_malloc_hugepage_config (size_t pagesize)
+{
+  /* __builtin_ctzll is undefined for a zero argument.  */
+  if (pagesize == 0)
+    return (struct malloc_hugepage_config_t) { 0, 0 };
+
+  /* Shift as unsigned: for page sizes of 4 GiB and larger the encoded
+     value does not fit in a signed int, so a signed shift would
+     overflow.  */
+  unsigned int log2size = __builtin_ctzll (pagesize);
+  int flags = MAP_HUGETLB | (int) (log2size << MAP_HUGE_SHIFT);
+  return (struct malloc_hugepage_config_t) { pagesize, flags };
+}
+
+/* Linux implementation.  A REQUESTED value of 1 selects the default huge
+   page size reported by malloc_default_hugepage_size.  Otherwise, or if no
+   default could be obtained, REQUESTED is compared in bytes against each
+   hugepages-<size>kB entry of /sys/kernel/mm/hugepages.  Return the
+   configuration for the matching size, or { 0, 0 } if the directory cannot
+   be opened or no entry matches.  */
+struct malloc_hugepage_config_t
+__malloc_hugepage_config (size_t requested)
+{
+  if (requested == 1)
+    {
+      size_t defsize = malloc_default_hugepage_size ();
+      if (defsize != 0)
+        return make_malloc_hugepage_config (defsize);
+    }
+
+  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
+                                 O_RDONLY | O_DIRECTORY, 0);
+  if (dirfd == -1)
+    return (struct malloc_hugepage_config_t) { 0, 0 };
+
+#if !IS_IN(libc)
+# define __getdents64 getdents64
+#endif
+
+  bool match = false;
+  char dents[1024];
+
+  /* Read the directory in chunks until a match is found, the directory is
+     exhausted, or reading fails.  */
+  while (!match)
+    {
+      ssize_t nread = __getdents64 (dirfd, dents, sizeof (dents));
+      if (nread == -1 || nread == 0)
+        break;
+
+      char *cur = dents;
+      char *const limit = dents + nread;
+      while (!match && cur != limit)
+        {
+          /* The record length is copied out with memcpy rather than read
+             through a struct dirent64 pointer into the byte buffer.  */
+          unsigned short int reclen;
+          memcpy (&reclen, cur + offsetof (struct dirent64, d_reclen),
+                  sizeof (reclen));
+          const char *name = cur + offsetof (struct dirent64, d_name);
+          cur += reclen;
+
+          /* Each entry of interest represents a supported huge page size
+             in the form of: hugepages-<size>kB.  */
+          if (name[0] != '.'
+              && strncmp (name, "hugepages-", sizeof ("hugepages-") - 1) == 0)
+            {
+              size_t kb = 0;
+              for (const char *p = name + sizeof ("hugepages-") - 1;
+                   *p >= '0' && *p <= '9'; p++)
+                kb = kb * 10 + (*p - '0');
+
+              match = kb * 1024 == requested;
+            }
+        }
+    }
+
+  __close_nocancel (dirfd);
+
+  if (!match)
+    return (struct malloc_hugepage_config_t) { 0, 0 };
+
+  return make_malloc_hugepage_config (requested);
+}
--
2.30.2
More information about the Libc-alpha
mailing list