This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Problem: exception handling is not scalable. Attached program mt.cc demonstrates this easily. It runs around 1 seconds with 1 thread, but with only 4 threads it takes 4.5 seconds to complete. The reason is locks that are taken on unwind path. There are two locks right now. Fist is in libgcc's _Unwind_Find_registered_FDE and it is eliminated by https://gcc.gnu.org/ml/gcc-patches/2016-07/msg01629.html (for dynamic executables only, but this is common case). Second one is dl_load_write_lock in __dl_iterate_phdr. It serves dual purpose: it stops the list of loaded objects from been modified while iterating over it and it makes sure that more than one callback will not run in parallel. This means that even if we will find a cleaver way to make __dl_iterate_phdr iterate over the objects list locklessly the lock will still have to be taken around the call to callback to preserve second guaranty. This patch here propose to introduce another API: dl_iterate_phdr_parallel which does exactly same thing as dl_iterate_phdr but may run more than one provided callback in parallel. And to make it more scalable it breaks single dl_load_write_lock into arrays of locks. Reader takes only one lock, but writer has to lock all of them to proceed with modifying the list. Attached gcc.patch makes gcc use proposed dl_iterate_phdr_parallel interface and with this modification (and the patch linked above) same mt.cc with 4 threads runs around 1 second. diff --git a/elf/Versions b/elf/Versions index 23deda9..56b7510 100644 --- a/elf/Versions +++ b/elf/Versions @@ -11,6 +11,9 @@ libc { GLIBC_2.2.4 { dl_iterate_phdr; } + GLIBC_2.23 { + dl_iterate_phdr_parallel; + } %ifdef EXPORT_UNWIND_FIND_FDE # Needed for SHLIB_COMPAT calls using this version. GLIBC_2.2.5 { diff --git a/elf/dl-close.c b/elf/dl-close.c index 687d7de..9b8e9ac 100644 --- a/elf/dl-close.c +++ b/elf/dl-close.c @@ -535,7 +535,8 @@ _dl_close_worker (struct link_map *map, bool force) tls_free_start = tls_free_end = NO_TLS_OFFSET; /* We modify the list of loaded objects. */ - __rtld_lock_lock_recursive (GL(dl_load_write_lock)); + for (int i = 0; i < _DL_LOCKS; i++) + __rtld_lock_lock_recursive (GL(dl_load_write_lock)[i]); /* Check each element of the search list to see if all references to it are gone. */ @@ -748,7 +749,8 @@ _dl_close_worker (struct link_map *map, bool force) } } - __rtld_lock_unlock_recursive (GL(dl_load_write_lock)); + for (int i = 0; i < _DL_LOCKS; i++) + __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[i]); /* If we removed any object which uses TLS bump the generation counter. */ if (any_tls) diff --git a/elf/dl-iteratephdr.c b/elf/dl-iteratephdr.c index 4db6938..69e4a79 100644 --- a/elf/dl-iteratephdr.c +++ b/elf/dl-iteratephdr.c @@ -22,24 +22,47 @@ #include <stddef.h> #include <libc-lock.h> +static +uintptr_t crank(uintptr_t word) +{ + word ^= (word << 13); word ^= (word >> 17); word ^= (word << 5); + word = 69069*word + 12345; + return word; +} + +static +uintptr_t hash(uintptr_t val) +{ + char* x = (char*)&val; + uintptr_t r = 0; + r = x[0]; + for (unsigned i = 1; i < sizeof(val); ++i) + { + r = crank(r) + x[i]; + r = crank(r); + } + return r & (_DL_LOCKS - 1); +} + + static void -cancel_handler (void *arg __attribute__((unused))) +cancel_handler (void *arg) { - __rtld_lock_unlock_recursive (GL(dl_load_write_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[(uintptr_t)arg]); } -hidden_proto (__dl_iterate_phdr) +static int -__dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info, - size_t size, void *data), void *data) +__dl_iterate_phdr_internal (int (*callback) (struct dl_phdr_info *info, + size_t size, void *data), void *data, unsigned idx) { struct link_map *l; struct dl_phdr_info info; int ret = 0; /* Make sure nobody modifies the list of loaded objects. */ - __rtld_lock_lock_recursive (GL(dl_load_write_lock)); - __libc_cleanup_push (cancel_handler, NULL); + __rtld_lock_lock_recursive (GL(dl_load_write_lock)[idx]); + __libc_cleanup_push (cancel_handler, (void*)(uintptr_t)idx); /* We have to determine the namespace of the caller since this determines which namespace is reported. */ @@ -80,10 +103,26 @@ __dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info, /* Release the lock. */ __libc_cleanup_pop (0); - __rtld_lock_unlock_recursive (GL(dl_load_write_lock)); + __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[idx]); return ret; } + +hidden_proto (__dl_iterate_phdr) +int +__dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info, + size_t size, void *data), void *data) { + return __dl_iterate_phdr_internal(callback, data, 0); +} hidden_def (__dl_iterate_phdr) +hidden_proto (__dl_iterate_phdr_parallel) +int +__dl_iterate_phdr_parallel (int (*callback) (struct dl_phdr_info *info, + size_t size, void *data), void *data) { + return __dl_iterate_phdr_internal(callback, data, hash((unsigned long)THREAD_SELF)); +} +hidden_def (__dl_iterate_phdr_parallel) + weak_alias (__dl_iterate_phdr, dl_iterate_phdr); +weak_alias (__dl_iterate_phdr_parallel, dl_iterate_phdr_parallel); diff --git a/elf/dl-object.c b/elf/dl-object.c index 362992b..a8df503 100644 --- a/elf/dl-object.c +++ b/elf/dl-object.c @@ -31,7 +31,8 @@ internal_function _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid) { /* We modify the list of loaded objects. */ - __rtld_lock_lock_recursive (GL(dl_load_write_lock)); + for (int i = 0; i < _DL_LOCKS; i++) + __rtld_lock_lock_recursive (GL(dl_load_write_lock)[i]); if (GL(dl_ns)[nsid]._ns_loaded != NULL) { @@ -48,7 +49,8 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid) new->l_serial = GL(dl_load_adds); ++GL(dl_load_adds); - __rtld_lock_unlock_recursive (GL(dl_load_write_lock)); + for (int i = 0; i < _DL_LOCKS; i++) + __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[i]); } diff --git a/elf/dl-support.c b/elf/dl-support.c index c30194c..0fc12e4 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -213,7 +213,7 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock) /* This lock is used to keep __dl_iterate_phdr from inspecting the list of loaded objects while an object is added to or removed from that list. */ -__rtld_lock_define_initialized_recursive (, _dl_load_write_lock) +__rtld_lock_recursive_t _dl_load_write_lock[_DL_LOCKS] = { _RTLD_LOCK_RECURSIVE_INITIALIZER, }; #ifdef HAVE_AUX_VECTOR diff --git a/elf/link.h b/elf/link.h index f448141..56fd4b5 100644 --- a/elf/link.h +++ b/elf/link.h @@ -168,6 +168,9 @@ extern int dl_iterate_phdr (int (*__callback) (struct dl_phdr_info *, size_t, void *), void *__data); +extern int dl_iterate_phdr_parallel (int (*__callback) (struct dl_phdr_info *, + size_t, void *), + void *__data); /* Prototypes for the ld.so auditing interfaces. These are not defined anywhere in ld.so but instead have to be provided by the diff --git a/elf/rtld.c b/elf/rtld.c index 647661c..9c65b03 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -128,7 +128,7 @@ struct rtld_global _rtld_global = ._dl_stack_flags = DEFAULT_STACK_PERMS, #ifdef _LIBC_REENTRANT ._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, - ._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER, + ._dl_load_write_lock = { _RTLD_LOCK_RECURSIVE_INITIALIZER, }, #endif ._dl_nns = 1, ._dl_ns = diff --git a/include/link.h b/include/link.h index 32a7392..9582ed9 100644 --- a/include/link.h +++ b/include/link.h @@ -336,6 +336,10 @@ extern int __dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data); +extern int __dl_iterate_phdr_parallel (int (*callback) (struct dl_phdr_info *info, + size_t size, void *data), + void *data); + /* We use this macro to refer to ELF macros independent of the native wordsize. `ELFW(R_TYPE)' is used in place of `ELF32_R_TYPE' or `ELF64_R_TYPE'. */ diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index f68fdf4..0d2b2a1 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -268,6 +268,8 @@ typedef void (*receiver_fct) (int, const char *, const char *); user interface to run-time dynamic linking. */ +#define _DL_LOCKS 64 + #ifndef SHARED # define EXTERN extern # define GL(name) _##name @@ -334,7 +336,7 @@ struct rtld_global /* This lock is used to keep __dl_iterate_phdr from inspecting the list of loaded objects while an object is added to or removed from that list. */ - __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock) + __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock[_DL_LOCKS]) /* Incremented whenever something may have been added to dl_loaded. */ EXTERN unsigned long long _dl_load_adds; diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist index c1054ce..ea9d2e9 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist @@ -82,6 +82,7 @@ GLIBC_2.17 clock_settime F GLIBC_2.17 secure_getenv F GLIBC_2.18 GLIBC_2.18 A GLIBC_2.18 __cxa_thread_atexit_impl F +GLIBC_2.23 dl_iterate_phdr_parallel F GLIBC_2.2.5 GLIBC_2.2.5 A GLIBC_2.2.5 _Exit F GLIBC_2.2.5 _IO_2_1_stderr_ D 0xe0 -- Gleb.
Attachment:
mt.cc
Description: Text document
Attachment:
gcc.patch
Description: Text document
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |