Bug 31405 - Improve dl_iterate_phdr using _dl_find_object
Summary: Improve dl_iterate_phdr using _dl_find_object
Status: RESOLVED FIXED
Alias: None
Product: glibc
Classification: Unclassified
Component: libc (show other bugs)
Version: 2.39
: P2 enhancement
Target Milestone: 2.40
Assignee: Adhemerval Zanella
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2024-02-21 16:15 UTC by Fred Hsueh
Modified: 2024-02-24 16:15 UTC (History)
5 users (show)

See Also:
Host:
Target:
Build:
Last reconfirmed:
fw: security-


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Fred Hsueh 2024-02-21 16:15:08 UTC
A performance bottleneck in the runtime performance of Address Sanitizer (ASAN) is that a call to _dl_iterate_phdr() is made as part of malloc / free for memory tracking. This function acquires the GL(dl_load_write_lock) mutex, effectively serializing and slowing all these operations. As much of the ASAN locking is for read operations, the data protected by this is dominated by read and by few writes. An improvement can be made to change this to a RWLock which would allow concurrent reads.

Example stack from free():
#0  0x6065f4b8 in find_exidx_callback (info=<optimized out>, size=<optimized out>, ptr=0x507a74fc) at ../sysdeps/arm/find_exidx.c:48
#1  0x6065e6a4 in __GI___dl_iterate_phdr (callback=0x6956d5a0, data=0x6065e6a4 <__GI___dl_iterate_phdr+364>, data@entry=0x507a74f4) at dl-iteratephdr.c:76
#2  0x6065f518 in __gnu_Unwind_Find_exidx (pc=pc@entry=1660388795, pcount=0x507a751c, pcount@entry=0x507a7514) at ../sysdeps/arm/find_exidx.c:74
#3  0x606b1fb0 in get_eit_entry (ucbp=ucbp@entry=0x507a7530, return_address=1660388795) at gcc/libgcc/unwind-arm-common.inc:276
#4  0x606b2544 in __gnu_Unwind_Backtrace (trace=0x69046978 <__sanitizer::(anonymous namespace)::Unwind_Trace(_Unwind_Context*, void*)>, trace_argument=0x507a77d8, entry_vrs=<optimized out>) at gcc/libgcc/unwind-arm-common.inc:768
#5  0x606b2ef4 in _Unwind_Backtrace () at gcc/libgcc/config/arm/libunwind.S:360
#6  0x69046b8c in __sanitizer::BufferedStackTrace::UnwindSlow (this=0x507a7858, pc=pc@entry=1761766388, max_depth=max_depth@entry=30) at gcc/libsanitizer/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp:130
#7  0x6903f6e4 in __sanitizer::BufferedStackTrace::Unwind (this=this@entry=0x507a7858, max_depth=30, max_depth@entry=1761766436, pc=pc@entry=1761766388, bp=bp@entry=1350204548, context=context@entry=0x0, stack_top=stack_top@entry=1350214408, stack_bottom=1349169152, request_fast_unwind=request_fast_unwind@entry=false) at gcc/libsanitizer/sanitizer_common/sanitizer_stacktrace_libcdep.cpp:157
#8  0x6902eff4 in __sanitizer::BufferedStackTrace::UnwindImpl (this=0x507a7858, pc=1761766388, bp=1350204548, context=0x0, request_fast=false, max_depth=30) at gcc/libsanitizer/asan/asan_stack.cpp:77
#9  0x68fa6f58 in __sanitizer::BufferedStackTrace::Unwind (this=this@entry=0x507a7858, pc=pc@entry=1761766388, bp=bp@entry=1350204548, context=context@entry=0x0, request_fast=request_fast@entry=false, max_depth=30) at gcc/libsanitizer/sanitizer_common/sanitizer_stacktrace.h:131
#10 0x69026c24 in __interceptor_free (ptr=0x4e124af0) at gcc/libsanitizer/asan/asan_malloc_linux.cpp:52
...
Comment 1 Adhemerval Zanella 2024-02-21 19:40:04 UTC
I think we can use _dl_find_object instead, as used on libbacktrace for other ABIs. The API was designed for exactly this scalability issue and dlfo_eh_frame/dlfo_eh_count maps to PT_ARM_EXIDX vaddr start and length on ARM.


diff --git a/sysdeps/arm/find_exidx.c b/sysdeps/arm/find_exidx.c
index d647865e5a..a7a76b09da 100644
--- a/sysdeps/arm/find_exidx.c
+++ b/sysdeps/arm/find_exidx.c
@@ -16,64 +16,15 @@
    <https://www.gnu.org/licenses/>.  */

 #include <link.h>
-#include <unwind.h>
-
-struct unw_eh_callback_data
-{
-  _Unwind_Ptr pc;
-  _Unwind_Ptr exidx_start;
-  int exidx_len;
-};
-
-
-/* Callback to determines if the PC lies within an object, and remember the
-   location of the exception index table if it does.  */
-
-static int
-find_exidx_callback (struct dl_phdr_info * info, size_t size, void * ptr)
-{
-  struct unw_eh_callback_data * data;
-  const ElfW(Phdr) *phdr;
-  int i;
-  int match;
-  _Unwind_Ptr load_base;
-
-  data = (struct unw_eh_callback_data *) ptr;
-  load_base = info->dlpi_addr;
-  phdr = info->dlpi_phdr;
-
-  match = 0;
-  for (i = info->dlpi_phnum; i > 0; i--, phdr++)
-    {
-      if (phdr->p_type == PT_LOAD)
-        {
-          _Unwind_Ptr vaddr = phdr->p_vaddr + load_base;
-          if (data->pc >= vaddr && data->pc < vaddr + phdr->p_memsz)
-            match = 1;
-        }
-      else if (phdr->p_type == PT_ARM_EXIDX)
-       {
-         data->exidx_start = (_Unwind_Ptr) (phdr->p_vaddr + load_base);
-         data->exidx_len = phdr->p_memsz;
-       }
-    }
-
-  return match;
-}
-

 /* Find the exception index table containing PC.  */

 _Unwind_Ptr
 __gnu_Unwind_Find_exidx (_Unwind_Ptr pc, int * pcount)
 {
-  struct unw_eh_callback_data data;
-
-  data.pc = pc;
-  data.exidx_start = 0;
-  if (__dl_iterate_phdr (find_exidx_callback, &data) <= 0)
+  struct dl_find_object data;
+  if (_dl_find_object ((void *) pc, &data) < 0)
     return 0;
-
-  *pcount = data.exidx_len / 8;
-  return data.exidx_start;
+  *pcount = data.dlfo_eh_count;
+  return (_Unwind_Ptr) data.dlfo_eh_frame;
 }
Comment 2 Adhemerval Zanella 2024-02-23 11:51:30 UTC
Fixed on 2.40.
Comment 3 Florian Weimer 2024-02-24 15:51:49 UTC
The dl_iterate_phdr locking behavior cannot be changed easily (not for the existing symbol) because existing callbacks assume that dl_iterate_phdr uses a mutex for locking.