On Fedora 31, most shared libraries are CET-enabled, but /lib64/libnss_myhostname.so.2 isn't one of them. It depends on:

    linux-vdso.so.1 (0x00007ffc29b44000)
    libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f6859f61000)
    libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f6859f3f000)
    libc.so.6 => /lib64/libc.so.6 (0x00007f6859d76000)
    /lib64/ld-linux-x86-64.so.2 (0x00007f6859fb9000)

On a CET machine, when nss/test-netdb tries to dlopen /lib64/libnss_myhostname.so.2, it sets GL(dl_initfirst) to libpthread.so.0:

Old value = (struct link_map *) 0x0
New value = (struct link_map *) 0x40e8f0
0x00007ffff7fd940a in _dl_map_object_from_fd ( name=name@entry=0x7ffff75d7ad3 "libpthread.so.0", origname=origname@entry=0x0, fd=-1, fbp=fbp@entry=0x7ffeffffbb20, realname=<optimized out>, loader=loader@entry=0x40df00, l_type=<optimized out>, mode=<optimized out>, stack_endp=<optimized out>, nsid=<optimized out>) at dl-load.c:1354
1354 GL(dl_initfirst) = l;
(gdb)

When dl_cet_check then failed for some reason and called _dl_signal_error, __GI__dl_catch_exception called _dl_close_worker, which unloaded libpthread.so.0:

_int_free (av=0x7ffff77c8b80 <main_arena>, p=0x40e8e0, have_lock=<optimized out>) at malloc.c:4359
4359 if (!in_smallbin_range(size))
(gdb) bt
#0 _int_free (av=0x7ffff77c8b80 <main_arena>, p=0x40e8e0, have_lock=<optimized out>) at malloc.c:4359
#1 0x00007ffff7fe74bb in _dl_close_worker (force=<optimized out>, map=<optimized out>) at dl-close.c:759
#2 _dl_close_worker (map=<optimized out>, force=force@entry=true) at dl-close.c:135
#3 0x00007ffff7fe5b08 in _dl_open (file=<optimized out>, mode=<optimized out>, caller_dlopen=0x7ffff772a4a1 <nss_load_library+241>, nsid=-2, argc=2, argv=<optimized out>, env=0x7ffeffffcde0) at dl-open.c:874
#4 0x00007ffff7740ef1 in do_dlopen (ptr=ptr@entry=0x7ffeffffc970) at dl-libc.c:96
#5 0x00007ffff7741971 in __GI__dl_catch_exception ( exception=exception@entry=0x7ffeffffc8f0, operate=operate@entry=0x7ffff7740eb0 <do_dlopen>, 
args=args@entry=0x7ffeffffc970) at dl-error-skeleton.c:208 #6 0x00007ffff7741a23 in __GI__dl_catch_error ( objname=objname@entry=0x7ffeffffc950, errstring=errstring@entry=0x7ffeffffc958, mallocedp=mallocedp@entry=0x7ffeffffc94f, operate=operate@entry=0x7ffff7740eb0 <do_dlopen>, args=args@entry=0x7ffeffffc970) at dl-error-skeleton.c:227 #7 0x00007ffff7740fe7 in dlerror_run ( operate=operate@entry=0x7ffff7740eb0 <do_dlopen>, Then I got Program received signal SIGSEGV, Segmentation fault. call_init (l=0x40e8f0, argc=argc@entry=2, argv=argv@entry=0x7ffeffffcdc8, env=env@entry=0x7ffeffffcde0) at dl-init.c:39 39 if (__builtin_expect (l->l_name[0], 'a') == '\0' (gdb) p *l $34 = {l_addr = 7738135660173684588, l_name = 0x302e302e37323100 <error: Cannot access memory at address 0x302e302e37323100>, l_ld = 0xd5d5d5d50000302e, l_next = 0x0, l_prev = 0xd5d5d5d5d5d5d5d5, l_real = 0xd5d5d5d5d5d5d5d5, l_ns = -3038287259199220267, l_libname = 0xd5d5d5d5d5d5d5d5, l_info = { 0xd5d5d5d5d5d5d5d5 <repeats 77 times>}, l_phdr = 0xd5d5d5d5d5d5d5d5, l_entry = 15408456814510331349, l_phnum = 54741, l_ldnum = 54741, l_searchlist = {r_list = 0xd5d5d5d5d5d5d5d5, r_nlist = 3587560917}, l_symbolic_searchlist = {r_list = 0xd5d5d5d5d5d5d5d5, r_nlist = 3587560917}, l_loader = 0xd5d5d5d5d5d5d5d5, l_versions = 0xd5d5d5d5d5d5d5d5, l_nversions = 3587560917, l_nbuckets = 3587560917, l_gnu_bitmask_idxbits = 3587560917, l_gnu_shift = 3587560917, l_gnu_bitmask = 0xd5d5d5d5d5d5d5d5, {l_gnu_buckets = 0xd5d5d5d5d5d5d5d5, l_chain = 0xd5d5d5d5d5d5d5d5}, {l_gnu_chain_zero = 0xd5d5d5d5d5d5d5d5, l_buckets = 0xd5d5d5d5d5d5d5d5}, l_direct_opencount = 3587560917, l_type = lt_library, l_relocated = 1, l_init_called = 1, l_global = 1, l_reserved = 2, l_phdr_allocated = 1, l_soname_added = 1, l_faked = 0, l_need_tls_init = 1, l_auditing = 0, l_audit_any_plt = 1, l_removed = 0, l_contiguous = 1, l_symbolic_in_local_scope = 1, l_free_initfini = 1, l_nodelete_active = 213, l_nodelete_pending = 213, l_cet = 5, l_rpath_dirs = 
{dirs = 0xd5d5d5d5d5d5d5d5, malloced = -707406379}, l_reloc_result = 0xd5d5d5d5d5d5d5d5, l_versyms = 0xd5d5d5d5d5d5d5d5, l_origin = 0xd5d5d5d5d5d5d5d5 <error: Cannot access memory at address 0xd5d5d5d5d5d5d5d5>, l_map_start = 15408456814510331349, l_map_end = 15408456814510331349, l_text_end = 15408456814510331349, l_scope_mem = {0xd5d5d5d5d5d5d5d5, 0xd5d5d5d5d5d5d5d5, 0xd5d5d5d5d5d5d5d5, 0xd5d5d5d5d5d5d5d5}, l_scope_max = 15408456814510331349, l_scope = 0xd5d5d5d5d5d5d5d5, l_local_scope = {0xd5d5d5d5d5d5d5d5, 0xd5d5d5d5d5d5d5d5}, l_file_id = {dev = 15408456814510331349, ino = 15408456814510331349}, l_runpath_dirs = {dirs = 0xd5d5d5d5d5d5d5d5, malloced = -707406379}, l_initfini = 0xd5d5d5d5d5d5d5d5, l_reldeps = 0xd5d5d5d5d5d5d5d5, l_reldepsmax = 3587560917, l_used = 3587560917, l_feature_1 = 3587560917, l_flags_1 = 3587560917, l_flags = 3587560917, l_idx = -707406379, l_mach = { plt = 15408456814510331349, gotplt = 15408456814510331349, tlsdesc_table = 0x2a2a2a2a2a2a2a2a}, l_lookup_cache = {sym = 0x21, type_class = 4255008, value = 0x40ed40, ret = 0x40ed80}, l_tls_initimage = 0x21, l_tls_initimage_size = 140737345313970,
Program received signal SIGSEGV, Segmentation fault. call_init (l=0x40e8f0, argc=argc@entry=2, argv=argv@entry=0x7ffeffffcdc8, env=env@entry=0x7ffeffffcde0) at dl-init.c:39 39 if (__builtin_expect (l->l_name[0], 'a') == '\0' (gdb) bt #0 call_init (l=0x40e8f0, argc=argc@entry=2, argv=argv@entry=0x7ffeffffcdc8, env=env@entry=0x7ffeffffcde0) at dl-init.c:39 #1 0x00007ffff7fe22c6 in call_init (env=0x7ffeffffcde0, argv=0x7ffeffffcdc8, argc=2, l=<optimized out>) at dl-init.c:86 #2 _dl_init (main_map=0x40fca0, argc=2, argv=0x7ffeffffcdc8, env=0x7ffeffffcde0) at dl-init.c:86 #3 0x00007ffff77419bd in __GI__dl_catch_exception (exception=<optimized out>, operate=<optimized out>, args=<optimized out>) at dl-error-skeleton.c:182 #4 0x00007ffff7fe6198 in dl_open_worker (a=a@entry=0x7ffeffffc7e0) at dl-open.c:758 #5 0x00007ffff7741971 in __GI__dl_catch_exception (exception=<optimized out>, operate=<optimized out>, args=<optimized out>) at dl-error-skeleton.c:208 #6 0x00007ffff7fe5a1e in _dl_open (file=0x7ffeffffca50 "libnss_sss.so.2", mode=-2147483646, caller_dlopen=0x7ffff772a4a1 <nss_load_library+241>, nsid=-2, argc=2, argv=<optimized out>, env=0x7ffeffffcde0) at dl-open.c:837 #7 0x00007ffff7740ef1 in do_dlopen (ptr=ptr@entry=0x7ffeffffca20) at dl-libc.c:96 #8 0x00007ffff7741971 in __GI__dl_catch_exception ( exception=exception@entry=0x7ffeffffc9a0, operate=operate@entry=0x7ffff7740eb0 <do_dlopen>, args=args@entry=0x7ffeffffca20) at dl-error-skeleton.c:208 #9 0x00007ffff7741a23 in __GI__dl_catch_error ( objname=objname@entry=0x7ffeffffca00, errstring=errstring@entry=0x7ffeffffca08, mallocedp=mallocedp@entry=0x7ffeffffc9ff, operate=operate@entry=0x7ffff7740eb0 <do_dlopen>, args=args@entry=0x7ffeffffca20) at dl-error-skeleton.c:227 #10 0x00007ffff7740fe7 in dlerror_run ( operate=operate@entry=0x7ffff7740eb0 <do_dlopen>, args=args@entry=0x7ffeffffca20) at dl-libc.c:46 #11 0x00007ffff774105a in __GI___libc_dlopen_mode ( name=name@entry=0x7ffeffffca50 "libnss_sss.so.2", 
mode=mode@entry=-2147483646) at dl-libc.c:195 #12 0x00007ffff772a4a1 in nss_load_library (ni=ni@entry=0x409d90) at nsswitch.c:359 #13 0x00007ffff772acd9 in __GI___nss_lookup_function (ni=0x409d90, fct_name=<optimized out>, fct_name@entry=0x7ffff7795d0b "getservbyname_r") at nsswitch.c:456 --Type <RET> for more, q to quit, c to continue without paging--q Quit (gdb) f 1 #1 0x00007ffff7fe22c6 in call_init (env=0x7ffeffffcde0, argv=0x7ffeffffcdc8, argc=2, l=<optimized out>) at dl-init.c:86 86 call_init (GL(dl_initfirst), argc, argv, env); (gdb)
Florian, do you think this might have to do with the NODELETE reordering?
There is this code in dl-load.c:

  /* Remember whether this object must be initialized first. */
  if (l->l_flags_1 & DF_1_INITFIRST)
    GL(dl_initfirst) = l;

libpthread.so.0 has

 0x000000006ffffffb (FLAGS_1) Flags: NOW NODELETE INITFIRST

_dl_close_worker unloaded libpthread.so.0, which left GL(dl_initfirst) unchanged. This patch:

diff --git a/elf/dl-close.c b/elf/dl-close.c
index 104c299209..2d426d8737 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -756,6 +756,9 @@ _dl_close_worker (struct link_map *map, bool force)
           if (imap->l_runpath_dirs.dirs != (void *) -1)
             free (imap->l_runpath_dirs.dirs);
 
+          if (imap == GL(dl_initfirst))
+            GL(dl_initfirst) = NULL;
+
           free (imap);
         }
     }

seems to work.
Thanks. Would you please post the patch to libc-alpha? I think it is correct. I will post my non-CET test case for this separately.
(In reply to Florian Weimer from comment #4)
> Thanks. Would you please post the patch to libc-alpha? I think it is
> correct.
>
> I will post my non-CET test case for this separately.

https://sourceware.org/ml/libc-alpha/2020-01/msg00333.html
Fixed by

commit 5177d85b0c050a2333a0c4165c938dd422013d05
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Jan 16 06:45:36 2020 -0800

    Clear GL(dl_initfirst) when freeing its link_map memory [BZ# 25396]

    We should clear GL(dl_initfirst) when freeing its link_map memory.

    Tested on Fedora 31/x86-64 with CET.

    Reviewed-by: Florian Weimer <fweimer@redhat.com>
The master branch has been updated by Florian Weimer <fw@sourceware.org>:

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=a332bd1518af518c984fad73eba6f46dc5b2b2d4

commit a332bd1518af518c984fad73eba6f46dc5b2b2d4
Author: Florian Weimer <fweimer@redhat.com>
Date:   Thu Jan 16 16:53:58 2020 +0100

    elf: Add elf/tst-dlopenfail-2 [BZ #25396]

    Without CET, a jump into a newly loaded object through an overwritten link map often does not crash, it just executes some random code. CET detects this in some cases because the function pointer does not point to the start of a function in the replacement shared object, so there is no ENDBR instruction.

    The new test uses a small shared object and the existing dangling link map to trigger the bug.

    Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>