This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
Re: [PATCH v3] Fix failure to detach if threads exit while detaching on linux
- From: Pedro Alves <palves at redhat dot com>
- To: Antoine Tremblay <antoine dot tremblay at ericsson dot com>, gdb-patches at sourceware dot org
- Date: Fri, 3 Jun 2016 20:15:17 +0100
- Subject: Re: [PATCH v3] Fix failure to detach if threads exit while detaching on linux
- Authentication-results: sourceware.org; auth=none
- References: <d6bca62c-e357-b112-a08b-d040e53c78cc at redhat dot com> <1464972191-13912-1-git-send-email-antoine dot tremblay at ericsson dot com>
On 06/03/2016 05:43 PM, Antoine Tremblay wrote:
> Updated patch...
>
> Thanks for looking at this!
The test hangs waiting for gdbserver to exit, because gdbserver
doesn't exit. :-)
So the extra testing actually found a bug. Good, see? :-)
gdbserver is stuck in linux_join:
(gdb) bt
#0 0x00007f31e08af0da in __GI___waitpid (pid=13862, stat_loc=0x7fffcdf15f28, options=0) at ../sysdeps/unix/sysv/linux/waitpid.c:29
#1 0x000000000043bc53 in my_waitpid (pid=13862, status=0x7fffcdf15f28, flags=0) at /home/pedro/gdb/mygit/src/gdb/gdbserver/../nat/linux-waitpid.c:88
#2 0x000000000042e0d0 in linux_join (pid=13862) at /home/pedro/gdb/mygit/src/gdb/gdbserver/linux-low.c:1603
#3 0x0000000000414145 in process_serial_event () at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:4000
#4 0x0000000000414fb7 in handle_serial_event (err=0, client_data=0x0) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:4347
#5 0x000000000041bf42 in handle_file_event (event_file_desc=4) at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:428
#6 0x000000000041b66e in process_event () at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:184
#7 0x000000000041c4b6 in start_event_loop () at /home/pedro/gdb/mygit/src/gdb/gdbserver/event-loop.c:547
#8 0x0000000000413896 in captured_main (argc=4, argv=0x7fffcdf16358) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:3719
#9 0x0000000000413abf in main (argc=4, argv=0x7fffcdf16358) at /home/pedro/gdb/mygit/src/gdb/gdbserver/server.c:3804
(gdb)
Running the test manually, I can reproduce it. In this one run,
gdbserver is waiting for pid 32650 to exit. And that process's
status is:
$ cat /proc/32650/status
Name: detach-gone-thr
State: Z (zombie)
Tgid: 32650
Ngid: 0
Pid: 32650
PPid: 32642
TracerPid: 0
...
Threads: 256
So why isn't gdbserver's inferior process exiting and reporting
a status to gdbserver, which is the inferior process's parent?
Notice TracerPid == 0. That means gdbserver successfully detached
from that lwp.
However, it seems like gdbserver didn't manage to detach from
any of the other threads:
$ grep -h State /proc/32650/task/*/status | sort | uniq -c
256 State: Z (zombie)
$ grep -h Tracer /proc/32650/task/*/status | sort | uniq -c
1 TracerPid: 0
255 TracerPid: 32642
32642 is gdbserver, which again, is also 32650's parent.
gdbserver is detaching from the leader thread first,
while gdb isn't. So I thought I'd try to make gdbserver
detach from the non-leader lwps first. That doesn't make a
difference.
Under ptrace, the leader thread doesn't report an exit to the
ptracer until all the children are reaped. So I thought I'd try
to make gdbserver collect the exit status of each zombie lwp,
since it's still supposedly attached to them.
And there you go, that works. See hacky patch on top of yours below.
The important bit is this:
- if (!check_ptrace_stopped_lwp_gone (lwp))
- error (_("Can't detach %s: %s"),
- target_pid_to_str (ptid_of (thread)),
- strerror (errno));
+ {
+ int ret, status;
+
+ if (!check_ptrace_stopped_lwp_gone (lwp))
+ error (_("Can't detach %s: %s"),
+ target_pid_to_str (ptid_of (thread)),
+ strerror (errno));
+
+ ret = waitpid (lwpid, &status, __WALL);
+ }
I have to go right now, but I'll clean this up once I have a chance.
>From 2895f87d0f59de37dcbd3f0f3cd281e26303bb7d Mon Sep 17 00:00:00 2001
From: Pedro Alves <palves@redhat.com>
Date: Fri, 3 Jun 2016 20:09:14 +0100
Subject: [PATCH] fix hang
---
gdb/gdbserver/linux-low.c | 52 +++++++++++++++++-------
gdb/testsuite/gdb.threads/detach-gone-thread.exp | 1 +
2 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 5f02dab..aad38a4 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -1480,16 +1480,12 @@ check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
return 0;
}
-static int
-linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
+static void
+linux_detach_one_lwp (struct lwp_info *lwp)
{
- struct thread_info *thread = (struct thread_info *) entry;
- struct lwp_info *lwp = get_thread_lwp (thread);
- int pid = * (int *) args;
+ struct thread_info *thread = get_lwp_thread (lwp);
int sig;
-
- if (ptid_get_pid (entry->id) != pid)
- return 0;
+ int lwpid;
/* If there is a pending SIGSTOP, get rid of it. */
if (lwp->stop_expected)
@@ -1511,14 +1507,38 @@ linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
/* Finally, let it resume. */
if (the_low_target.prepare_to_resume != NULL)
the_low_target.prepare_to_resume (lwp);
- if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
+ lwpid = lwpid_of (thread);
+ if (ptrace (PTRACE_DETACH, lwpid, (PTRACE_TYPE_ARG3) 0,
(PTRACE_TYPE_ARG4) (long) sig) < 0)
- if (!check_ptrace_stopped_lwp_gone (lwp))
- error (_("Can't detach %s: %s"),
- target_pid_to_str (ptid_of (thread)),
- strerror (errno));
+ {
+ int ret, status;
+
+ if (!check_ptrace_stopped_lwp_gone (lwp))
+ error (_("Can't detach %s: %s"),
+ target_pid_to_str (ptid_of (thread)),
+ strerror (errno));
+
+ ret = waitpid (lwpid, &status, __WALL);
+ }
delete_lwp (lwp);
+}
+
+static int
+linux_detach_lwp_callback (struct inferior_list_entry *entry, void *args)
+{
+ struct thread_info *thread = (struct thread_info *) entry;
+ struct lwp_info *lwp = get_thread_lwp (thread);
+ int pid = * (int *) args;
+ int lwpid = lwpid_of (thread);
+
+ if (ptid_get_pid (entry->id) != pid)
+ return 0;
+
+ // if (ptid_get_pid (entry->id) == lwpid)
+ // return 0;
+
+ linux_detach_one_lwp (lwp);
return 0;
}
@@ -1549,7 +1569,11 @@ linux_detach (int pid)
/* Stabilize threads (move out of jump pads). */
stabilize_threads ();
- find_inferior (&all_threads, linux_detach_one_lwp, &pid);
+ /* Detach from the children first. */
+ find_inferior (&all_threads, linux_detach_lwp_callback, &pid);
+
+ // struct lwp_info *lwp = find_lwp_pid (pid_to_ptid (pid));
+ // linux_detach_one_lwp (lwp);
the_target->mourn (process);
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
index b8caf18..1780aeb 100644
--- a/gdb/testsuite/gdb.threads/detach-gone-thread.exp
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
@@ -31,6 +31,7 @@ proc test_server_exit {} {
return
}
+ set test "server exits"
gdb_expect {
-i $server_spawn_id
eof {
--
2.5.5