This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
[PATCH] PR gdb/15713 - errors from i386_linux_resume lead to lock-up
- From: Pedro Alves <palves at redhat dot com>
- To: gdb-patches at sourceware dot org
- Date: Wed, 21 May 2014 13:47:48 +0100
- Subject: [PATCH] PR gdb/15713 - errors from i386_linux_resume lead to lock-up
- Authentication-results: sourceware.org; auth=none
linux_nat_resume is not considering that linux_ops->to_resume may throw:
/* Mark LWP as not stopped to prevent it from being continued by
linux_nat_resume_callback. */
lp->stopped = 0;
if (resume_many)
iterate_over_lwps (ptid, linux_nat_resume_callback, NULL);
If something within linux_nat_resume_callback throws, GDB leaves the lwp_info
as if the inferior was resumed, while it actually wasn't.
A couple of examples (there are possibly others):
- i386_linux_resume calls target_read which calls QUIT.
- if the actual ptrace resumption fails in inf_ptrace_resume,
perror_with_name is called.
If the user tries to kill the inferior at this point (or quit, which
offers to kill), GDB locks up trying to stop the lwp -- if it is
already stopped no new waitpid event gets generated for it.
Fix this by setting the stopped flag earlier, as soon as we collect a
stop event with waitpid, and clearing it always only after resuming
the lwp successfully.
Tested on x86_64 Fedora 20. Confirmed the lock-up disappears using a
local hack that forces an error in inf_ptrace_resume.
Also fixes a little "set debug lin-lwp" annoyance. Currently we always see:
Continuing.
LLR: Preparing to resume process 6802, 0, inferior_ptid Thread 0x7ffff7fc7740 (LWP 6802)
^^^^^^^^
RC: Resuming sibling Thread 0x7ffff77c5700 (LWP 6807), 0, resume
RC: Resuming sibling Thread 0x7ffff7fc6700 (LWP 6806), 0, resume
RC: Not resuming sibling Thread 0x7ffff7fc7740 (LWP 6802) (not stopped)
^^^^^^^^^^^^^^^^^^^^^^^
LLR: PTRACE_CONT process 6802, 0 (resume event thread)
This patch gets rid of the "Not resuming sibling" line.
2014-05-21 Pedro Alves <palves@redhat.com>
PR gdb/15713
* linux-nat.c (linux_nat_resume_callback): Rename the second
parameter to 'except'. Skip LP if it points to EXCEPT.
(linux_nat_resume): Don't mark the event lwp as not stopped
before resuming sibling lwps. Instead ask
linux_nat_resume_callback to skip the event lwp. Mark it as not
stopped after actually resuming it.
(linux_handle_syscall_trap): Mark the lwp as not stopped after
resuming it.
(linux_handle_extended_wait): Mark the lwp as not stopped after
resuming it.
(wait_lwp): Mark the lwp as stopped here.
(stop_wait_callback): Mark the lwp as not stopped right after
resuming it. Don't mark lwps as stopped here.
(linux_nat_filter_event): Mark the lwp as stopped earlier.
(linux_nat_wait_1): Don't mark dead lwps as stopped here.
---
gdb/linux-nat.c | 41 ++++++++++++++++++-----------------------
1 file changed, 18 insertions(+), 23 deletions(-)
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index e84ee95..0d92b25 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -1642,13 +1642,17 @@ resume_lwp (struct lwp_info *lp, int step, enum gdb_signal signo)
}
}
-/* Resume LWP, with the last stop signal, if it is in pass state. */
+/* Callback for iterate_over_lwps. If LWP is EXCEPT, do nothing.
+ Resume LWP with the last stop signal, if it is in pass state. */
static int
-linux_nat_resume_callback (struct lwp_info *lp, void *data)
+linux_nat_resume_callback (struct lwp_info *lp, void *except)
{
enum gdb_signal signo = GDB_SIGNAL_0;
+ if (lp == except)
+ return 0;
+
if (lp->stopped)
{
struct thread_info *thread;
@@ -1764,12 +1768,8 @@ linux_nat_resume (struct target_ops *ops,
return;
}
- /* Mark LWP as not stopped to prevent it from being continued by
- linux_nat_resume_callback. */
- lp->stopped = 0;
-
if (resume_many)
- iterate_over_lwps (ptid, linux_nat_resume_callback, NULL);
+ iterate_over_lwps (ptid, linux_nat_resume_callback, lp);
/* Convert to something the lower layer understands. */
ptid = pid_to_ptid (ptid_get_lwp (lp->ptid));
@@ -1778,6 +1778,7 @@ linux_nat_resume (struct target_ops *ops,
linux_nat_prepare_to_resume (lp);
linux_ops->to_resume (linux_ops, ptid, step, signo);
lp->stopped_by_watchpoint = 0;
+ lp->stopped = 0;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
@@ -1864,6 +1865,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping)
lp->syscall_state = TARGET_WAITKIND_IGNORE;
ptrace (PTRACE_CONT, ptid_get_lwp (lp->ptid), 0, 0);
+ lp->stopped = 0;
return 1;
}
@@ -1947,6 +1949,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping)
linux_nat_prepare_to_resume (lp);
linux_ops->to_resume (linux_ops, pid_to_ptid (ptid_get_lwp (lp->ptid)),
lp->step, GDB_SIGNAL_0);
+ lp->stopped = 0;
return 1;
}
@@ -2156,7 +2159,7 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
linux_ops->to_resume (linux_ops,
pid_to_ptid (ptid_get_lwp (lp->ptid)),
0, GDB_SIGNAL_0);
-
+ lp->stopped = 0;
return 1;
}
@@ -2311,6 +2314,7 @@ wait_lwp (struct lwp_info *lp)
}
gdb_assert (WIFSTOPPED (status));
+ lp->stopped = 1;
/* Handle GNU/Linux's syscall SIGTRAPs. */
if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP)
@@ -2564,6 +2568,7 @@ stop_wait_callback (struct lwp_info *lp, void *data)
errno = 0;
ptrace (PTRACE_CONT, ptid_get_lwp (lp->ptid), 0, 0);
+ lp->stopped = 0;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"PTRACE_CONT %s, 0, 0 (%s) "
@@ -2590,9 +2595,7 @@ stop_wait_callback (struct lwp_info *lp, void *data)
/* Save the sigtrap event. */
lp->status = status;
- gdb_assert (!lp->stopped);
gdb_assert (lp->signalled);
- lp->stopped = 1;
}
else
{
@@ -2604,8 +2607,6 @@ stop_wait_callback (struct lwp_info *lp, void *data)
"SWC: Delayed SIGSTOP caught for %s.\n",
target_pid_to_str (lp->ptid));
- lp->stopped = 1;
-
/* Reset SIGNALLED only after the stop_wait_callback call
above as it does gdb_assert on SIGNALLED. */
lp->signalled = 0;
@@ -2933,6 +2934,10 @@ linux_nat_filter_event (int lwpid, int status, int *new_pending_p)
if (!WIFSTOPPED (status) && !lp)
return NULL;
+ /* This LWP is stopped now. (And if dead, this prevents it from
+ ever being continued.) */
+ lp->stopped = 1;
+
/* Handle GNU/Linux's syscall SIGTRAPs. */
if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP)
{
@@ -2975,7 +2980,6 @@ linux_nat_filter_event (int lwpid, int status, int *new_pending_p)
used. */
if (ptid_get_pid (lp->ptid) == ptid_get_lwp (lp->ptid))
{
- lp->stopped = 1;
iterate_over_lwps (pid_to_ptid (ptid_get_pid (lp->ptid)),
stop_and_resume_callback, new_pending_p);
}
@@ -3320,13 +3324,9 @@ retry:
" cancelled it\n",
ptid_get_lwp (lp->ptid));
}
- lp->stopped = 1;
}
else
- {
- lp->stopped = 1;
- lp->signalled = 0;
- }
+ lp->signalled = 0;
}
else if (WIFEXITED (lp->status) || WIFSIGNALED (lp->status))
{
@@ -3343,11 +3343,6 @@ retry:
pending for the next time we're able to report
it. */
- /* Prevent trying to stop this thread again. We'll
- never try to resume it because it has a pending
- status. */
- lp->stopped = 1;
-
/* Dead LWP's aren't expected to reported a pending
sigstop. */
lp->signalled = 0;
--
1.9.0