This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
[RFC] 10/10 non-stop for linux native
- From: Pedro Alves <pedro at codesourcery dot com>
- To: gdb-patches at sourceware dot org
- Date: Tue, 6 May 2008 16:50:10 +0100
- Subject: [RFC] 10/10 non-stop for linux native
This adds non-stop support for linux native.
The changes are:
- ptracing a running thread doesn't work.
This implies that we must ensure that the proc_services
usage in linux-thread-db.c talks to a pid of a stopped lwp.
Checking if a thread is alive with ptrace doesn't work
for running threads. Worse, ptrace errors out claiming
the thread doesn't exist.
- We must not stop all threads, obviously.
- We must mark threads as running if we're resuming
them behind the core's back.
- Implement target_stop_ptid to interrupt only one thread
--
Pedro Alves
2008-05-06 Pedro Alves <pedro@codesourcery.com>
* linux-fork.c (linux_fork_killall): Use SIGKILL instead of PTRACE_KILL.
* linux-nat.c (sigint_clear_callback): New.
(linux_nat_resume): In non-stop mode, only touch the passed in
ptid. Clear the sigint flag.
(linux_handle_extended_wait): On a clone event, add new lwp to
GDB's thread table, and mark as running, executing and stopped
appropriately.
(linux_nat_wait): In non-stop mode, don't stop all lwps, unless
sync_execution.
(kill_callback): If lwp is not stopped, use SIGKILL.
(linux_nat_thread_alive): Use signal 0 to detect if
thread is alive.
(send_sigint_callback): New.
(linux_nat_stop): New.
(linux_nat_stop_ptid): New.
(linux_nat_add_target): Set to_stop and to_stop_ptid.
* linux-nat.h (struct lwp_info): Add sigint field.
* linux-thread-db.c (thread_from_lwp, enable_thread_event)
(check_event): Set proc_handle.pid to the stopped lwp.
(thread_db_find_new_threads): If current inferior is executing,
don't try to read from it.
---
gdb/linux-fork.c | 4 -
gdb/linux-nat.c | 185 ++++++++++++++++++++++++++++++++++++++------------
gdb/linux-nat.h | 4 +
gdb/linux-thread-db.c | 13 +++
4 files changed, 164 insertions(+), 42 deletions(-)
Index: src/gdb/linux-fork.c
===================================================================
--- src.orig/gdb/linux-fork.c 2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-fork.c 2008-05-06 16:24:05.000000000 +0100
@@ -337,7 +337,9 @@ linux_fork_killall (void)
{
pid = PIDGET (fp->ptid);
do {
- ptrace (PT_KILL, pid, 0, 0);
+ /* Use SIGKILL instead of PTRACE_KILL because the former works even
+ if the thread is running, while the latter doesn't. */
+ kill (pid, SIGKILL);
ret = waitpid (pid, &status, 0);
/* We might get a SIGCHLD instead of an exit status. This is
aggravated by the first kill above - a child has just
Index: src/gdb/linux-nat.c
===================================================================
--- src.orig/gdb/linux-nat.c 2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-nat.c 2008-05-06 16:43:18.000000000 +0100
@@ -212,6 +212,8 @@ static void linux_nat_async (void (*call
static int linux_nat_async_mask (int mask);
static int kill_lwp (int lwpid, int signo);
+static int send_sigint_callback (struct lwp_info *lp, void *data);
+
/* Captures the result of a successful waitpid call, along with the
options used in that call. */
struct waitpid_result
@@ -1466,6 +1468,13 @@ resume_set_callback (struct lwp_info *lp
return 0;
}
+static int
+sigint_clear_callback (struct lwp_info *lp, void *data)
+{
+ lp->sigint = 0;
+ return 0;
+}
+
static void
linux_nat_resume (ptid_t ptid, int step, enum target_signal signo)
{
@@ -1489,10 +1498,17 @@ linux_nat_resume (ptid_t ptid, int step,
/* A specific PTID means `step only this process id'. */
resume_all = (PIDGET (ptid) == -1);
- if (resume_all)
- iterate_over_lwps (resume_set_callback, NULL);
- else
- iterate_over_lwps (resume_clear_callback, NULL);
+ if (non_stop && resume_all)
+ internal_error (__FILE__, __LINE__,
+ "can't resume all in non-stop mode");
+
+ if (!non_stop)
+ {
+ if (resume_all)
+ iterate_over_lwps (resume_set_callback, NULL);
+ else
+ iterate_over_lwps (resume_clear_callback, NULL);
+ }
/* If PID is -1, it's the current inferior that should be
handled specially. */
@@ -1502,6 +1518,7 @@ linux_nat_resume (ptid_t ptid, int step,
lp = find_lwp_pid (ptid);
gdb_assert (lp != NULL);
+ /* Convert to something the lower layer understands. */
ptid = pid_to_ptid (GET_LWP (lp->ptid));
/* Remember if we're stepping. */
@@ -1515,6 +1532,9 @@ linux_nat_resume (ptid_t ptid, int step,
"LLAL: setting resumed (%d) %s\n",
__LINE__, target_pid_to_str (lp->ptid));
+ /* Remove the SIGINT mark. Used in non-stop mode. */
+ lp->sigint = 0;
+
/* If we have a pending wait status for this thread, there is no
point in resuming the process. But first make sure that
linux_nat_wait won't preemptively handle the event - we
@@ -1657,6 +1677,8 @@ linux_handle_extended_wait (struct lwp_i
ourstatus->kind = TARGET_WAITKIND_VFORKED;
else
{
+ struct cleanup *old_chain;
+
ourstatus->kind = TARGET_WAITKIND_IGNORE;
new_lp = add_lwp (BUILD_LWP (new_pid, GET_PID (inferior_ptid)));
new_lp->cloned = 1;
@@ -1676,10 +1698,29 @@ linux_handle_extended_wait (struct lwp_i
else
status = 0;
+ /* Make thread_db aware of this thread. We do this
+ early, because we need to mark the new thread as running.
+ thread_db needs a stopped inferior_ptid. We know LP is
+ stopped, so use it this time. */
+ old_chain = save_inferior_ptid ();
+ inferior_ptid = lp->ptid;
+ lp->stopped = 1;
+ target_find_new_threads ();
+ do_cleanups (old_chain);
+ if (!in_thread_list (new_lp->ptid))
+ {
+ /* We're not using thread_db. Attach and add it to
+ GDB's list. */
+ lin_lwp_attach_lwp (new_lp->ptid);
+ target_post_attach (GET_LWP (new_lp->ptid));
+ add_thread (new_lp->ptid);
+ }
+
if (stopping)
new_lp->stopped = 1;
else
{
+ new_lp->stopped = 0;
new_lp->resumed = 1;
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
@@ -1687,12 +1728,15 @@ linux_handle_extended_wait (struct lwp_i
__LINE__, target_pid_to_str (new_lp->ptid));
ptrace (PTRACE_CONT, lp->waitstatus.value.related_pid, 0,
status ? WSTOPSIG (status) : 0);
+ set_running (new_lp->ptid, 1);
+ set_executing (new_lp->ptid, 1);
}
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
"LHEW: Got clone event from LWP %ld, resuming\n",
GET_LWP (lp->ptid));
+ lp->stopped = 0;
ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
return 1;
@@ -2412,13 +2456,8 @@ linux_nat_filter_event (int lwpid, int s
not the end of the debugged application and should be
ignored. */
if (num_lwps > 0)
- {
- /* Make sure there is at least one thread running. */
- gdb_assert (iterate_over_lwps (running_callback, NULL));
-
- /* Discard the event. */
- return NULL;
- }
+ /* Discard the event. */
+ return NULL;
}
/* Check if the current LWP has previously exited. In the nptl
@@ -2552,6 +2591,7 @@ linux_nat_wait (ptid_t ptid, struct targ
__LINE__, target_pid_to_str (lp->ptid));
/* Add the main thread to GDB's thread list. */
add_thread_silent (lp->ptid);
+ set_running (lp->ptid, 1);
}
sigemptyset (&flush_mask);
@@ -2798,19 +2838,38 @@ retry:
fprintf_unfiltered (gdb_stdlog, "LLW: Candidate event %s in %s.\n",
status_to_str (status), target_pid_to_str (lp->ptid));
- /* Now stop all other LWP's ... */
- iterate_over_lwps (stop_callback, NULL);
-
- /* ... and wait until all of them have reported back that they're no
- longer running. */
- iterate_over_lwps (stop_wait_callback, &flush_mask);
- iterate_over_lwps (flush_callback, &flush_mask);
-
- /* If we're not waiting for a specific LWP, choose an event LWP from
- among those that have had events. Giving equal priority to all
- LWPs that have had events helps prevent starvation. */
- if (pid == -1)
- select_event_lwp (&lp, &status);
+ /* When threads are created with CLONE_THREAD, SIGINT is only sent
+ to one thread in the thread group. Send the signal to all the
+ other running threads too. An obvious possible enhancement would
+ be to detect clones we're debugging that haven't been started
+ with CLONE_THREAD, and hence will receive the signal
+ automatically. */
+ if (non_stop
+ && target_can_async_p ()
+ && sync_execution
+ && WIFSTOPPED (status) && WSTOPSIG (status) == SIGINT)
+ {
+ lp->sigint = 1; /* This one has already seen SIGINT. */
+ iterate_over_lwps (send_sigint_callback, NULL);
+ }
+
+ if (!non_stop)
+ {
+ /* Now stop all other LWP's ... */
+ iterate_over_lwps (stop_callback, NULL);
+
+ /* ... and wait until all of them have reported back that
+ they're no longer running. */
+ iterate_over_lwps (stop_wait_callback, &flush_mask);
+ iterate_over_lwps (flush_callback, &flush_mask);
+
+ /* If we're not waiting for a specific LWP, choose an event LWP
+ from among those that have had events. Giving equal priority
+ to all LWPs that have had events helps prevent
+ starvation. */
+ if (pid == -1)
+ select_event_lwp (&lp, &status);
+ }
/* Now that we've selected our final event LWP, cancel any
breakpoints in other LWPs that have hit a GDB breakpoint. See
@@ -2850,13 +2909,25 @@ static int
kill_callback (struct lwp_info *lp, void *data)
{
errno = 0;
- ptrace (PTRACE_KILL, GET_LWP (lp->ptid), 0, 0);
- if (debug_linux_nat)
- fprintf_unfiltered (gdb_stdlog,
- "KC: PTRACE_KILL %s, 0, 0 (%s)\n",
- target_pid_to_str (lp->ptid),
- errno ? safe_strerror (errno) : "OK");
-
+ /* PTRACE_KILL doesn't work when the thread is running. */
+ if (!lp->stopped)
+ {
+ kill_lwp (GET_LWP (lp->ptid), SIGKILL);
+ if (debug_linux_nat)
+ fprintf_unfiltered (gdb_stdlog,
+ "KC: kill_lwp (SIGKILL) %s (%s)\n",
+ target_pid_to_str (lp->ptid),
+ errno ? safe_strerror (errno) : "OK");
+ }
+ else
+ {
+ ptrace (PTRACE_KILL, GET_LWP (lp->ptid), 0, 0);
+ if (debug_linux_nat)
+ fprintf_unfiltered (gdb_stdlog,
+ "KC: PTRACE_KILL %s, 0, 0 (%s)\n",
+ target_pid_to_str (lp->ptid),
+ errno ? safe_strerror (errno) : "OK");
+ }
return 0;
}
@@ -2999,22 +3070,22 @@ linux_nat_xfer_partial (struct target_op
static int
linux_nat_thread_alive (ptid_t ptid)
{
+ int err;
+
gdb_assert (is_lwp (ptid));
- errno = 0;
- ptrace (PTRACE_PEEKUSER, GET_LWP (ptid), 0, 0);
+ /* Send signal 0 instead of anything ptrace, because ptracing a
+ running thread errors out claiming that the thread doesn't
+ exist. */
+ err = kill_lwp (GET_LWP (ptid), 0);
+
if (debug_linux_nat)
fprintf_unfiltered (gdb_stdlog,
- "LLTA: PTRACE_PEEKUSER %s, 0, 0 (%s)\n",
+ "LLTA: KILL(SIG0) %s (%s)\n",
target_pid_to_str (ptid),
- errno ? safe_strerror (errno) : "OK");
+ err ? safe_strerror (err) : "OK");
- /* Not every Linux kernel implements PTRACE_PEEKUSER. But we can
- handle that case gracefully since ptrace will first do a lookup
- for the process based upon the passed-in pid. If that fails we
- will get either -ESRCH or -EPERM, otherwise the child exists and
- is alive. */
- if (errno == ESRCH || errno == EPERM)
+ if (err != 0)
return 0;
return 1;
@@ -4174,6 +4245,35 @@ linux_nat_set_async_mode (int on)
linux_nat_async_enabled = on;
}
+static int
+send_sigint_callback (struct lwp_info *lp, void *data)
+{
+ if (!lp->stopped && !lp->sigint)
+ {
+ kill_lwp (GET_LWP (lp->ptid), SIGINT);
+ lp->sigint = 1;
+ }
+ return 0;
+}
+
+static void
+linux_nat_stop (void)
+{
+ if (non_stop)
+ iterate_over_lwps (send_sigint_callback, NULL);
+ else
+ linux_ops->to_stop ();
+}
+
+static void
+linux_nat_stop_ptid (ptid_t ptid)
+{
+ if (ptid_equal (ptid, minus_one_ptid))
+ iterate_over_lwps (send_sigint_callback, NULL);
+ else
+ kill_lwp (GET_LWP (ptid), SIGINT);
+}
+
void
linux_nat_add_target (struct target_ops *t)
{
@@ -4204,6 +4304,9 @@ linux_nat_add_target (struct target_ops
t->to_terminal_inferior = linux_nat_terminal_inferior;
t->to_terminal_ours = linux_nat_terminal_ours;
+ t->to_stop = linux_nat_stop;
+ t->to_stop_ptid = linux_nat_stop_ptid;
+
/* We don't change the stratum; this target will sit at
process_stratum and thread_db will set at thread_stratum. This
is a little strange, since this is a multi-threaded-capable
Index: src/gdb/linux-nat.h
===================================================================
--- src.orig/gdb/linux-nat.h 2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-nat.h 2008-05-06 16:24:05.000000000 +0100
@@ -37,6 +37,10 @@ struct lwp_info
SIGCHLD. */
int cloned;
+ /* Non-zero if we sent this LWP a SIGINT (but the LWP didn't report
+ it back yet). */
+ int sigint;
+
/* Non-zero if we sent this LWP a SIGSTOP (but the LWP didn't report
it back yet). */
int signalled;
Index: src/gdb/linux-thread-db.c
===================================================================
--- src.orig/gdb/linux-thread-db.c 2008-05-06 15:52:28.000000000 +0100
+++ src/gdb/linux-thread-db.c 2008-05-06 16:24:05.000000000 +0100
@@ -308,6 +308,8 @@ thread_from_lwp (ptid_t ptid)
LWP. */
gdb_assert (GET_LWP (ptid) != 0);
+ /* Access an lwp we know is stopped. */
+ proc_handle.pid = GET_LWP (ptid);
err = td_ta_map_lwp2thr_p (thread_agent, GET_LWP (ptid), &th);
if (err != TD_OK)
error (_("Cannot find user-level thread for LWP %ld: %s"),
@@ -418,6 +420,9 @@ enable_thread_event (td_thragent_t *thre
td_notify_t notify;
td_err_e err;
+ /* Access an lwp we know is stopped. */
+ proc_handle.pid = GET_LWP (inferior_ptid);
+
/* Get the breakpoint address for thread EVENT. */
err = td_ta_event_addr_p (thread_agent, event, &notify);
if (err != TD_OK)
@@ -761,6 +766,9 @@ check_event (ptid_t ptid)
if (stop_pc != td_create_bp_addr && stop_pc != td_death_bp_addr)
return;
+ /* Access an lwp we know is stopped. */
+ proc_handle.pid = GET_LWP (ptid);
+
/* If we are at a create breakpoint, we do not know what new lwp
was created and cannot specifically locate the event message for it.
We have to call td_ta_event_getmsg() to get
@@ -961,6 +969,11 @@ thread_db_find_new_threads (void)
{
td_err_e err;
+ if (in_thread_list (inferior_ptid) && is_executing (inferior_ptid))
+ return;
+
+ /* Access an lwp we know is stopped. */
+ proc_handle.pid = GET_LWP (inferior_ptid);
/* Iterate over all user-space threads to discover new threads. */
err = td_ta_thr_iter_p (thread_agent, find_new_threads_callback, NULL,
TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,