[PATCH] Fix PR 12702 - gdb can hang waiting for thread group leader (gdbserver)
Hui Zhu
teawater@gmail.com
Tue Nov 19 06:19:00 GMT 2013
On Thu, Jul 25, 2013 at 2:25 AM, Pedro Alves <palves@redhat.com> wrote:
> On 07/20/2013 10:59 AM, Hui Zhu wrote:
>> Hi,
>>
>> I got a issue with leader-exit.exp will hang with target_board=native-gdbserver.
>> I found the reason is gdbserver still have this issue. So I post a
>> patch for gdbserver.
>
> leader-exit.exp is part of a series of tests for a set of related
> problems. See:
>
> http://www.sourceware.org/ml/gdb-patches/2011-10/msg00704.html
>
> Several are currently masked because gdbserver doesn't support
> fork/exec yet.
>
> We should fix this by implementing TARGET_WAITKIND_NO_RESUMED
> on gdbserver, like the patch above did for native.
>
> --
> Pedro Alves
>
Hi Pedro,
According to your comments, I make a new patch to add
TARGET_WAITKIND_NO_RESUMED to gdbserver.
It can pass the leader-exit.exp and pass the regression test.
Please help me review it.
Thanks,
Hui
2013-11-19 Hui Zhu <hui@codesourcery.com>
* common/ptid.c (ptid_match): New.
* common/ptid.h (ptid_match): New extern.
* inferior.h (ptid_match): Removed.
* infrun.c (ptid_match): Removed.
2013-11-19 Hui Zhu <hui@codesourcery.com>
* linux-low.c (sigchld_mask): New.
(handle_extended_wait): Add debug output.
(num_lwps): New.
(check_zombie_leaders_callback): New.
(check_zombie_leaders): New.
(not_suspended_callback): New.
(check_pending_stop): New.
(linux_wait_for_lwp): Call my_waitpid with WNOHANG.
(linux_wait_for_event): Change the return check for
linux_wait_for_lwp.
(wait_lwp): New.
(wait_for_sigstop): Call wait_lwp.
* linux-low.h (lwp_info): Add pending_stop.
* server.c (handle_target_event): Add check for
TARGET_WAITKIND_NO_RESUMED.
-------------- next part --------------
--- a/gdb/common/ptid.c
+++ b/gdb/common/ptid.c
@@ -114,3 +114,17 @@ ptid_tid_p (ptid_t ptid)
return (ptid_get_tid (ptid) != 0);
}
+
+int
+ptid_match (ptid_t ptid, ptid_t filter)
+{
+ if (ptid_equal (filter, minus_one_ptid))
+ return 1;
+ if (ptid_is_pid (filter)
+ && ptid_get_pid (ptid) == ptid_get_pid (filter))
+ return 1;
+ else if (ptid_equal (ptid, filter))
+ return 1;
+
+ return 0;
+}
--- a/gdb/common/ptid.h
+++ b/gdb/common/ptid.h
@@ -78,4 +78,14 @@ int ptid_lwp_p (ptid_t ptid);
/* Return true if PTID's tid member is non-zero. */
int ptid_tid_p (ptid_t ptid);
+/* Returns true if PTID matches filter FILTER. FILTER can be the wild
+ card MINUS_ONE_PTID (all ptid match it); can be a ptid representing
+ a process (ptid_is_pid returns true), in which case, all lwps and
+ threads of that given process match, lwps and threads of other
+ processes do not; or, it can represent a specific thread, in which
+ case, only that thread will match true. PTID must represent a
+ specific LWP or THREAD, it can never be a wild card. */
+
+extern int ptid_match (ptid_t ptid, ptid_t filter);
+
#endif
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -220,6 +220,9 @@ int using_threads = 1;
jump pads). */
static int stabilizing_threads;
+/* Signals to block SIGCHLD. */
+static sigset_t sigchld_mask;
+
static void linux_resume_one_lwp (struct lwp_info *lwp,
int step, int signal, siginfo_t *info);
static void linux_resume (struct thread_resume *resume_info, size_t n);
@@ -407,6 +410,11 @@ handle_extended_wait (struct lwp_info *e
warning ("wait returned unexpected status 0x%x", status);
}
+ if (debug_threads)
+ fprintf (stderr, "HEW: Got clone event "
+ "from LWP %ld, new child is LWP %ld\n",
+ lwpid_of (event_child), new_pid);
+
ptid = ptid_build (pid_of (event_child), new_pid, 0);
new_lwp = (struct lwp_info *) add_lwp (ptid);
add_thread (ptid, new_lwp);
@@ -1268,30 +1276,148 @@ find_lwp_pid (ptid_t ptid)
return (struct lwp_info*) find_inferior (&all_lwps, same_lwp, &ptid);
}
+/* Return the number of known LWPs in the tgid given by PID. */
+
+static int
+num_lwps (int pid)
+{
+ int count = 0;
+ struct inferior_list_entry *inf = all_lwps.head;
+
+ while (inf != NULL)
+ {
+ struct inferior_list_entry *next;
+
+ next = inf->next;
+ if (ptid_get_pid(inf->id) == pid)
+ count++;
+ inf = next;
+ }
+
+ return count;
+}
+
+static void
+check_zombie_leaders_callback (struct inferior_list_entry *inf)
+{
+ pid_t leader_pid = ptid_get_pid (inf->id);
+ struct lwp_info *leader_lp;
+
+ leader_lp = find_lwp_pid (inf->id);
+ if (leader_lp != NULL
+ /* Check if there are other threads in the group, as we may
+ have raced with the inferior simply exiting. */
+ && num_lwps (leader_pid) > 1
+ && linux_proc_pid_is_zombie (leader_pid))
+ delete_lwp (leader_lp);
+}
+
+/* Detect zombie thread group leaders, and "exit" them. We can't reap
+ their exits until all other threads in the group have exited. */
+
+static void
+check_zombie_leaders (void)
+{
+ for_each_inferior (&all_processes, check_zombie_leaders_callback);
+}
+
+static int
+not_suspended_callback (struct inferior_list_entry *inf, void *arg)
+{
+ ptid_t filter = *(ptid_t *)arg;
+ struct lwp_info *lwp;
+
+ if (!ptid_match (inf->id, filter))
+ return 0;
+
+ lwp = find_lwp_pid (inf->id);
+ if (lwp == NULL)
+ return 0;
+
+ if (lwp->suspended)
+ return 0;
+
+ return 1;
+}
+
+static int
+check_pending_stop (struct inferior_list_entry *inf, void *data)
+{
+ ptid_t ptid = *(ptid_t *) data;
+ struct lwp_info *lwp;
+
+ if (ptid_equal (ptid, minus_one_ptid)
+ || ptid_get_pid(inf->id) == ptid_get_pid(ptid))
+ {
+ lwp = find_lwp_pid (inf->id);
+ if (lwp->pending_stop)
+ return 1;
+ }
+
+ return 0;
+}
+
static struct lwp_info *
linux_wait_for_lwp (ptid_t ptid, int *wstatp, int options)
{
- int ret;
- int to_wait_for = -1;
+ int ret = 0;
struct lwp_info *child = NULL;
+ sigset_t prev_mask;
if (debug_threads)
fprintf (stderr, "linux_wait_for_lwp: %s\n", target_pid_to_str (ptid));
- if (ptid_equal (ptid, minus_one_ptid))
- to_wait_for = -1; /* any child */
- else
- to_wait_for = ptid_get_lwp (ptid); /* this lwp only */
+ /* Make sure SIGCHLD is blocked. */
+ sigprocmask (SIG_BLOCK, &sigchld_mask, &prev_mask);
- options |= __WALL;
+ /* First check if there is a LWP with a wait status pending. */
+ if (ptid_equal (ptid, minus_one_ptid) || ptid_get_lwp (ptid) == 0)
+ child = (struct lwp_info*) find_inferior (&all_lwps, check_pending_stop,
+ &ptid);
+ else
+ child = find_lwp_pid (ptid);
+ if (child && child->pending_stop)
+ {
+ if (debug_threads)
+ fprintf (stderr,
+ "linux_wait_for_lwp: %s has a wait status pending.\n",
+ target_pid_to_str (ptid_of (child)));
+ *wstatp = child->last_status;
+ child->pending_stop = 0;
+ child->stopped = 1;
+ goto got_child;
+ }
retry:
+ ret = my_waitpid (-1, wstatp, __WCLONE | WNOHANG);
+ if (ret == 0 || (ret == -1 && errno == ECHILD))
+ ret = my_waitpid (-1, wstatp, WNOHANG);
- ret = my_waitpid (to_wait_for, wstatp, options);
- if (ret == 0 || (ret == -1 && errno == ECHILD && (options & WNOHANG)))
- return NULL;
- else if (ret == -1)
- perror_with_name ("waitpid");
+ if (ret <= 0)
+ {
+ sigset_t suspend_mask;
+
+ /* Check for zombie thread group leaders. Those can't be reaped
+ until all other threads in the thread group are. */
+ check_zombie_leaders ();
+
+ /* If there are no resumed children left, bail. We'd be stuck
+ forever in the sigsuspend call below otherwise.
+ OR no interesting event to report to the core.*/
+ if ((find_inferior (&all_lwps, not_suspended_callback, &ptid) == NULL)
+ || (options & WNOHANG))
+ {
+ sigprocmask (SIG_SETMASK, &prev_mask, NULL);
+ return NULL;
+ }
+
+ /* Block until we get an event reported with SIGCHLD. */
+ sigprocmask (SIG_SETMASK, NULL, &suspend_mask);
+ sigdelset (&suspend_mask, SIGCHLD);
+ sigsuspend (&suspend_mask);
+
+ goto retry;
+ }
if (debug_threads
&& (!WIFSTOPPED (*wstatp)
@@ -1408,6 +1534,24 @@ retry:
current_inferior = saved_inferior;
}
+ if (!ptid_match (ptid_of (child), ptid))
+ {
+ if (debug_threads)
+ {
+ char child_ptid_buf[80];
+
+ strcpy (child_ptid_buf, target_pid_to_str (ptid_of (child)));
+ fprintf (stderr, "linux_wait_for_lwp: %s is not same with %s.\n",
+ child_ptid_buf, target_pid_to_str (ptid));
+ }
+ child->pending_stop = 1;
+ child->stopped = 0;
+ goto retry;
+ }
+
+got_child:
+ sigprocmask (SIG_SETMASK, &prev_mask, NULL);
+
return child;
}
@@ -1844,16 +1988,13 @@ linux_wait_for_event (ptid_t ptid, int *
{
event_child = linux_wait_for_lwp (wait_ptid, wstat, options);
- if ((options & WNOHANG) && event_child == NULL)
+ if (event_child == NULL)
{
if (debug_threads)
- fprintf (stderr, "WNOHANG set, no event found\n");
+ fprintf (stderr, "No event found\n");
return 0;
}
- if (event_child == NULL)
- error ("event from unknown child");
-
if (ptid_is_pid (ptid)
&& ptid_get_pid (ptid) != ptid_get_pid (ptid_of (event_child)))
{
@@ -2314,8 +2455,25 @@ retry:
pid = linux_wait_for_event (step_over_bkpt, &w, options & ~WNOHANG);
}
- if (pid == 0) /* only if TARGET_WNOHANG */
- return null_ptid;
+ if (pid == 0)
+ {
+ /* If there are no resumed children left, bail. We'd be stuck
+ forever in the sigsuspend call below otherwise. */
+ if (find_inferior (&all_lwps, not_suspended_callback, &ptid) == NULL)
+ {
+ ourstatus->kind = TARGET_WAITKIND_NO_RESUMED;
+ return minus_one_ptid;
+ }
+
+ /* No interesting event to report to the core. */
+ if (target_options & TARGET_WNOHANG)
+ {
+ ourstatus->kind = TARGET_WAITKIND_IGNORE;
+ return minus_one_ptid;
+ }
+
+ return null_ptid;
+ }
event_child = get_thread_lwp (current_inferior);
@@ -2899,6 +3057,175 @@ mark_lwp_dead (struct lwp_info *lwp, int
lwp->stop_expected = 0;
}
+/* Wait for LP to stop. Returns the wait status, or 0 if the LWP has
+ exited. */
+
+static int
+wait_lwp (struct lwp_info *lwp)
+{
+ pid_t pid;
+ int status = 0;
+ int thread_dead = 0;
+ sigset_t prev_mask;
+
+ /* Make sure SIGCHLD is blocked for sigsuspend avoiding a race below. */
+ sigprocmask (SIG_BLOCK, &sigchld_mask, &prev_mask);
+
+ for (;;)
+ {
+ sigset_t suspend_mask;
+
+ /* If my_waitpid returns 0 it means the __WCLONE vs. non-__WCLONE kind
+ was right and we should just call sigsuspend. */
+
+ pid = my_waitpid (ptid_get_lwp (lwp->head.id), &status, WNOHANG);
+ if (pid == -1 && errno == ECHILD)
+ pid = my_waitpid (ptid_get_lwp (lwp->head.id), &status, __WCLONE | WNOHANG);
+ if (pid == -1 && errno == ECHILD)
+ {
+ /* The thread has previously exited. We need to delete it
+ now because, for some vendor 2.4 kernels with NPTL
+ support backported, there won't be an exit event unless
+ it is the main thread. 2.6 kernels will report an exit
+ event for each thread that exits, as expected. */
+ thread_dead = 1;
+ if (debug_threads)
+ fprintf (stderr, "WL: %s vanished.\n",
+ target_pid_to_str (lwp->head.id));
+ }
+ if (pid != 0)
+ break;
+
+ /* Bugs 10970, 12702.
+ Thread group leader may have exited in which case we'll lock up in
+ waitpid if there are other threads, even if they are all zombies too.
+ Basically, we're not supposed to use waitpid this way.
+ __WCLONE is not applicable for the leader so we can't use that.
+ LINUX_NAT_THREAD_ALIVE cannot be used here as it requires a STOPPED
+ process; it gets ESRCH both for the zombie and for running processes.
+
+ As a workaround, check if we're waiting for the thread group leader and
+ if it's a zombie, and avoid calling waitpid if it is.
+
+ This is racy, what if the tgl becomes a zombie right after we check?
+ Therefore always use WNOHANG with sigsuspend - it is equivalent to
+ waiting waitpid but linux_proc_pid_is_zombie is safe this way. */
+
+ if (ptid_get_pid (lwp->head.id) == ptid_get_lwp (lwp->head.id)
+ && linux_proc_pid_is_zombie (ptid_get_lwp (lwp->head.id)))
+ {
+ thread_dead = 1;
+ if (debug_threads)
+ fprintf (stderr, "WL: Thread group leader %s vanished.\n",
+ target_pid_to_str (lwp->head.id));
+ break;
+ }
+
+ /* Wait for next SIGCHLD and try again. This may let SIGCHLD handlers
+ get invoked despite our caller had them intentionally blocked by
+ block_child_signals. This is sensitive only to the loop of
+ linux_nat_wait_1 and there if we get called my_waitpid gets called
+ again before it gets to sigsuspend so we can safely let the handlers
+ get executed here. */
+
+ /* Block until we get an event reported with SIGCHLD. */
+ sigprocmask (SIG_SETMASK, NULL, &suspend_mask);
+ sigdelset (&suspend_mask, SIGCHLD);
+ sigsuspend (&suspend_mask);
+ }
+
+ sigprocmask (SIG_SETMASK, &prev_mask, NULL);
+
+ if (!thread_dead)
+ {
+ gdb_assert (pid == ptid_get_lwp (lwp->head.id));
+
+ if (debug_threads)
+ fprintf (stderr, "WL: waitpid %s received %x\n",
+ target_pid_to_str (lwp->head.id), status);
+
+ /* Check if the thread has exited. */
+ if (WIFEXITED (status) || WIFSIGNALED (status))
+ {
+ thread_dead = 1;
+ if (debug_threads)
+ fprintf (stderr, "WL: %s exited.\n",
+ target_pid_to_str (lwp->head.id));
+ }
+ }
+
+ if (thread_dead)
+ {
+ delete_lwp (lwp);
+ return 0;
+ }
+
+ gdb_assert (WIFSTOPPED (status));
+
+ lwp->stopped = 1;
+ lwp->last_status = status;
+ if (WIFSTOPPED (status))
+ {
+ lwp->stop_pc = get_stop_pc (lwp);
+ if (WSTOPSIG (status) == SIGSTOP && lwp->stop_expected)
+ lwp->stop_expected = 0;
+ }
+
+ if (WIFSTOPPED (status) && WSTOPSIG (status) == SIGTRAP)
+ {
+ if (the_low_target.stopped_by_watchpoint == NULL)
+ {
+ lwp->stopped_by_watchpoint = 0;
+ }
+ else
+ {
+ struct thread_info *saved_inferior;
+
+ saved_inferior = current_inferior;
+ current_inferior = get_lwp_thread (lwp);
+
+ lwp->stopped_by_watchpoint
+ = the_low_target.stopped_by_watchpoint ();
+
+ if (lwp->stopped_by_watchpoint)
+ {
+ if (the_low_target.stopped_data_address != NULL)
+ lwp->stopped_data_address
+ = the_low_target.stopped_data_address ();
+ else
+ lwp->stopped_data_address = 0;
+ }
+
+ current_inferior = saved_inferior;
+ }
+ }
+
+ /* Handle GNU/Linux's syscall SIGTRAPs. */
+ if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP)
+ {
+ if (debug_threads)
+ fprintf (stderr, "WL: Handling syscall SIGTRAPs\n");
+ /* No longer need the sysgood bit. The ptrace event ends up
+ recorded in lp->waitstatus if we care for it. We can carry
+ on handling the event like a regular SIGTRAP from here
+ on. */
+ status = W_STOPCODE (SIGTRAP);
+ ptrace (PTRACE_CONT, ptid_get_lwp (lwp->head.id), 0, 0);
+ return wait_lwp (lwp);
+ }
+
+ /* Handle GNU/Linux's extended waitstatus for trace events. */
+ if (WIFSTOPPED (status) && WSTOPSIG (status) == SIGTRAP && status >> 16 != 0)
+ {
+ if (debug_threads)
+ fprintf (stderr, "WL: Handling extended status 0x%06x\n", status);
+ handle_extended_wait (lwp, status);
+ return wait_lwp (lwp);
+ }
+
+ return status;
+}
+
static void
wait_for_sigstop (struct inferior_list_entry *entry)
{
@@ -2906,8 +3233,6 @@ wait_for_sigstop (struct inferior_list_e
struct thread_info *saved_inferior;
int wstat;
ptid_t saved_tid;
- ptid_t ptid;
- int pid;
if (lwp->stopped)
{
@@ -2923,12 +3248,12 @@ wait_for_sigstop (struct inferior_list_e
else
saved_tid = null_ptid; /* avoid bogus unused warning */
- ptid = lwp->head.id;
-
if (debug_threads)
fprintf (stderr, "wait_for_sigstop: pulling one event\n");
- pid = linux_wait_for_event (ptid, &wstat, __WALL);
+ wstat = wait_lwp (lwp);
+ if (wstat == 0)
+ return;
/* If we stopped with a non-SIGSTOP signal, save it for later
and record the pending SIGSTOP. If the process exited, just
@@ -2952,9 +3277,10 @@ wait_for_sigstop (struct inferior_list_e
else
{
if (debug_threads)
- fprintf (stderr, "Process %d exited while stopping LWPs\n", pid);
+ fprintf (stderr, "Process %ld exited while stopping LWPs\n",
+ ptid_get_lwp (lwp->head.id));
- lwp = find_lwp_pid (pid_to_ptid (pid));
+ lwp = find_lwp_pid (pid_to_ptid (ptid_get_lwp (lwp->head.id)));
if (lwp)
{
/* Leave this status pending for the next time we're able to
@@ -5864,5 +6190,8 @@ initialize_low (void)
sigchld_action.sa_flags = SA_RESTART;
sigaction (SIGCHLD, &sigchld_action, NULL);
+ sigemptyset (&sigchld_mask);
+ sigaddset(&sigchld_mask, SIGCHLD);
+
initialize_low_arch ();
}
--- a/gdb/gdbserver/linux-low.h
+++ b/gdb/gdbserver/linux-low.h
@@ -253,6 +253,9 @@ struct lwp_info
event already received in a wait()). */
int stopped;
+ /* If this flag is set, the lwp has a wait status pending. */
+ int pending_stop;
+
/* If this flag is set, the lwp is known to be dead already (exit
event already received in a wait(), and is cached in
status_pending). */
--- a/gdb/gdbserver/server.c
+++ b/gdb/gdbserver/server.c
@@ -3595,7 +3595,8 @@ handle_target_event (int err, gdb_client
last_ptid = mywait (minus_one_ptid, &last_status,
TARGET_WNOHANG, 1);
- if (last_status.kind != TARGET_WAITKIND_IGNORE)
+ if (last_status.kind != TARGET_WAITKIND_IGNORE
+ && last_status.kind != TARGET_WAITKIND_NO_RESUMED)
{
int pid = ptid_get_pid (last_ptid);
struct process_info *process = find_process_pid (pid);
--- a/gdb/inferior.h
+++ b/gdb/inferior.h
@@ -65,16 +65,6 @@ extern void discard_infcall_control_stat
extern struct regcache *
get_infcall_suspend_state_regcache (struct infcall_suspend_state *);
-/* Returns true if PTID matches filter FILTER. FILTER can be the wild
- card MINUS_ONE_PTID (all ptid match it); can be a ptid representing
- a process (ptid_is_pid returns true), in which case, all lwps and
- threads of that given process match, lwps and threads of other
- processes do not; or, it can represent a specific thread, in which
- case, only that thread will match true. PTID must represent a
- specific LWP or THREAD, it can never be a wild card. */
-
-extern int ptid_match (ptid_t ptid, ptid_t filter);
-
/* Save value of inferior_ptid so that it may be restored by
a later call to do_cleanups(). Returns the struct cleanup
pointer needed for later doing the cleanup. */
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -7059,20 +7059,6 @@ discard_infcall_control_state (struct in
xfree (inf_status);
}
-int
-ptid_match (ptid_t ptid, ptid_t filter)
-{
- if (ptid_equal (filter, minus_one_ptid))
- return 1;
- if (ptid_is_pid (filter)
- && ptid_get_pid (ptid) == ptid_get_pid (filter))
- return 1;
- else if (ptid_equal (ptid, filter))
- return 1;
-
- return 0;
-}
-
/* restore_inferior_ptid() will be used by the cleanup machinery
to restore the inferior_ptid value saved in a call to
save_inferior_ptid(). */
More information about the Gdb-patches
mailing list