[rfa/linux] Make SIGINT handling more robust

Daniel Jacobowitz drow@false.org
Mon Apr 4 04:15:00 GMT 2005


This patch, which I've had kicking around for a year or so, makes SIGINT
handling much more reliable.

The problem being solved by this code (both the current version, and the
replacement below) is this: in a LinuxThreads program, every thread is a
process.  When a signal is sent to the process group, every thread will
receive it.  If the user hits C-c at the console, the signal will get sent
to every thread; the user will be presented with an indeterminate number of
SIGINT reports instead of one.  This could apply to other signals, too, but
SIGINT was the only one handled by the old code.

Previously, we had flush_callback.  This function resumed the inferior when
it saw a pending SIGINT, in order to force the SIGINT to be delivered.  This
method is unreliable; it had the potential to lose other signals, and often
generated a failed assertion for "lp->status == 0".

This patch relies completely on recording the pending signal, and discarding
it once it is delivered.  An unexpected SIGINT will cause a flag to be set
on every thread.  Then, as long as /proc indicates that the signal is
pending, we will leave the flag set.  If a SIGINT is received while the flag
is set, it is discarded.  At various points we check to see if the signal
is still pending; if it isn't, then we assume it was delivered to some other
thread (the POSIX and NPTL semantics) and clear the flag.

Tested on i686-pc-linux-gnu, with both LinuxThreads and NPTL.  With
LinuxThreads, this removes an intermittent failure in watchthreads.exp
(improved testcase coming soon).  Also tested with my previous patch on
mips-unknown-linux-gnu, where results for schedlock.exp go from abyssmal
to fairly good.

OK?

-- 
Daniel Jacobowitz
CodeSourcery, LLC

2005-04-04  Daniel Jacobowitz  <dan@codesourcery.com>

	* linux-nat.c (resume_callback): Add more debugging output.
	(linux_nat_has_pending_sigint): New function, based on
	linux_nat_has_pending.
	(set_ignore_sigint, maybe_clear_ignore_sigint): New functions.
	(stop_wait_callback): Remove flush_mask handling.  Honor
	ignore_sigint.  Call maybe_clear_ignore_sigint.  Pass NULL
	to recursive calls.
	(linux_nat_has_pending, flush_callback): Remove.
	(linux_nat_wait): Add more debugging output.  Remove flush_mask
	support and call to flush_callback.  Honor ignore_sigint.  Use
	set_ignore_sigint and maybe_clear_ignore_sigint.
	* linux-nat.h (struct lwp_info): Add ignore_sigint field.

Index: linux-nat.c
===================================================================
RCS file: /big/fsf/rsync/src-cvs/src/gdb/linux-nat.c,v
retrieving revision 1.27
diff -u -p -r1.27 linux-nat.c
--- linux-nat.c	6 Mar 2005 16:42:20 -0000	1.27
+++ linux-nat.c	4 Apr 2005 03:10:23 -0000
@@ -1042,6 +1042,12 @@ resume_callback (struct lwp_info *lp, vo
       lp->stopped = 0;
       lp->step = 0;
     }
+  else if (lp->stopped && debug_linux_nat)
+    fprintf_unfiltered (gdb_stdlog, "RC: Not resuming sibling %s (has pending)\n",
+			target_pid_to_str (lp->ptid));
+  else if (debug_linux_nat)
+    fprintf_unfiltered (gdb_stdlog, "RC: Not resuming sibling %s (not stopped)\n",
+			target_pid_to_str (lp->ptid));
 
   return 0;
 }
@@ -1294,14 +1300,67 @@ stop_callback (struct lwp_info *lp, void
   return 0;
 }
 
-/* Wait until LP is stopped.  If DATA is non-null it is interpreted as
-   a pointer to a set of signals to be flushed immediately.  */
+/* Return non-zero if LWP PID has a pending SIGINT.  */
 
 static int
-stop_wait_callback (struct lwp_info *lp, void *data)
+linux_nat_has_pending_sigint (int pid)
+{
+  sigset_t pending, blocked, ignored;
+  int i;
+
+  linux_proc_pending_signals (pid, &pending, &blocked, &ignored);
+
+  /* && !sigismember (&blocked, SIGINT) */
+  if (sigismember (&pending, SIGINT)
+      && !sigismember (&ignored, SIGINT))
+    return 1;
+
+  return 0;
+}
+
+/* Set a flag in LP indicating that we should ignore its next SIGINT.  */
+
+static int
+set_ignore_sigint (struct lwp_info *lp, void *data)
 {
-  sigset_t *flush_mask = data;
+  /* If a thread has a pending SIGINT, consume it; otherwise, set a
+     flag to consume the next one.  */
+  if (lp->stopped && lp->status != 0 && WIFSTOPPED (lp->status)
+      && WSTOPSIG (lp->status) == SIGINT)
+    lp->status = 0;
+  else
+    lp->ignore_sigint = 1;
+
+  return 0;
+}
+
+/* If LP does not have a SIGINT pending, then clear the ignore_sigint flag.
+   This function is called after we know the LWP has stopped; if the LWP
+   stopped before the expected SIGINT was delivered, then it will never have
+   arrived.  Also, if the signal was delivered to a shared queue and consumed
+   by a different thread, it will never be delivered to this LWP.  */
+   
+static void
+maybe_clear_ignore_sigint (struct lwp_info *lp)
+{
+  if (!lp->ignore_sigint)
+    return;
+
+  if (!linux_nat_has_pending_sigint (GET_LWP (lp->ptid)))
+    {
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "MCIS: Clearing bogus flag for %s\n",
+			    target_pid_to_str (lp->ptid));
+      lp->ignore_sigint = 0;
+    }
+}
 
+/* Wait until LP is stopped.  */
+
+static int
+stop_wait_callback (struct lwp_info *lp, void *data)
+{
   if (!lp->stopped)
     {
       int status;
@@ -1310,26 +1369,24 @@ stop_wait_callback (struct lwp_info *lp,
       if (status == 0)
 	return 0;
 
-      /* Ignore any signals in FLUSH_MASK.  */
-      if (flush_mask && sigismember (flush_mask, WSTOPSIG (status)))
+      if (lp->ignore_sigint && WIFSTOPPED (status)
+	  && WSTOPSIG (status) == SIGINT)
 	{
-	  if (!lp->signalled)
-	    {
-	      lp->stopped = 1;
-	      return 0;
-	    }
+	  lp->ignore_sigint = 0;
 
 	  errno = 0;
 	  ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
 	  if (debug_linux_nat)
 	    fprintf_unfiltered (gdb_stdlog,
-				"PTRACE_CONT %s, 0, 0 (%s)\n",
+				"PTRACE_CONT %s, 0, 0 (%s) (discarding SIGINT)\n",
 				target_pid_to_str (lp->ptid),
 				errno ? safe_strerror (errno) : "OK");
 
-	  return stop_wait_callback (lp, flush_mask);
+	  return stop_wait_callback (lp, NULL);
 	}
 
+      maybe_clear_ignore_sigint (lp);
+
       if (WSTOPSIG (status) != SIGSTOP)
 	{
 	  if (WSTOPSIG (status) == SIGTRAP)
@@ -1362,7 +1419,7 @@ stop_wait_callback (struct lwp_info *lp,
 				      target_pid_to_str (lp->ptid));
 		}
 	      /* Hold the SIGTRAP for handling by linux_nat_wait. */
-	      stop_wait_callback (lp, data);
+	      stop_wait_callback (lp, NULL);
 	      /* If there's another event, throw it back into the queue. */
 	      if (lp->status)
 		{
@@ -1402,7 +1459,7 @@ stop_wait_callback (struct lwp_info *lp,
 
 	      /* Hold this event/waitstatus while we check to see if
 	         there are any more (we still want to get that SIGSTOP). */
-	      stop_wait_callback (lp, data);
+	      stop_wait_callback (lp, NULL);
 	      /* If the lp->status field is still empty, use it to hold
 	         this event.  If not, then this event must be returned
 	         to the event queue of the LWP.  */
@@ -1434,88 +1491,6 @@ stop_wait_callback (struct lwp_info *lp,
   return 0;
 }
 
-/* Check whether PID has any pending signals in FLUSH_MASK.  If so set
-   the appropriate bits in PENDING, and return 1 - otherwise return 0.  */
-
-static int
-linux_nat_has_pending (int pid, sigset_t *pending, sigset_t *flush_mask)
-{
-  sigset_t blocked, ignored;
-  int i;
-
-  linux_proc_pending_signals (pid, pending, &blocked, &ignored);
-
-  if (!flush_mask)
-    return 0;
-
-  for (i = 1; i < NSIG; i++)
-    if (sigismember (pending, i))
-      if (!sigismember (flush_mask, i)
-	  || sigismember (&blocked, i)
-	  || sigismember (&ignored, i))
-	sigdelset (pending, i);
-
-  if (sigisemptyset (pending))
-    return 0;
-
-  return 1;
-}
-
-/* DATA is interpreted as a mask of signals to flush.  If LP has
-   signals pending, and they are all in the flush mask, then arrange
-   to flush them.  LP should be stopped, as should all other threads
-   it might share a signal queue with.  */
-
-static int
-flush_callback (struct lwp_info *lp, void *data)
-{
-  sigset_t *flush_mask = data;
-  sigset_t pending, intersection, blocked, ignored;
-  int pid, status;
-
-  /* Normally, when an LWP exits, it is removed from the LWP list.  The
-     last LWP isn't removed till later, however.  So if there is only
-     one LWP on the list, make sure it's alive.  */
-  if (lwp_list == lp && lp->next == NULL)
-    if (!linux_nat_thread_alive (lp->ptid))
-      return 0;
-
-  /* Just because the LWP is stopped doesn't mean that new signals
-     can't arrive from outside, so this function must be careful of
-     race conditions.  However, because all threads are stopped, we
-     can assume that the pending mask will not shrink unless we resume
-     the LWP, and that it will then get another signal.  We can't
-     control which one, however.  */
-
-  if (lp->status)
-    {
-      if (debug_linux_nat)
-	printf_unfiltered (_("FC: LP has pending status %06x\n"), lp->status);
-      if (WIFSTOPPED (lp->status) && sigismember (flush_mask, WSTOPSIG (lp->status)))
-	lp->status = 0;
-    }
-
-  while (linux_nat_has_pending (GET_LWP (lp->ptid), &pending, flush_mask))
-    {
-      int ret;
-      
-      errno = 0;
-      ret = ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
-      if (debug_linux_nat)
-	fprintf_unfiltered (gdb_stderr,
-			    "FC: Sent PTRACE_CONT, ret %d %d\n", ret, errno);
-
-      lp->stopped = 0;
-      stop_wait_callback (lp, flush_mask);
-      if (debug_linux_nat)
-	fprintf_unfiltered (gdb_stderr,
-			    "FC: Wait finished; saved status is %d\n",
-			    lp->status);
-    }
-
-  return 0;
-}
-
 /* Return non-zero if LP has a wait status pending.  */
 
 static int
@@ -1819,9 +1794,10 @@ linux_nat_wait (ptid_t ptid, struct targ
   int options = 0;
   int status = 0;
   pid_t pid = PIDGET (ptid);
-  sigset_t flush_mask;
 
-  sigemptyset (&flush_mask);
+  if (debug_linux_nat)
+    fprintf_unfiltered (gdb_stderr, "LLW: Waiting for %s\n",
+			target_pid_to_str (ptid));
 
   /* Make sure SIGCHLD is blocked.  */
   if (!sigismember (&blocked_mask, SIGCHLD))
@@ -2117,6 +2093,37 @@ retry:
 	      continue;
 	    }
 
+	  /* Make sure we don't report a SIGINT that we have already
+	     displayed for another thread.  */
+	  if (lp->ignore_sigint
+	      && WIFSTOPPED (status) && WSTOPSIG (status) == SIGINT)
+	    {
+	      if (debug_linux_nat)
+		fprintf_unfiltered (gdb_stdlog,
+				    "LLW: Delayed SIGINT caught for %s.\n",
+				    target_pid_to_str (lp->ptid));
+
+	      /* This is a delayed SIGINT.  */
+	      lp->ignore_sigint = 0;
+
+	      registers_changed ();
+	      child_resume (pid_to_ptid (GET_LWP (lp->ptid)), lp->step,
+			    TARGET_SIGNAL_0);
+	      if (debug_linux_nat)
+		fprintf_unfiltered (gdb_stdlog,
+				    "LLW: %s %s, 0, 0 (discard SIGINT)\n",
+				    lp->step ?
+				    "PTRACE_SINGLESTEP" : "PTRACE_CONT",
+				    target_pid_to_str (lp->ptid));
+
+	      lp->stopped = 0;
+	      gdb_assert (lp->resumed);
+
+	      /* Discard the event.  */
+	      status = 0;
+	      continue;
+	    }
+
 	  break;
 	}
 
@@ -2176,11 +2183,15 @@ retry:
       if (signo == TARGET_SIGNAL_INT && signal_pass_state (signo) == 0)
 	{
 	  /* If ^C/BREAK is typed at the tty/console, SIGINT gets
-	     forwarded to the entire process group, that is, all LWP's
-	     will receive it.  Since we only want to report it once,
-	     we try to flush it from all LWPs except this one.  */
-	  sigaddset (&flush_mask, SIGINT);
+	     forwarded to the entire process group, that is, all LWPs
+	     will receive it - unless they're using CLONE_THREAD to
+	     share signals.  Since we only want to report it once, we
+	     mark it as ignored for all LWPs except this one.  */
+	  iterate_over_lwps (set_ignore_sigint, NULL);
+	  lp->ignore_sigint = 0;
 	}
+      else
+	maybe_clear_ignore_sigint (lp);
     }
 
   /* This LWP is stopped now.  */
@@ -2195,8 +2206,7 @@ retry:
 
   /* ... and wait until all of them have reported back that they're no
      longer running.  */
-  iterate_over_lwps (stop_wait_callback, &flush_mask);
-  iterate_over_lwps (flush_callback, &flush_mask);
+  iterate_over_lwps (stop_wait_callback, NULL);
 
   /* If we're not waiting for a specific LWP, choose an event LWP from
      among those that have had events.  Giving equal priority to all
Index: linux-nat.h
===================================================================
RCS file: /big/fsf/rsync/src-cvs/src/gdb/linux-nat.h,v
retrieving revision 1.6
diff -u -p -r1.6 linux-nat.h
--- linux-nat.h	29 Mar 2004 18:07:14 -0000	1.6
+++ linux-nat.h	3 Apr 2005 22:02:25 -0000
@@ -1,5 +1,5 @@
 /* Native debugging support for GNU/Linux (LWP layer).
-   Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
 
    This file is part of GDB.
 
@@ -54,6 +54,9 @@ struct lwp_info
   /* Non-zero if we were stepping this LWP.  */
   int step;
 
+  /* Non-zero if we expect a duplicated SIGINT.  */
+  int ignore_sigint;
+
   /* If WAITSTATUS->KIND != TARGET_WAITKIND_SPURIOUS, the waitstatus
      for this LWP's last event.  This may correspond to STATUS above,
      or to a local variable in lin_lwp_wait.  */



More information about the Gdb-patches mailing list