This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[commit] GNU/Linux threading fix for high frequency signals


This patch fixes, and adds a testcase for, a bug involving LinuxThreads
and SIGINT.  The original problem was that we could send SIGSTOP to a
thread, wait for it to stop, and see that it had a pending SIGINT.
We'd try to resume it to get rid of the SIGINT.  But if we get a signal
other than the SIGINT, we have to stop!  Otherwise, we might lose an
in-flight signal.  And in fact there's an assert in place that prevents
us from resuming when we might lose a signal.  If you run the test case
in a loop on a system using LinuxThreads you'll eventually hit that
assertion.

The rest of the patch is for a bug I discovered while trying to write
a testcase :-(  If we're waiting for a thread to get its first
SIGSTOP, a quirk of the implementation means it might get some
other signal first.  If so, we have to be sure not to lose that
signal.

Tested on x86_64-pc-linux-gnu (with NPTL) and manually with
LinuxThreads, and checked in.

-- 
Daniel Jacobowitz
CodeSourcery

2007-01-08  Daniel Jacobowitz  <dan@codesourcery.com>

	* linux-nat.c (struct simple_pid_list): Add status.
	(add_to_pid_list): Record the PID's status.
	(linux_record_stopped_pid): Likewise.  Make static.
	(pull_pid_from_list): Return the saved status.
	(linux_nat_handle_extended): Deleted.
	(linux_handle_extended_wait): Combine with linux_nat_handle_extended.
	Make static.  Handle non-SIGSTOP for a new thread's first signal.
	(flush_callback): Handle unexpected pending signals.
	(linux_nat_wait): Update calls to changed functions.
	* linux-nat.h (linux_record_stopped_pid, linux_handle_extended_wait):
	Remove prototypes for newly static functions.

2007-01-08  Daniel Jacobowitz  <dan@codesourcery.com>

	* gdb.threads/sigthread.c, gdb.threads/sigthread.exp: New.

Index: linux-nat.c
===================================================================
RCS file: /cvs/src/src/gdb/linux-nat.c,v
retrieving revision 1.53
diff -u -p -r1.53 linux-nat.c
--- linux-nat.c	3 Jan 2007 19:01:25 -0000	1.53
+++ linux-nat.c	8 Jan 2007 20:45:37 -0000
@@ -113,6 +113,7 @@ static int linux_parent_pid;
 struct simple_pid_list
 {
   int pid;
+  int status;
   struct simple_pid_list *next;
 };
 struct simple_pid_list *stopped_pids;
@@ -131,16 +132,17 @@ static int linux_supports_tracevforkdone
 /* Trivial list manipulation functions to keep track of a list of
    new stopped processes.  */
 static void
-add_to_pid_list (struct simple_pid_list **listp, int pid)
+add_to_pid_list (struct simple_pid_list **listp, int pid, int status)
 {
   struct simple_pid_list *new_pid = xmalloc (sizeof (struct simple_pid_list));
   new_pid->pid = pid;
+  new_pid->status = status;
   new_pid->next = *listp;
   *listp = new_pid;
 }
 
 static int
-pull_pid_from_list (struct simple_pid_list **listp, int pid)
+pull_pid_from_list (struct simple_pid_list **listp, int pid, int *status)
 {
   struct simple_pid_list **p;
 
@@ -148,6 +150,7 @@ pull_pid_from_list (struct simple_pid_li
     if ((*p)->pid == pid)
       {
 	struct simple_pid_list *next = (*p)->next;
+	*status = (*p)->status;
 	xfree (*p);
 	*p = next;
 	return 1;
@@ -155,10 +158,10 @@ pull_pid_from_list (struct simple_pid_li
   return 0;
 }
 
-void
-linux_record_stopped_pid (int pid)
+static void
+linux_record_stopped_pid (int pid, int status)
 {
-  add_to_pid_list (&stopped_pids, pid);
+  add_to_pid_list (&stopped_pids, pid, status);
 }
 
 
@@ -516,69 +519,6 @@ child_follow_fork (struct target_ops *op
   return 0;
 }
 
-ptid_t
-linux_handle_extended_wait (int pid, int status,
-			    struct target_waitstatus *ourstatus)
-{
-  int event = status >> 16;
-
-  if (event == PTRACE_EVENT_FORK || event == PTRACE_EVENT_VFORK
-      || event == PTRACE_EVENT_CLONE)
-    {
-      unsigned long new_pid;
-      int ret;
-
-      ptrace (PTRACE_GETEVENTMSG, pid, 0, &new_pid);
-
-      /* If we haven't already seen the new PID stop, wait for it now.  */
-      if (! pull_pid_from_list (&stopped_pids, new_pid))
-	{
-	  /* The new child has a pending SIGSTOP.  We can't affect it until it
-	     hits the SIGSTOP, but we're already attached.  */
-	  ret = my_waitpid (new_pid, &status,
-			    (event == PTRACE_EVENT_CLONE) ? __WCLONE : 0);
-	  if (ret == -1)
-	    perror_with_name (_("waiting for new child"));
-	  else if (ret != new_pid)
-	    internal_error (__FILE__, __LINE__,
-			    _("wait returned unexpected PID %d"), ret);
-	  else if (!WIFSTOPPED (status) || WSTOPSIG (status) != SIGSTOP)
-	    internal_error (__FILE__, __LINE__,
-			    _("wait returned unexpected status 0x%x"), status);
-	}
-
-      if (event == PTRACE_EVENT_FORK)
-	ourstatus->kind = TARGET_WAITKIND_FORKED;
-      else if (event == PTRACE_EVENT_VFORK)
-	ourstatus->kind = TARGET_WAITKIND_VFORKED;
-      else
-	ourstatus->kind = TARGET_WAITKIND_SPURIOUS;
-
-      ourstatus->value.related_pid = new_pid;
-      return inferior_ptid;
-    }
-
-  if (event == PTRACE_EVENT_EXEC)
-    {
-      ourstatus->kind = TARGET_WAITKIND_EXECD;
-      ourstatus->value.execd_pathname
-	= xstrdup (child_pid_to_exec_file (pid));
-
-      if (linux_parent_pid)
-	{
-	  detach_breakpoints (linux_parent_pid);
-	  ptrace (PTRACE_DETACH, linux_parent_pid, 0, 0);
-
-	  linux_parent_pid = 0;
-	}
-
-      return inferior_ptid;
-    }
-
-  internal_error (__FILE__, __LINE__,
-		  _("unknown ptrace event %d"), event);
-}
-
 
 void
 child_insert_fork_catchpoint (int pid)
@@ -1293,44 +1233,113 @@ kill_lwp (int lwpid, int signo)
   return kill (lwpid, signo);
 }
 
-/* Handle a GNU/Linux extended wait response.  Most of the work we
-   just pass off to linux_handle_extended_wait, but if it reports a
-   clone event we need to add the new LWP to our list (and not report
-   the trap to higher layers).  This function returns non-zero if
-   the event should be ignored and we should wait again.  If STOPPING
-   is true, the new LWP remains stopped, otherwise it is continued.  */
+/* Handle a GNU/Linux extended wait response.  If we see a clone
+   event, we need to add the new LWP to our list (and not report the
+   trap to higher layers).  This function returns non-zero if the
+   event should be ignored and we should wait again.  If STOPPING is
+   true, the new LWP remains stopped, otherwise it is continued.  */
 
 static int
-linux_nat_handle_extended (struct lwp_info *lp, int status, int stopping)
-{
-  linux_handle_extended_wait (GET_LWP (lp->ptid), status,
-			      &lp->waitstatus);
+linux_handle_extended_wait (struct lwp_info *lp, int status,
+			    int stopping)
+{
+  int pid = GET_LWP (lp->ptid);
+  struct target_waitstatus *ourstatus = &lp->waitstatus;
+  struct lwp_info *new_lp = NULL;
+  int event = status >> 16;
 
-  /* TARGET_WAITKIND_SPURIOUS is used to indicate clone events.  */
-  if (lp->waitstatus.kind == TARGET_WAITKIND_SPURIOUS)
+  if (event == PTRACE_EVENT_FORK || event == PTRACE_EVENT_VFORK
+      || event == PTRACE_EVENT_CLONE)
     {
-      struct lwp_info *new_lp;
-      new_lp = add_lwp (BUILD_LWP (lp->waitstatus.value.related_pid,
-				   GET_PID (inferior_ptid)));
-      new_lp->cloned = 1;
+      unsigned long new_pid;
+      int ret;
+
+      ptrace (PTRACE_GETEVENTMSG, pid, 0, &new_pid);
+
+      /* If we haven't already seen the new PID stop, wait for it now.  */
+      if (! pull_pid_from_list (&stopped_pids, new_pid, &status))
+	{
+	  /* The new child has a pending SIGSTOP.  We can't affect it until it
+	     hits the SIGSTOP, but we're already attached.  */
+	  ret = my_waitpid (new_pid, &status,
+			    (event == PTRACE_EVENT_CLONE) ? __WCLONE : 0);
+	  if (ret == -1)
+	    perror_with_name (_("waiting for new child"));
+	  else if (ret != new_pid)
+	    internal_error (__FILE__, __LINE__,
+			    _("wait returned unexpected PID %d"), ret);
+	  else if (!WIFSTOPPED (status))
+	    internal_error (__FILE__, __LINE__,
+			    _("wait returned unexpected status 0x%x"), status);
+	}
+
+      ourstatus->value.related_pid = new_pid;
 
-      if (stopping)
-	new_lp->stopped = 1;
+      if (event == PTRACE_EVENT_FORK)
+	ourstatus->kind = TARGET_WAITKIND_FORKED;
+      else if (event == PTRACE_EVENT_VFORK)
+	ourstatus->kind = TARGET_WAITKIND_VFORKED;
       else
-	ptrace (PTRACE_CONT, lp->waitstatus.value.related_pid, 0, 0);
+	{
+	  ourstatus->kind = TARGET_WAITKIND_IGNORE;
+	  new_lp = add_lwp (BUILD_LWP (new_pid, GET_PID (inferior_ptid)));
+	  new_lp->cloned = 1;
 
-      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+	  if (WSTOPSIG (status) != SIGSTOP)
+	    {
+	      /* This can happen if someone starts sending signals to
+		 the new thread before it gets a chance to run, which
+		 have a lower number than SIGSTOP (e.g. SIGUSR1).
+		 This is an unlikely case, and harder to handle for
+		 fork / vfork than for clone, so we do not try - but
+		 we handle it for clone events here.  We'll send
+		 the other signal on to the thread below.  */
 
-      if (debug_linux_nat)
-	fprintf_unfiltered (gdb_stdlog,
-			    "LLHE: Got clone event from LWP %ld, resuming\n",
-			    GET_LWP (lp->ptid));
-      ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
+	      new_lp->signalled = 1;
+	    }
+	  else
+	    status = 0;
 
-      return 1;
+	  if (stopping)
+	    new_lp->stopped = 1;
+	  else
+	    {
+	      new_lp->resumed = 1;
+	      ptrace (PTRACE_CONT, lp->waitstatus.value.related_pid, 0,
+		      status ? WSTOPSIG (status) : 0);
+	    }
+
+	  if (debug_linux_nat)
+	    fprintf_unfiltered (gdb_stdlog,
+				"LHEW: Got clone event from LWP %ld, resuming\n",
+				GET_LWP (lp->ptid));
+	  ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
+
+	  return 1;
+	}
+
+      return 0;
     }
 
-  return 0;
+  if (event == PTRACE_EVENT_EXEC)
+    {
+      ourstatus->kind = TARGET_WAITKIND_EXECD;
+      ourstatus->value.execd_pathname
+	= xstrdup (child_pid_to_exec_file (pid));
+
+      if (linux_parent_pid)
+	{
+	  detach_breakpoints (linux_parent_pid);
+	  ptrace (PTRACE_DETACH, linux_parent_pid, 0, 0);
+
+	  linux_parent_pid = 0;
+	}
+
+      return 0;
+    }
+
+  internal_error (__FILE__, __LINE__,
+		  _("unknown ptrace event %d"), event);
 }
 
 /* Wait for LP to stop.  Returns the wait status, or 0 if the LWP has
@@ -1401,7 +1410,7 @@ wait_lwp (struct lwp_info *lp)
 	fprintf_unfiltered (gdb_stdlog,
 			    "WL: Handling extended status 0x%06x\n",
 			    status);
-      if (linux_nat_handle_extended (lp, status, 1))
+      if (linux_handle_extended_wait (lp, status, 1))
 	return wait_lwp (lp);
     }
 
@@ -1641,7 +1650,15 @@ flush_callback (struct lwp_info *lp, voi
 	lp->status = 0;
     }
 
-  while (linux_nat_has_pending (GET_LWP (lp->ptid), &pending, flush_mask))
+  /* While there is a pending signal we would like to flush, continue
+     the inferior and collect another signal.  But if there's already
+     a saved status that we don't want to flush, we can't resume the
+     inferior - if it stopped for some other reason we wouldn't have
+     anywhere to save the new status.  In that case, we must leave the
+     signal unflushed (and possibly generate an extra SIGINT stop).
+     That's much less bad than losing a signal.  */
+  while (lp->status == 0
+	 && linux_nat_has_pending (GET_LWP (lp->ptid), &pending, flush_mask))
     {
       int ret;
       
@@ -1995,7 +2012,7 @@ retry:
 	     from waitpid before or after the event is.  */
 	  if (WIFSTOPPED (status) && !lp)
 	    {
-	      linux_record_stopped_pid (lwpid);
+	      linux_record_stopped_pid (lwpid, status);
 	      status = 0;
 	      continue;
 	    }
@@ -2046,7 +2063,7 @@ retry:
 		fprintf_unfiltered (gdb_stdlog,
 				    "LLW: Handling extended status 0x%06x\n",
 				    status);
-	      if (linux_nat_handle_extended (lp, status, 0))
+	      if (linux_handle_extended_wait (lp, status, 0))
 		{
 		  status = 0;
 		  continue;
Index: linux-nat.h
===================================================================
RCS file: /cvs/src/src/gdb/linux-nat.h,v
retrieving revision 1.14
diff -u -p -r1.14 linux-nat.h
--- linux-nat.h	31 Dec 2006 21:04:51 -0000	1.14
+++ linux-nat.h	8 Jan 2007 20:45:37 -0000
@@ -75,10 +75,7 @@ void thread_db_init (struct target_ops *
 void linux_proc_pending_signals (int pid, sigset_t *pending, sigset_t *blocked, sigset_t *ignored);
 
 /* linux-nat functions for handling fork events.  */
-extern void linux_record_stopped_pid (int pid);
 extern void linux_enable_event_reporting (ptid_t ptid);
-extern ptid_t linux_handle_extended_wait (int pid, int status,
-					  struct target_waitstatus *ourstatus);
 
 extern int lin_lwp_attach_lwp (ptid_t ptid, int verbose);
 
Index: testsuite/gdb.threads/sigthread.c
===================================================================
RCS file: testsuite/gdb.threads/sigthread.c
diff -N testsuite/gdb.threads/sigthread.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gdb.threads/sigthread.c	8 Jan 2007 20:45:37 -0000
@@ -0,0 +1,67 @@
+/* Test case for C-c sent to threads with pending signals.  Before I
+   even get there, creating a thread and sending it a signal before it
+   has a chance to run leads to an internal error in GDB.  We need to
+   record that there's a pending SIGSTOP, so that we'll ignore it
+   later, and pass the current signal back to the thread.
+
+   The fork/vfork case has similar trouble but that's even harder
+   to get around.  We may need to send a SIGCONT to cancel out the
+   SIGSTOP.  Different kernels may do different things if the thread
+   is stopped by ptrace and sent a SIGSTOP.  */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <signal.h>
+
+/* Loop long enough for GDB to send a few signals of its own, but
+   don't hang around eating CPU forever if something goes wrong during
+   testing.  */
+#define NSIGS 1000000
+
+void
+handler (int sig)
+{
+  ;
+}
+
+pthread_t main_thread;
+pthread_t child_thread, child_thread_two;
+
+void *
+child_two (void *arg)
+{
+  int i;
+
+  for (i = 0; i < NSIGS; i++)
+    pthread_kill (child_thread, SIGUSR1);
+}
+
+void *
+thread_function (void *arg)
+{
+  int i;
+
+  for (i = 0; i < NSIGS; i++)
+    pthread_kill (child_thread_two, SIGUSR2);
+}
+
+int main()
+{
+  int i;
+
+  signal (SIGUSR1, handler);
+  signal (SIGUSR2, handler);
+
+  main_thread = pthread_self ();
+  pthread_create (&child_thread, NULL, thread_function, NULL);
+  pthread_create (&child_thread_two, NULL, child_two, NULL);
+
+  for (i = 0; i < NSIGS; i++)
+    pthread_kill (child_thread_two, SIGUSR1);
+
+  pthread_join (child_thread, NULL);
+
+  exit (0);
+}
Index: testsuite/gdb.threads/sigthread.exp
===================================================================
RCS file: testsuite/gdb.threads/sigthread.exp
diff -N testsuite/gdb.threads/sigthread.exp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gdb.threads/sigthread.exp	8 Jan 2007 20:45:37 -0000
@@ -0,0 +1,56 @@
+# sigthread.exp -- Expect script to test thread and signal interaction
+# Copyright (C) 2007 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+set testfile sigthread
+set srcfile ${testfile}.c
+set binfile ${objdir}/${subdir}/${testfile}
+
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" \
+	 executable { debug }] != "" } {
+    return -1
+}
+
+gdb_exit
+gdb_start
+gdb_reinitialize_dir $srcdir/$subdir
+gdb_load ${binfile}
+
+if ![runto_main] then {
+   fail "Can't run to main"
+   return 0
+}
+
+gdb_test "handle SIGUSR1 nostop noprint pass"
+gdb_test "handle SIGUSR2 nostop noprint pass"
+
+send_gdb "continue\n"
+gdb_expect {
+    -re "Continuing" {
+	pass "continue"
+    }
+    timeout {
+	fail "continue (timeout)"
+    }
+}
+
+# For this to work we must be sure to consume the "Continuing."
+# message first, or GDB's signal handler may not be in place.
+after 500 {send_gdb "\003"}
+
+# Make sure we do not get an internal error from hitting Control-C
+# while many signals are flying back and forth.
+gdb_test "" "Program received signal SIGINT.*" "stop with control-c"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]