This is the mail archive of the gdb-cvs@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[binutils-gdb] PR threads/18600: Threads left stopped after fork+thread spawn


https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=4dd63d488a76482543517c4c4cde699ee6fa33ef

commit 4dd63d488a76482543517c4c4cde699ee6fa33ef
Author: Pedro Alves <palves@redhat.com>
Date:   Thu Jul 30 18:50:29 2015 +0100

    PR threads/18600: Threads left stopped after fork+thread spawn
    
    When a program forks and another process starts threads while gdb is
    handling the fork event, newly created threads are left stuck stopped
    by gdb, even though gdb presents them as "running" to the user.
    
    This can be seen with the test added by this patch.  The test has the
    inferior fork a certain number of times and waits for all children to
    exit.  Each fork child spawns a number of threads that do nothing and
    joins them immediately.  Normally, the program should run unimpeded
    (from the point of view of the user) and exit very quickly.  Without
    this fix, it doesn't because of some threads left stopped by gdb, so
    inferior 1 never exits.
    
    The problem triggers when a new clone thread is found while inside the
    linux_stop_and_wait_all_lwps call in linux-thread-db.c:
    
          linux_stop_and_wait_all_lwps ();
    
          ALL_LWPS (lp)
    	if (ptid_get_pid (lp->ptid) == pid)
    	  thread_from_lwp (lp->ptid);
    
          linux_unstop_all_lwps ();
    
    Within linux_stop_and_wait_all_lwps, we reach
    linux_handle_extended_wait with the "stopping" parameter set to 1, and
    because of that we don't mark the new lwp as resumed.  As a consequence,
    the subsequent resume_stopped_resumed_lwps, called from
    linux_unstop_all_lwps, never resumes the new LWP.
    
    There's lots of cruft in linux_handle_extended_wait that no longer
    makes sense.  On systems with CLONE events support, we don't rely on
    libthread_db for thread listing anymore, so the code that preserves
    stop_requested and the handling of last_resume_kind is all dead.
    
    So the fix is to remove all that, and simply always mark the new LWP
    as resumed, so that resume_stopped_resumed_lwps re-resumes it.
    
    gdb/ChangeLog:
    2015-07-30  Pedro Alves  <palves@redhat.com>
    	    Simon Marchi  <simon.marchi@ericsson.com>
    
    	PR threads/18600
    	* linux-nat.c (linux_handle_extended_wait): On CLONE event, always
    	mark the new thread as resumed.  Remove STOPPING parameter.
    	(wait_lwp): Adjust call to linux_handle_extended_wait.
    	(linux_nat_filter_event): Adjust call to
    	linux_handle_extended_wait.
    	(resume_stopped_resumed_lwps): Add debug output.
    
    gdb/testsuite/ChangeLog:
    2015-07-30  Simon Marchi  <simon.marchi@ericsson.com>
    	    Pedro Alves  <palves@redhat.com>
    
    	PR threads/18600
    	* gdb.threads/fork-plus-threads.c: New file.
    	* gdb.threads/fork-plus-threads.exp: New file.

Diff:
---
 gdb/ChangeLog                                   |  11 +++
 gdb/linux-nat.c                                 |  97 +++++++++-----------
 gdb/testsuite/ChangeLog                         |   7 ++
 gdb/testsuite/gdb.threads/fork-plus-threads.c   | 115 ++++++++++++++++++++++++
 gdb/testsuite/gdb.threads/fork-plus-threads.exp |  69 ++++++++++++++
 5 files changed, 243 insertions(+), 56 deletions(-)

diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index 4d604de..40403f9 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -1,3 +1,14 @@
+2015-07-30  Pedro Alves  <palves@redhat.com>
+	    Simon Marchi  <simon.marchi@ericsson.com>
+
+	PR threads/18600
+	* linux-nat.c (linux_handle_extended_wait): On CLONE event, always
+	mark the new thread as resumed.  Remove STOPPING parameter.
+	(wait_lwp): Adjust call to linux_handle_extended_wait.
+	(linux_nat_filter_event): Adjust call to
+	linux_handle_extended_wait.
+	(resume_stopped_resumed_lwps): Add debug output.
+
 2015-07-30  Pierre Langlois  <pierre.langlois@arm.com>
 
 	* arch-utils.c (default_fast_tracepoint_valid_at): Remove unused
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index b33abb0..966c6a8 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -2000,8 +2000,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping)
    true, the new LWP remains stopped, otherwise it is continued.  */
 
 static int
-linux_handle_extended_wait (struct lwp_info *lp, int status,
-			    int stopping)
+linux_handle_extended_wait (struct lwp_info *lp, int status)
 {
   int pid = ptid_get_lwp (lp->ptid);
   struct target_waitstatus *ourstatus = &lp->waitstatus;
@@ -2071,7 +2070,7 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
 	ourstatus->kind = TARGET_WAITKIND_FORKED;
       else if (event == PTRACE_EVENT_VFORK)
 	ourstatus->kind = TARGET_WAITKIND_VFORKED;
-      else
+      else if (event == PTRACE_EVENT_CLONE)
 	{
 	  struct lwp_info *new_lp;
 
@@ -2086,43 +2085,7 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
 	  new_lp = add_lwp (ptid_build (ptid_get_pid (lp->ptid), new_pid, 0));
 	  new_lp->cloned = 1;
 	  new_lp->stopped = 1;
-
-	  if (WSTOPSIG (status) != SIGSTOP)
-	    {
-	      /* This can happen if someone starts sending signals to
-		 the new thread before it gets a chance to run, which
-		 have a lower number than SIGSTOP (e.g. SIGUSR1).
-		 This is an unlikely case, and harder to handle for
-		 fork / vfork than for clone, so we do not try - but
-		 we handle it for clone events here.  We'll send
-		 the other signal on to the thread below.  */
-
-	      new_lp->signalled = 1;
-	    }
-	  else
-	    {
-	      struct thread_info *tp;
-
-	      /* When we stop for an event in some other thread, and
-		 pull the thread list just as this thread has cloned,
-		 we'll have seen the new thread in the thread_db list
-		 before handling the CLONE event (glibc's
-		 pthread_create adds the new thread to the thread list
-		 before clone'ing, and has the kernel fill in the
-		 thread's tid on the clone call with
-		 CLONE_PARENT_SETTID).  If that happened, and the core
-		 had requested the new thread to stop, we'll have
-		 killed it with SIGSTOP.  But since SIGSTOP is not an
-		 RT signal, it can only be queued once.  We need to be
-		 careful to not resume the LWP if we wanted it to
-		 stop.  In that case, we'll leave the SIGSTOP pending.
-		 It will later be reported as GDB_SIGNAL_0.  */
-	      tp = find_thread_ptid (new_lp->ptid);
-	      if (tp != NULL && tp->stop_requested)
-		new_lp->last_resume_kind = resume_stop;
-	      else
-		status = 0;
-	    }
+	  new_lp->resumed = 1;
 
 	  /* If the thread_db layer is active, let it record the user
 	     level thread id and status, and add the thread to GDB's
@@ -2136,19 +2099,23 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
 	    }
 
 	  /* Even if we're stopping the thread for some reason
-	     internal to this module, from the user/frontend's
-	     perspective, this new thread is running.  */
+	     internal to this module, from the perspective of infrun
+	     and the user/frontend, this new thread is running until
+	     it next reports a stop.  */
 	  set_running (new_lp->ptid, 1);
-	  if (!stopping)
-	    {
-	      set_executing (new_lp->ptid, 1);
-	      /* thread_db_attach_lwp -> lin_lwp_attach_lwp forced
-		 resume_stop.  */
-	      new_lp->last_resume_kind = resume_continue;
-	    }
+	  set_executing (new_lp->ptid, 1);
 
-	  if (status != 0)
+	  if (WSTOPSIG (status) != SIGSTOP)
 	    {
+	      /* This can happen if someone starts sending signals to
+		 the new thread before it gets a chance to run, which
+		 have a lower number than SIGSTOP (e.g. SIGUSR1).
+		 This is an unlikely case, and harder to handle for
+		 fork / vfork than for clone, so we do not try - but
+		 we handle it for clone events here.  */
+
+	      new_lp->signalled = 1;
+
 	      /* We created NEW_LP so it cannot yet contain STATUS.  */
 	      gdb_assert (new_lp->status == 0);
 
@@ -2162,7 +2129,6 @@ linux_handle_extended_wait (struct lwp_info *lp, int status,
 	      new_lp->status = status;
 	    }
 
-	  new_lp->resumed = !stopping;
 	  return 1;
 	}
 
@@ -2353,7 +2319,7 @@ wait_lwp (struct lwp_info *lp)
 	fprintf_unfiltered (gdb_stdlog,
 			    "WL: Handling extended status 0x%06x\n",
 			    status);
-      linux_handle_extended_wait (lp, status, 1);
+      linux_handle_extended_wait (lp, status);
       return 0;
     }
 
@@ -3155,7 +3121,7 @@ linux_nat_filter_event (int lwpid, int status)
 	fprintf_unfiltered (gdb_stdlog,
 			    "LLW: Handling extended status 0x%06x\n",
 			    status);
-      if (linux_handle_extended_wait (lp, status, 0))
+      if (linux_handle_extended_wait (lp, status))
 	return NULL;
     }
 
@@ -3675,9 +3641,28 @@ resume_stopped_resumed_lwps (struct lwp_info *lp, void *data)
 {
   ptid_t *wait_ptid_p = data;
 
-  if (lp->stopped
-      && lp->resumed
-      && !lwp_status_pending_p (lp))
+  if (!lp->stopped)
+    {
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "RSRL: NOT resuming LWP %s, not stopped\n",
+			    target_pid_to_str (lp->ptid));
+    }
+  else if (!lp->resumed)
+    {
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "RSRL: NOT resuming LWP %s, not resumed\n",
+			    target_pid_to_str (lp->ptid));
+    }
+  else if (lwp_status_pending_p (lp))
+    {
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "RSRL: NOT resuming LWP %s, has pending status\n",
+			    target_pid_to_str (lp->ptid));
+    }
+  else
     {
       struct regcache *regcache = get_thread_regcache (lp->ptid);
       struct gdbarch *gdbarch = get_regcache_arch (regcache);
diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog
index 171784e..06ca987 100644
--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2015-07-30  Simon Marchi  <simon.marchi@ericsson.com>
+	    Pedro Alves  <palves@redhat.com>
+
+	PR threads/18600
+	* gdb.threads/fork-plus-threads.c: New file.
+	* gdb.threads/fork-plus-threads.exp: New file.
+
 2015-07-29  Patrick Palka  <patrick@parcs.ath.cx>
 
 	* gdb.base/batch-preserve-term-settings.exp
diff --git a/gdb/testsuite/gdb.threads/fork-plus-threads.c b/gdb/testsuite/gdb.threads/fork-plus-threads.c
new file mode 100644
index 0000000..780a4b8
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/fork-plus-threads.c
@@ -0,0 +1,115 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2015 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+
+/* Number of times the main process forks.  */
+#define NFORKS 10
+
+/* Number of threads by each fork child.  */
+#define NTHREADS 10
+
+static void *
+thread_func (void *arg)
+{
+  return NULL;  /* Empty thread body; give the thread a defined result.  */
+}
+
+static void
+fork_child (void)
+{
+  pthread_t threads[NTHREADS];
+  int i;
+  int ret;
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      ret = pthread_create (&threads[i], NULL, thread_func, NULL);
+      assert (ret == 0);
+    }
+
+  for (i = 0; i < NTHREADS; i++)
+    {
+      ret = pthread_join (threads[i], NULL);
+      assert (ret == 0);
+    }
+}
+
+int
+main (void)
+{
+  pid_t childs[NFORKS];
+  int i;
+  int status;
+  int num_exited = 0;
+
+  /* Don't run forever if the wait loop below gets stuck.  */
+  alarm (180);
+
+  for (i = 0; i < NFORKS; i++)
+    {
+      pid_t pid;
+
+      pid = fork ();
+
+      if (pid > 0)
+	{
+	  /* Parent.  */
+	  childs[i] = pid;
+	}
+      else if (pid == 0)
+	{
+	  /* Child.  */
+	  fork_child ();
+	  return 0;
+	}
+      else
+	{
+	  perror ("fork");
+	  return 1;
+	}
+    }
+
+  while (num_exited != NFORKS)
+    {
+      pid_t pid = wait (&status);
+
+      if (pid == -1)
+	{
+	  perror ("wait");
+	  return 1;
+	}
+
+      if (WIFEXITED (status))
+	{
+	  num_exited++;
+	}
+      else
+	{
+	  printf ("Hmm, unexpected wait status 0x%x from child %d\n", status,
+		  pid);
+	}
+    }
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.threads/fork-plus-threads.exp b/gdb/testsuite/gdb.threads/fork-plus-threads.exp
new file mode 100644
index 0000000..53d1102
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/fork-plus-threads.exp
@@ -0,0 +1,69 @@
+# Copyright (C) 2015 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This test verifies that threads created by the child fork are
+# properly handled.  Specifically, GDB used to have a bug where it
+# would leave child fork threads stuck stopped, even though "info
+# threads" would show them running.
+#
+# See https://sourceware.org/bugzilla/show_bug.cgi?id=18600
+
+standard_testfile
+
+proc do_test { detach_on_fork } {
+    global GDBFLAGS
+    global srcfile testfile
+    global gdb_prompt
+
+    set saved_gdbflags $GDBFLAGS
+    set GDBFLAGS [concat $GDBFLAGS " -ex \"set non-stop on\""]
+
+    if {[prepare_for_testing "failed to prepare" \
+	     $testfile $srcfile {debug pthreads}] == -1} {
+	set GDBFLAGS $saved_gdbflags
+	return -1
+    }
+
+    set GDBFLAGS $saved_gdbflags
+
+    if ![runto_main] then {
+	fail "Can't run to main"
+	return 0
+    }
+
+    gdb_test_no_output "set detach-on-fork $detach_on_fork"
+    set test "continue &"
+    gdb_test_multiple $test $test {
+	-re "$gdb_prompt " {
+	    pass $test
+	}
+    }
+    # Wait for inferior 1 to exit.  Parens are escaped for Tcl and regexp.
+    set test "inferior 1 exited"
+    gdb_test_multiple "" $test {
+	-re "Inferior 1 \\(\[^\r\n\]+\\) exited normally" {
+	    pass $test
+	}
+    }
+    # The dot is escaped twice so the regexp matches a literal ".".
+    gdb_test "info threads" "No threads\\." \
+	"no threads left"
+}
+
+foreach detach_on_fork {"on" "off"} {
+    with_test_prefix "detach-on-fork=$detach_on_fork" {
+	do_test $detach_on_fork
+    }
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]