[patch] Fix hang on the thread leader exit

Jan Kratochvil jan.kratochvil@redhat.com
Sun Jul 8 13:39:00 GMT 2007


Hi,

if the thread group leader exits there are two problems:

* We would lock up in WAITPID as it waits on all the threads of the whole
  process, while WAIT_LWP intends to wait only on the specific task.

* nptl_db expects being able to transfer memory just by specifying PID.
  After the thread group leader exits the Linux kernel turns the task
  into a zombie, no longer permitting access to its memory either
  by `/proc/PID/mem' or by PTRACE_PEEK*.
  Transfer the memory from an arbitrary LWP_LIST entry in such a case.
  I do not like this solution but it looks correct from the kernel side.

Testcase also provided.


Regards,
Jan
-------------- next part --------------
2007-07-08  Jan Kratochvil  <jan.kratochvil@redhat.com>

	* linux-nat.c (linux_lwp_is_zombie): New function.
	(wait_lwp): Fix lockup on exit of the thread group leader.
	(linux_xfer_partial): Renamed to ...
	(linux_xfer_partial_lwp): ... here.
	(linux_xfer_partial): New function wrapping LINUX_XFER_PARTIAL_LWP.

2007-07-08  Jan Kratochvil  <jan.kratochvil@redhat.com>

	* gdb.threads/leader-exit.c, gdb.threads/leader-exit.exp: New files.

--- ./gdb/linux-nat.c	3 Jul 2007 17:01:55 -0000	1.65
+++ ./gdb/linux-nat.c	7 Jul 2007 15:21:57 -0000
@@ -1343,6 +1343,31 @@ linux_handle_extended_wait (struct lwp_i
 		  _("unknown ptrace event %d"), event);
 }
 
+static int
+linux_lwp_is_zombie (long lwp)
+{
+  char buffer[MAXPATHLEN];
+  FILE *procfile;
+  int retval = 0;
+
+  sprintf (buffer, "/proc/%ld/status", lwp);
+  procfile = fopen (buffer, "r");
+  if (procfile == NULL)
+    {
+      warning (_("unable to open /proc file '%s'"), buffer);
+      return 0;
+    }
+  while (fgets (buffer, sizeof (buffer), procfile) != NULL)
+    if (strcmp (buffer, "State:\tZ (zombie)\n") == 0)
+      {
+	retval = 1;
+	break;
+      }
+  fclose (procfile);
+
+  return retval;
+}
+
 /* Wait for LP to stop.  Returns the wait status, or 0 if the LWP has
    exited.  */
 
@@ -1350,16 +1375,31 @@ static int
 wait_lwp (struct lwp_info *lp)
 {
   pid_t pid;
-  int status;
+  int status = 0;
   int thread_dead = 0;
 
   gdb_assert (!lp->stopped);
   gdb_assert (lp->status == 0);
 
-  pid = my_waitpid (GET_LWP (lp->ptid), &status, 0);
-  if (pid == -1 && errno == ECHILD)
+  /* Thread group leader may have exited but we would lock up by WAITPID as it
+     waits on all its threads; __WCLONE is not applicable for the leader.
+     The thread leader restriction is only a performance optimization here.
+     LINUX_NAT_THREAD_ALIVE cannot be used here as it requires a STOPPED
+     process; it gets ESRCH both for the zombie and for running processes.  */
+  if (is_lwp (lp->ptid) && GET_PID (lp->ptid) == GET_LWP (lp->ptid)
+      && linux_lwp_is_zombie (GET_LWP (lp->ptid)))
+    {
+      thread_dead = 1;
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog, "WL: Threads leader %s vanished.\n",
+			    target_pid_to_str (lp->ptid));
+    }
+
+  if (!thread_dead)
     {
-      pid = my_waitpid (GET_LWP (lp->ptid), &status, __WCLONE);
+      pid = my_waitpid (GET_LWP (lp->ptid), &status, 0);
+      if (pid == -1 && errno == ECHILD)
+	pid = my_waitpid (GET_LWP (lp->ptid), &status, __WCLONE);
       if (pid == -1 && errno == ECHILD)
 	{
 	  /* The thread has previously exited.  We need to delete it
@@ -3144,10 +3159,12 @@ linux_proc_pending_signals (int pid, sig
   fclose (procfile);
 }
 
+/* Transfer from the specific LWP currently set by PID of INFERIOR_PTID.  */
+
 static LONGEST
-linux_xfer_partial (struct target_ops *ops, enum target_object object,
-                    const char *annex, gdb_byte *readbuf,
-		    const gdb_byte *writebuf, ULONGEST offset, LONGEST len)
+linux_xfer_partial_lwp (struct target_ops *ops, enum target_object object,
+			const char *annex, gdb_byte *readbuf,
+			const gdb_byte *writebuf, ULONGEST offset, LONGEST len)
 {
   LONGEST xfer;
 
@@ -3164,6 +3181,45 @@ linux_xfer_partial (struct target_ops *o
 			     offset, len);
 }
 
+/* nptl_db expects being able to transfer memory just by specifying PID.
+   After the thread group leader exits the Linux kernel turns the task
+   into a zombie, no longer permitting access to its memory.
+   Transfer the memory from an arbitrary LWP_LIST entry in such a case.  */
+
+static LONGEST
+linux_xfer_partial (struct target_ops *ops, enum target_object object,
+                    const char *annex, gdb_byte *readbuf,
+		    const gdb_byte *writebuf, ULONGEST offset, LONGEST len)
+{
+  LONGEST xfer;
+  struct lwp_info *lp;
+  /* SAVE_INFERIOR_PTID is deferred until first needed, for performance.  */
+  struct cleanup *old_chain = NULL;
+  ptid_t inferior_ptid_orig = inferior_ptid;
+
+  errno = 0;
+  xfer = linux_xfer_partial_lwp (ops, object, annex, readbuf, writebuf,
+				 offset, len);
+
+  for (lp = lwp_list; xfer == 0 && (errno == EACCES || errno == ESRCH)
+		      && lp != NULL; lp = lp->next)
+    {
+      if (!is_lwp (lp->ptid) || ptid_equal (lp->ptid, inferior_ptid_orig))
+        continue;
+      
+      if (old_chain == NULL)
+	old_chain = save_inferior_ptid ();
+      inferior_ptid = BUILD_LWP (GET_LWP (lp->ptid), GET_LWP (lp->ptid));
+      errno = 0;
+      xfer = linux_xfer_partial_lwp (ops, object, annex, readbuf, writebuf,
+				     offset, len);
+    }
+
+  if (old_chain != NULL)
+    do_cleanups (old_chain);
+  return xfer;
+}
+
 /* Create a prototype generic Linux target.  The client can override
    it with local methods.  */
 
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ./gdb/testsuite/gdb.threads/leader-exit.c	7 Jul 2007 15:21:57 -0000
@@ -0,0 +1,47 @@
+/* Clean exit of the thread group leader should not break GDB.
+
+   Copyright 2007 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <pthread.h>
+#include <assert.h>
+#include <unistd.h>
+
+static void *start (void *arg)
+{
+  for (;;)
+    pause ();
+  /* NOTREACHED */
+  assert (0);
+  return arg;
+}
+
+int main (void)
+{
+  pthread_t thread;
+  int i;
+
+  i = pthread_create (&thread, NULL, start, NULL);	/* create1 */
+  assert (i == 0);
+
+  pthread_exit (NULL);
+  /* NOTREACHED */
+  assert (0);
+  return 0;
+}
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ./gdb/testsuite/gdb.threads/leader-exit.exp	7 Jul 2007 15:21:57 -0000
@@ -0,0 +1,64 @@
+# Copyright (C) 2007 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
+
+# Exit of the thread group leader should not break GDB.
+
+# This file was written by Jan Kratochvil <jan.kratochvil@redhat.com>.
+
+if $tracelevel then {
+	strace $tracelevel
+}
+
+set testfile "leader-exit"
+set srcfile ${testfile}.c
+set binfile ${objdir}/${subdir}/${testfile}
+
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+    return -1
+}
+
+gdb_exit
+gdb_start
+gdb_reinitialize_dir $srcdir/$subdir
+gdb_load ${binfile}
+gdb_run_cmd
+
+proc stop_process { description } {
+  global gdb_prompt
+
+  # For this to work we must be sure to consume the "Continuing."
+  # message first, or GDB's signal handler may not be in place.
+  after 1000 {send_gdb "\003"}
+  gdb_expect {
+    -re "Program received signal SIGINT.*$gdb_prompt $"
+      {
+	pass $description
+      }
+    timeout
+      {
+	fail "$description (timeout)"
+      }
+  }
+}
+
+# Prevent races.
+sleep 2
+
+stop_process "Threads could be stopped"
+
+gdb_test "info threads" \
+         "\\* 2 Thread \[^\r\n\]* in \[^\r\n\]*" \
+         "Single thread has been left"


More information about the Gdb-patches mailing list