This is the mail archive of the
gdb-patches@sourceware.org
mailing list for the GDB project.
[PATCH] Crash on thread id wrap around
- From: Pedro Alves <palves at redhat dot com>
- To: gdb-patches at sourceware dot org
- Date: Thu, 26 Mar 2015 12:45:50 +0000
- Subject: [PATCH] Crash on thread id wrap around
- Authentication-results: sourceware.org; auth=none
On GNU/Linux, if the target reuses the TID of a thread that GDB still
has in its list marked as THREAD_EXITED, GDB crashes, like:
(gdb) continue
Continuing.
/home/pedro/gdb/mygit/src/gdb/thread.c:789: internal-error: set_running: Assertion `tp->state != THREAD_EXITED' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n) FAIL: gdb.threads/tid-reuse.exp: continue to breakpoint: after_wrap (GDB internal error)
Resyncing due to internal error.
n
Here:
(top-gdb) bt
#0 internal_error (file=0x953dd8 "src/gdb/thread.c", line=789, fmt=0x953da0 "%s: Assertion `%s' failed.")
at src/gdb/common/errors.c:54
#1 0x0000000000638514 in set_running (ptid=..., running=1) at src/gdb/thread.c:789
#2 0x00000000004bda42 in linux_handle_extended_wait (lp=0x16f5760, status=0, stopping=0) at src/gdb/linux-nat.c:2114
#3 0x00000000004bfa24 in linux_nat_filter_event (lwpid=20570, status=198015) at src/gdb/linux-nat.c:3127
#4 0x00000000004c070e in linux_nat_wait_1 (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3478
#5 0x00000000004c1015 in linux_nat_wait (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3722
#6 0x00000000004c92d2 in thread_db_wait (ops=0xd80b60 <thread_db_ops>, ptid=..., ourstatus=0x7fffffffd2c0, options=1)
at src/gdb/linux-thread-db.c:1525
#7 0x000000000066db43 in delegate_wait (self=0xd80b60 <thread_db_ops>, arg1=..., arg2=0x7fffffffd2c0, arg3=1) at src/gdb/target-delegates.c:116
#8 0x000000000067e54b in target_wait (ptid=..., status=0x7fffffffd2c0, options=1) at src/gdb/target.c:2206
#9 0x0000000000625111 in fetch_inferior_event (client_data=0x0) at src/gdb/infrun.c:3275
#10 0x0000000000648a3b in inferior_event_handler (event_type=INF_REG_EVENT, client_data=0x0) at src/gdb/inf-loop.c:56
#11 0x00000000004c2ecb in handle_target_event (error=0, client_data=0x0) at src/gdb/linux-nat.c:4655
I managed to come up with a test that reliably reproduces this. It
relies on pids wrapping around though, so could potentially take a
while. On my box that's 4 seconds; on gcc110, a PPC box which has
pid_max set to 65536, it's over 10 seconds. So I made the test
compute how long that would take, and cap the time waited if that
would be too long.
Tested on x86_64 Fedora 20.
gdb/ChangeLog:
2015-03-26 Pedro Alves <palves@redhat.com>
* linux-thread-db.c (record_thread): Readd the thread to gdb's
list if it was marked exited.
gdb/testsuite/ChangeLog:
2015-03-26 Pedro Alves <palves@redhat.com>
* gdb.threads/tid-reuse.c: New file.
* gdb.threads/tid-reuse.exp: New file.
---
gdb/linux-thread-db.c | 6 +-
gdb/testsuite/gdb.threads/tid-reuse.c | 149 ++++++++++++++++++++++++++++++++
gdb/testsuite/gdb.threads/tid-reuse.exp | 84 ++++++++++++++++++
3 files changed, 237 insertions(+), 2 deletions(-)
create mode 100644 gdb/testsuite/gdb.threads/tid-reuse.c
create mode 100644 gdb/testsuite/gdb.threads/tid-reuse.exp
diff --git a/gdb/linux-thread-db.c b/gdb/linux-thread-db.c
index 88094a7..886d8ac 100644
--- a/gdb/linux-thread-db.c
+++ b/gdb/linux-thread-db.c
@@ -1346,8 +1346,10 @@ record_thread (struct thread_db_info *info,
priv->tid = ti_p->ti_tid;
update_thread_state (priv, ti_p);
- /* Add the thread to GDB's thread list. */
- if (tp == NULL)
+ /* Add the thread to GDB's thread list. If we already know about a
+ thread with this PTID, but it's marked exited, then the kernel
+ reused the tid of an old thread. */
+ if (tp == NULL || tp->state == THREAD_EXITED)
tp = add_thread_with_info (ptid, priv);
else
tp->priv = priv;
diff --git a/gdb/testsuite/gdb.threads/tid-reuse.c b/gdb/testsuite/gdb.threads/tid-reuse.c
new file mode 100644
index 0000000..9127fe7
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/tid-reuse.c
@@ -0,0 +1,149 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+ Copyright 2015 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <limits.h>
+
+/* Number of threads spawned. */
+unsigned long thread_counter;
+
+/* How long it takes for the tid number space to wrap around, in
+ seconds. Starts out as UINT_MAX (-1 converted to unsigned), so it
+ gets capped to an upper bound in main if we can't compute it. */
+unsigned int wrap_time = -1;
+
+/* How many threads fit in the target's thread number space before tid
+ wrapping occurs. */
+int tid_wrap = -1;
+
+void *
+do_nothing_thread_func (void *arg)
+{
+ usleep (1); /* Sleep for a microsecond, then exit right away. */
+ return NULL;
+}
+
+void *
+spawner_thread_func (void *arg)
+{
+ while (1) /* Spawn short-lived threads forever, one at a time. */
+ {
+ pthread_t child;
+ int rc;
+
+ thread_counter++; /* Bumped before each spawn; main reads it. */
+
+ rc = pthread_create (&child, NULL, do_nothing_thread_func, NULL);
+ assert (rc == 0);
+
+ rc = pthread_join (child, NULL); /* Wait for the child to exit. */
+ assert (rc == 0);
+ }
+
+ return NULL; /* Not reached; the loop above never exits. */
+}
+
+/* Empty; called once the program is done counting spawned threads
+ for a period and has computed WRAP_TIME. The testcase breaks here. */
+
+void
+after_count (void)
+{
+}
+
+/* Breakpoint location, called after enough time passed for TID wrapping. */
+
+void
+after_wrap (void)
+{
+}
+
+#ifdef __linux__
+
+/* Get the running system's configured pid_max, or -1 if it can't be read. */
+
+static int
+linux_proc_get_pid_max (void)
+{
+ static const char filename[] ="/proc/sys/kernel/pid_max";
+ FILE *file;
+ char buf[100];
+ int retval = -1; /* Returned as-is if nothing can be read. */
+
+ file = fopen (filename, "r");
+ if (file == NULL)
+ {
+ fprintf (stderr, "unable to open %s\n", filename);
+ return -1;
+ }
+
+ if (fgets (buf, sizeof (buf), file) != NULL)
+ retval = strtol (buf, NULL, 10); /* 0 if the line isn't a number. */
+
+ fclose (file);
+ return retval;
+}
+
+#endif
+
+int
+main (int argc, char *argv[])
+{
+ pthread_t child;
+ int rc;
+ int wrap_time_raw = 0;
+
+ rc = pthread_create (&child, NULL, spawner_thread_func, NULL);
+ assert (rc == 0);
+
+#define COUNT_TIME 2
+ sleep (COUNT_TIME); /* Let the spawner run so its rate can be measured. */
+
+#ifdef __linux__
+ tid_wrap = linux_proc_get_pid_max ();
+#endif
+ /* If we don't know how many threads it would take to wrap around on
+ this system (or none got counted), just run the test for a bit. */
+ if (tid_wrap > 0 && thread_counter > 0)
+ {
+ wrap_time_raw = tid_wrap / ((float) thread_counter / COUNT_TIME) + 0.5;
+
+ /* Give it a bit more, just in case. */
+ wrap_time = wrap_time_raw + 3;
+ }
+
+ /* 4 seconds were sufficient on the machine this was first observed,
+ an Intel i7-2620M @ 2.70GHz running Linux 3.18.7, with
+ pid_max=32768. Going forward, as machines get faster, this will
+ need less time, unless pid_max is set to a very high number. To
+ avoid unreasonably long test time, cap to an upper bound. */
+ if (wrap_time > 60)
+ wrap_time = 60;
+ printf ("thread_counter=%lu, tid_wrap = %d, wrap_time_raw=%d, wrap_time=%u\n",
+ thread_counter, tid_wrap, wrap_time_raw, wrap_time);
+ after_count ();
+
+ sleep (wrap_time); /* Keep spawning until the tid space had time to wrap. */
+
+ after_wrap ();
+ return 0;
+}
diff --git a/gdb/testsuite/gdb.threads/tid-reuse.exp b/gdb/testsuite/gdb.threads/tid-reuse.exp
new file mode 100644
index 0000000..cf5398c
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/tid-reuse.exp
@@ -0,0 +1,84 @@
+# Copyright 2015 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Test running a program that spawns enough threads that the tid
+# number space wraps around, all while having an exited selected
+# thread. At some point, the exited thread's tid is reused. GDB
+# should not crash when this happens.
+
+standard_testfile
+
+set options { debug pthreads }
+
+if {[prepare_for_testing "failed to prepare" $testfile $srcfile $options] == -1} {
+ return -1
+}
+
+clean_restart ${binfile}
+
+if ![runto main] {
+ fail "Can't run to main"
+ return -1
+}
+
+delete_breakpoints
+
+# Avoid dumping a ton of thread create/exit info in the logs.
+gdb_test_no_output "set print thread-events off"
+
+gdb_breakpoint "after_count"
+gdb_continue_to_breakpoint "after_count"
+
+# Return the decimal value of VARIABLE in the inferior, or 0 on failure.
+
+proc getvar {variable} {
+ global decimal
+ global gdb_prompt
+
+ set value 0
+
+ set msg "get $variable"
+ gdb_test_multiple "print $variable" $msg {
+ -re " = ($decimal)\r\n$gdb_prompt $" {
+ set value $expect_out(1,string)
+ pass $msg
+ }
+ }
+ return $value
+}
+
+set inf_timeout [getvar "wrap_time"]
+
+# Now the real test. Run to a breakpoint in a thread that exits
+# immediately once resumed. The thread ends up left on the thread
+# list, marked exited (exactly because it's the selected thread).
+gdb_breakpoint "do_nothing_thread_func"
+gdb_continue_to_breakpoint "do_nothing_thread_func"
+
+delete_breakpoints
+
+# Let the program continue, constantly spawning short-lived threads
+# (one at a time). On some targets (e.g., GNU/Linux), after a bit, a
+# new thread reuses the tid of the old exited thread that we still
+# have selected. GDB should not crash in this situation. Of course,
+# if the tid number space is shared between all processes in the
+# system (such as on Linux), there's a chance that some other process
+# grabs the TID, but that can never cause a spurious test fail.
+gdb_breakpoint "after_wrap"
+
+# Higher than what the test program sleeps before exiting.
+set timeout [expr {$inf_timeout * 2}]
+
+gdb_continue_to_breakpoint "after_wrap"
--
1.9.3