[patch] Fix linux-ia64 on SIGILL for deleted breakpoint
Jan Kratochvil
jan.kratochvil@redhat.com
Fri Jul 23 22:19:00 GMT 2010
On Tue, 20 Jul 2010 15:28:58 +0200, Pedro Alves wrote:
> That's a general problem with "real" SIGTRAP breakpoints as well. linux-nat.c
> handles this by "cancelling" the breakpoint, so that it is hit again later
> on resume.
Thanks for the pointer, yes, the needed code is already in place as I see.
> I don't see why this can't be extended to handle SIGILL breakpoints too
> (with the difference that there's no PC adjustment required, you just
> discard the signal).
>
> Simply swapping SIGILL for SIGTRAP without accounting for the PC
> adjustment that GDB will try to do on the SIGTRAP (thinking it was a real
> SIGTRAP) makes me a bit nervous.
Extended it.
This SIGTRAP->SIGILL case happens only on ia64 and ia64 does not use any
set_gdbarch_decr_pc_after_break at all, PC stays on the breakpoint bundle+slot
in both the SIGTRAP and SIGILL case.
You are right that it is arch-specific. On i386 I checked that SIGILL is never
generated (only in some FPU-emulated code), so I checked s390x-linux-gnu:
SIGILL on opcode 0xb29e
si_addr = 0x800009a4
.psw.addr = 0x800009a8
instr at = 0x800009a4
.psw.addr - instr == 4
SIGTRAP on opcode 0x0001
si_addr = (nil)
.psw.addr = 0x800009a6
instr at = 0x800009a4
.psw.addr - instr == 2
GDB really has: set_gdbarch_decr_pc_after_break (gdbarch, 2);
I have left the "infrun: Treating signal as SIGTRAP\n" code in place for now.
It should get disabled on arches where we know it is not needed, but let that be
a different bug / mail thread.
No regressions on {x86_64,x86_64-m32,i686}-fedora13-linux-gnu.
No regressions on ia64-rhel55-linux-gnu.
Thanks,
Jan
gdb/
2010-07-23 Jan Kratochvil <jan.kratochvil@redhat.com>
* ia64-linux-nat.c (ia64_linux_cancel_breakpoint): New function.
(_initialize_ia64_linux_nat): Install it.
* linux-nat.c (cancel_breakpoint): Move the body to ...
(linux_nat_cancel_breakpoint_when_signalled): ... a new function. Set
LP->STATUS to 0 already here.
(linux_nat_cancel_breakpoint_check_sigtrap)
(linux_nat_cancel_breakpoint, linux_nat_set_cancel_breakpoint): New.
(cancel_breakpoints_callback): Move the comment in front of the
function. Call linux_nat_cancel_breakpoint.
(linux_nat_wait_1): Move the signals check and LP->STATUS reset into
linux_nat_cancel_breakpoint.
* linux-nat.h (linux_nat_cancel_breakpoint_when_signalled)
(linux_nat_set_cancel_breakpoint): New prototypes.
gdb/testsuite/
2010-07-23 Jan Kratochvil <jan.kratochvil@redhat.com>
* gdb.threads/ia64-sigill.exp: New file.
* gdb.threads/ia64-sigill.c: New file.
--- a/gdb/ia64-linux-nat.c
+++ b/gdb/ia64-linux-nat.c
@@ -809,6 +809,26 @@ ia64_linux_xfer_partial (struct target_ops *ops,
offset, len);
}
+/* For the break.b instruction the ia64 CPU forgets the immediate value and raises
+ SIGILL with ILL_ILLOPC instead of the more common SIGTRAP with TRAP_BRKPT. */
+
+static int
+ia64_linux_cancel_breakpoint (struct lwp_info *lp)
+{
+ /* lp->waitstatus is checked in addition to lp->status because pending
+ process exits can also be recorded in lp->status
+ and W_EXITCODE(0,0) == 0. We should probably have an additional
+ lp->status_p flag. */
+
+ if (! (lp->waitstatus.kind == TARGET_WAITKIND_IGNORE
+ && WIFSTOPPED (lp->status)
+ && (WSTOPSIG (lp->status) == SIGTRAP
+ || WSTOPSIG (lp->status) == SIGILL))) /* SIGILL covers the break.b case. */
+ return 0; /* Not stopped by a signal that can mean a breakpoint hit here. */
+
+ return linux_nat_cancel_breakpoint_when_signalled (lp);
+}
+
void _initialize_ia64_linux_nat (void);
void
@@ -848,4 +868,5 @@ _initialize_ia64_linux_nat (void)
/* Register the target. */
linux_nat_add_target (t);
linux_nat_set_new_thread (t, ia64_linux_new_thread);
+ linux_nat_set_cancel_breakpoint (t, ia64_linux_cancel_breakpoint);
}
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -2837,18 +2836,20 @@ select_event_lwp_callback (struct lwp_info *lp, void *data)
return 0;
}
-static int
-cancel_breakpoint (struct lwp_info *lp)
-{
- /* Arrange for a breakpoint to be hit again later. We don't keep
- the SIGTRAP status and don't forward the SIGTRAP signal to the
- LWP. We will handle the current event, eventually we will resume
- this LWP, and this breakpoint will trap again.
+/* Arrange for a breakpoint to be hit again later. We don't keep the SIGTRAP
+ status and don't forward the SIGTRAP signal to the LWP. We will handle the
+ current event, eventually we will resume this LWP, and this breakpoint will
+ trap again.
- If we do not do this, then we run the risk that the user will
- delete or disable the breakpoint, but the LWP will have already
- tripped on it. */
+ If we do not do this, then we run the risk that the user will delete or
+ disable the breakpoint, but the LWP will have already tripped on it.
+
+ This function must be called with LP->STATUS signal already verified as
+ valid for a breakpoint. */
+int
+linux_nat_cancel_breakpoint_when_signalled (struct lwp_info *lp)
+{
struct regcache *regcache = get_thread_regcache (lp->ptid);
struct gdbarch *gdbarch = get_regcache_arch (regcache);
CORE_ADDR pc;
@@ -2865,11 +2866,52 @@ cancel_breakpoint (struct lwp_info *lp)
if (gdbarch_decr_pc_after_break (gdbarch))
regcache_write_pc (regcache, pc);
+ /* Throw away the SIGTRAP. */
+ lp->status = 0;
return 1;
}
return 0;
}
+/* Default breakpoint hit check: only a SIGTRAP stop can be a breakpoint hit. */
+
+static int
+linux_nat_cancel_breakpoint_check_sigtrap (struct lwp_info *lp)
+{
+ /* lp->waitstatus is checked in addition to lp->status because pending
+ process exits can also be recorded in lp->status
+ and W_EXITCODE(0,0) == 0. We should probably have an additional
+ lp->status_p flag. */
+
+ if (! (lp->waitstatus.kind == TARGET_WAITKIND_IGNORE
+ && WIFSTOPPED (lp->status) && WSTOPSIG (lp->status) == SIGTRAP))
+ return 0; /* Not a SIGTRAP stop; leave LP->STATUS untouched. */
+
+ return linux_nat_cancel_breakpoint_when_signalled (lp);
+}
+
+static int (*linux_nat_cancel_breakpoint) (struct lwp_info *lp)
+ = linux_nat_cancel_breakpoint_check_sigtrap; /* Default: SIGTRAP only. */
+
+/* Register FUNC as the arch-specific breakpoint hit check. T is not used here; the hook is a single file-scope pointer. */
+
+void
+linux_nat_set_cancel_breakpoint (struct target_ops *t,
+ int (*func) (struct lwp_info *lp))
+{
+ linux_nat_cancel_breakpoint = func;
+}
+
+/* If a LWP other than the LWP that we're reporting an event for has hit a GDB
+ breakpoint (as opposed to some random trap signal), then just arrange for
+ it to hit it again later. We don't keep the SIGTRAP status and don't
+ forward the SIGTRAP signal to the LWP. We will handle the current event,
+ eventually we will resume all LWPs, and this one will get its breakpoint
+ trap again.
+
+ If we do not do this, then we run the risk that the user will delete or
+ disable the breakpoint, but the LWP will have already tripped on it. */
+
static int
cancel_breakpoints_callback (struct lwp_info *lp, void *data)
{
@@ -2879,23 +2921,7 @@ cancel_breakpoints_callback (struct lwp_info *lp, void *data)
if (lp == event_lp)
return 0;
- /* If a LWP other than the LWP that we're reporting an event for has
- hit a GDB breakpoint (as opposed to some random trap signal),
- then just arrange for it to hit it again later. We don't keep
- the SIGTRAP status and don't forward the SIGTRAP signal to the
- LWP. We will handle the current event, eventually we will resume
- all LWPs, and this one will get its breakpoint trap again.
-
- If we do not do this, then we run the risk that the user will
- delete or disable the breakpoint, but the LWP will have already
- tripped on it. */
-
- if (lp->waitstatus.kind == TARGET_WAITKIND_IGNORE
- && lp->status != 0
- && WIFSTOPPED (lp->status) && WSTOPSIG (lp->status) == SIGTRAP
- && cancel_breakpoint (lp))
- /* Throw away the SIGTRAP. */
- lp->status = 0;
+ linux_nat_cancel_breakpoint (lp);
return 0;
}
@@ -3410,14 +3436,8 @@ retry:
core before this one is handled. All-stop
always cancels breakpoint hits in all
threads. */
- if (non_stop
- && lp->waitstatus.kind == TARGET_WAITKIND_IGNORE
- && WSTOPSIG (lp->status) == SIGTRAP
- && cancel_breakpoint (lp))
+ if (non_stop && linux_nat_cancel_breakpoint (lp))
{
- /* Throw away the SIGTRAP. */
- lp->status = 0;
-
if (debug_linux_nat)
fprintf (stderr,
"LLW: LWP %ld hit a breakpoint while waiting "
--- a/gdb/linux-nat.h
+++ b/gdb/linux-nat.h
@@ -172,3 +172,10 @@ struct siginfo *linux_nat_get_siginfo (ptid_t ptid);
/* Compute and return the processor core of a given thread. */
int linux_nat_core_of_thread_1 (ptid_t ptid);
+
+/* Arrange for a breakpoint to be hit again later. The caller must already have verified that the signal in LP->STATUS is valid for a breakpoint. */
+int linux_nat_cancel_breakpoint_when_signalled (struct lwp_info *lp);
+
+/* Register FUNC as the arch-specific alternative breakpoint hit check. */
+void linux_nat_set_cancel_breakpoint (struct target_ops *t,
+ int (*func) (struct lwp_info *lp));
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/ia64-sigill.c
@@ -0,0 +1,360 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+ Copyright 2010 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <stdio.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+
+#define gettid() syscall (__NR_gettid)
+
+/* Terminate always in the main task, it can lock up with SIGSTOPped GDB
+ otherwise. */
+#define TIMEOUT (gettid () == getpid() ? 10 : 15)
+
+static pid_t thread1_tid;
+static pthread_cond_t thread1_tid_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t thread1_tid_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+
+static pid_t thread2_tid;
+static pthread_cond_t thread2_tid_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t thread2_tid_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+
+static pthread_mutex_t terminate_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+
+/* Lock MUTEX by polling, exiting with failure after TIMEOUT seconds. alarm is not
+ used as it would create a ptrace event, hanging us if our tracing GDB is stopped. */
+
+static void timed_mutex_lock (pthread_mutex_t *mutex)
+{
+ int i;
+ struct timespec start, now;
+
+ i = clock_gettime (CLOCK_MONOTONIC, &start);
+ assert (i == 0);
+
+ do
+ {
+ i = pthread_mutex_trylock (mutex);
+ if (i == 0)
+ return; /* Got the lock. */
+ assert (i == EBUSY); /* Any other trylock error is unexpected. */
+
+ i = clock_gettime (CLOCK_MONOTONIC, &now);
+ assert (i == 0);
+ assert (now.tv_sec >= start.tv_sec);
+ }
+ while (now.tv_sec - start.tv_sec < TIMEOUT); /* Whole-second granularity. */
+
+ fprintf (stderr, "Timed out waiting for internal lock!\n");
+ exit (EXIT_FAILURE);
+}
+
+static void *
+thread_func (void *threadno_voidp)
+{ /* Thread body: publish our TID to main, pass the breakpoint spot, then wait to terminate. */
+ int threadno = (intptr_t) threadno_voidp; /* 1 or 2, as passed by main. */
+ int i;
+
+ switch (threadno)
+ {
+ case 1:
+ timed_mutex_lock (&thread1_tid_mutex);
+
+ /* THREAD1_TID_MUTEX must already be locked here to avoid a race. */
+ thread1_tid = gettid ();
+
+ i = pthread_cond_signal (&thread1_tid_cond);
+ assert (i == 0);
+ i = pthread_mutex_unlock (&thread1_tid_mutex);
+ assert (i == 0);
+
+ break;
+
+ case 2:
+ timed_mutex_lock (&thread2_tid_mutex);
+
+ /* THREAD2_TID_MUTEX must already be locked here to avoid a race. */
+ thread2_tid = gettid ();
+
+ i = pthread_cond_signal (&thread2_tid_cond);
+ assert (i == 0);
+ i = pthread_mutex_unlock (&thread2_tid_mutex);
+ assert (i == 0);
+
+ break;
+
+ default:
+ assert (0); /* main creates only threads 1 and 2. */
+ }
+
+#ifdef __ia64__
+ asm volatile ("label:\n" /* Symbol the .exp script places its breakpoint at. */
+ "nop.m 0\n"
+ "nop.i 0\n"
+ "nop.b 0\n");
+#endif /* __ia64__ */
+ /* break-here */
+
+ /* Be sure the "t (tracing stop)" test can proceed for both threads. */
+ timed_mutex_lock (&terminate_mutex);
+ i = pthread_mutex_unlock (&terminate_mutex);
+ assert (i == 0);
+
+ return NULL;
+}
+
+static const char *
+proc_string (const char *filename, const char *line)
+{ /* Return the text following prefix LINE on FILENAME's first matching line; exit on any failure. */
+ FILE *f;
+ static char buf[LINE_MAX]; /* The returned pointer aliases this buffer. */
+ size_t line_len = strlen (line);
+
+ f = fopen (filename, "r");
+ if (f == NULL)
+ {
+ fprintf (stderr, "fopen (\"%s\") for \"%s\": %s\n", filename, line,
+ strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+ while (errno = 0, fgets (buf, sizeof (buf), f)) /* errno cleared to tell EOF from error. */
+ {
+ char *s;
+
+ s = strchr (buf, '\n');
+ assert (s != NULL); /* Every line must fit in BUF and end with a newline. */
+ *s = 0;
+
+ if (strncmp (buf, line, line_len) != 0)
+ continue;
+
+ if (fclose (f))
+ {
+ fprintf (stderr, "fclose (\"%s\") for \"%s\": %s\n", filename, line,
+ strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+
+ return &buf[line_len];
+ }
+ if (errno != 0)
+ {
+ fprintf (stderr, "fgets (\"%s\"): %s\n", filename, strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+ fprintf (stderr, "\"%s\": No line \"%s\" found.\n", filename, line);
+ exit (EXIT_FAILURE);
+}
+
+static unsigned long
+proc_ulong (const char *filename, const char *line)
+{ /* Return the non-negative decimal number following prefix LINE in FILENAME; exit on failure. */
+ const char *s = proc_string (filename, line);
+ long retval;
+ char *end;
+
+ errno = 0;
+ retval = strtol (s, &end, 10);
+ if (retval < 0 || retval >= LONG_MAX || end == s || *end != '\0')
+ {
+ fprintf (stderr, "\"%s\":\"%s\": %ld, %s\n", filename, line, retval,
+ strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+ return retval;
+}
+
+static void
+state_wait (pid_t process, const char *wanted)
+{ /* Poll /proc/PROCESS/status until its "State:" value equals WANTED; exit after TIMEOUT seconds. */
+ char *filename;
+ int i;
+ struct timespec start, now;
+ const char *state;
+
+ i = asprintf (&filename, "/proc/%lu/status", (unsigned long) process);
+ assert (i > 0);
+
+ i = clock_gettime (CLOCK_MONOTONIC, &start);
+ assert (i == 0);
+
+ do
+ {
+ state = proc_string (filename, "State:\t");
+
+ /* torvalds/linux-2.6.git 464763cf1c6df632dccc8f2f4c7e50163154a2c0
+ has changed "T (tracing stop)" to "t (tracing stop)". Make the GDB
+ testcase backward compatible with older Linux kernels. */
+ if (strcmp (state, "T (tracing stop)") == 0)
+ state = "t (tracing stop)";
+
+ if (strcmp (state, wanted) == 0)
+ {
+ free (filename);
+ return;
+ }
+
+ if (sched_yield ()) /* Give the CPU to other tasks between polls. */
+ {
+ perror ("sched_yield()");
+ exit (EXIT_FAILURE);
+ }
+
+ i = clock_gettime (CLOCK_MONOTONIC, &now);
+ assert (i == 0);
+ assert (now.tv_sec >= start.tv_sec);
+ }
+ while (now.tv_sec - start.tv_sec < TIMEOUT);
+
+ fprintf (stderr, "Timed out waiting for PID %lu \"%s\" (now it is \"%s\")!\n",
+ (unsigned long) process, wanted, state);
+ exit (EXIT_FAILURE);
+}
+
+static volatile pid_t tracer = 0; /* PID of the GDB tracing us; 0 if none or already resumed. */
+static pthread_t thread1, thread2;
+
+static void
+cleanup (void)
+{ /* Send SIGCONT to the tracer if it is still recorded; called from main and via atexit. */
+ printf ("Resuming GDB PID %lu.\n", (unsigned long) tracer);
+
+ if (tracer)
+ {
+ int i;
+ pid_t tracer_save = tracer;
+
+ tracer = 0; /* Cleared first so that a later call does not signal again. */
+
+ i = kill (tracer_save, SIGCONT);
+ assert (i == 0);
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ int standalone = 0; /* Set by "-s": run without requiring a GDB tracer. */
+
+ if (argc == 2 && strcmp (argv[1], "-s") == 0)
+ standalone = 1;
+ else
+ assert (argc == 1);
+
+ setbuf (stdout, NULL);
+
+ timed_mutex_lock (&thread1_tid_mutex); /* Held until pthread_cond_wait below. */
+ timed_mutex_lock (&thread2_tid_mutex);
+
+ timed_mutex_lock (&terminate_mutex); /* Keeps the threads alive until unlocked below. */
+
+ i = pthread_create (&thread1, NULL, thread_func, (void *) (intptr_t) 1);
+ assert (i == 0);
+
+ i = pthread_create (&thread2, NULL, thread_func, (void *) (intptr_t) 2);
+ assert (i == 0);
+
+ if (!standalone)
+ {
+ tracer = proc_ulong ("/proc/self/status", "TracerPid:\t");
+ if (tracer == 0)
+ {
+ fprintf (stderr, "The testcase must be run by GDB!\n");
+ exit (EXIT_FAILURE);
+ }
+ if (tracer != getppid ())
+ {
+ fprintf (stderr, "The testcase parent must be our GDB tracer!\n");
+ exit (EXIT_FAILURE);
+ }
+ }
+
+ /* Make sure our debugger gets SIGCONT even if we crash, as we would deadlock
+ otherwise. */
+
+ atexit (cleanup);
+
+ printf ("Stopping GDB PID %lu.\n", (unsigned long) tracer);
+
+ if (tracer)
+ {
+ i = kill (tracer, SIGSTOP);
+ assert (i == 0);
+ state_wait (tracer, "T (stopped)");
+ }
+
+ /* Both threads are still blocked in timed_mutex_lock on thread*_tid_mutex, so
+ neither can have reached the breakpoint before GDB got stopped above. They
+ are released by pthread_cond_wait below. The `while' loops protect against
+ spurious pthread_cond_wait wakeups. */
+
+ printf ("Waiting till the threads initialize their TIDs.\n");
+
+ while (thread1_tid == 0)
+ {
+ i = pthread_cond_wait (&thread1_tid_cond, &thread1_tid_mutex);
+ assert (i == 0);
+ }
+
+ while (thread2_tid == 0)
+ {
+ i = pthread_cond_wait (&thread2_tid_cond, &thread2_tid_mutex);
+ assert (i == 0);
+ }
+
+ printf ("Thread 1 TID = %lu, thread 2 TID = %lu, PID = %lu.\n",
+ (unsigned long) thread1_tid, (unsigned long) thread2_tid,
+ (unsigned long) getpid ());
+
+ printf ("Waiting till the threads get trapped by the breakpoint.\n");
+
+ if (tracer)
+ {
+ /* s390x-unknown-linux-gnu will fail with "R (running)". */
+
+ state_wait (thread1_tid, "t (tracing stop)");
+
+ state_wait (thread2_tid, "t (tracing stop)");
+ }
+
+ cleanup (); /* Resume GDB now that both threads are trapped. */
+
+ printf ("Joining the threads.\n");
+
+ i = pthread_mutex_unlock (&terminate_mutex);
+ assert (i == 0);
+
+ i = pthread_join (thread1, NULL);
+ assert (i == 0);
+
+ i = pthread_join (thread2, NULL);
+ assert (i == 0);
+
+ printf ("Exiting.\n"); /* break-at-exit */
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/ia64-sigill.exp
@@ -0,0 +1,76 @@
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2010 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Test SIGILL generated by some special cases of breakpoints on ia64. The
+# problem was that SIGILL was kept pending in a non-current thread and reported
+# later, after its breakpoint had already been deleted. Moribund locations are
+# not active in the default all-stop mode.
+
+set testfile "ia64-sigill"
+set srcfile ${testfile}.c
+set binfile ${objdir}/${subdir}/${testfile}
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" ${binfile} executable [list debug additional_flags=-lrt]] != "" } {
+ return -1
+}
+
+clean_restart $testfile
+
+if ![runto_main] {
+ return -1
+}
+
+set test "info addr label"
+gdb_test_multiple $test $test {
+ -re "Symbol \"label\" is at 0x\[0-9a-f\]+0 in .*\r\n$gdb_prompt $" {
+ # Verify the label really starts at the start of ia64 bundle.
+ pass $test
+
+ # ia64 generates SIGILL for breakpoint at B slot of an MIB bundle.
+ gdb_test "break *label+2" {Breakpoint [0-9]+ at 0x[0-9a-f]+2:.*}
+ }
+ -re "No symbol \"label\" in current context\\.\r\n$gdb_prompt $" {
+ pass $test
+
+ # Either this target never generates non-SIGTRAP signals or they do
+ # not depend on the breakpoint address. Try any address.
+ gdb_breakpoint [gdb_get_line_number "break-here"]
+ }
+}
+
+gdb_test_no_output {set $sigill_bpnum=$bpnum}
+
+gdb_breakpoint [gdb_get_line_number "break-at-exit"]
+
+gdb_test_no_output "set debug infrun 1"
+
+# The ia64 SIGILL signal is visible only in the lin-lwp debug.
+gdb_test_no_output "set debug lin-lwp 1"
+
+gdb_test "continue" "Breakpoint \[0-9\]+,( .* in)? thread_func .*"
+
+gdb_test_no_output {delete $sigill_bpnum}
+
+set test "continue for the pending signal"
+gdb_test_multiple "continue" $test {
+ -re "Breakpoint \[0-9\]+, .*break-at-exit.*\r\n$gdb_prompt $" {
+ # Breakpoint has been skipped in the other thread.
+ pass $test
+ }
+ -re "Program received signal .*\r\n$gdb_prompt $" {
+ fail $test
+ }
+}
More information about the Gdb-patches
mailing list