Bug 18869 - internal-error: inline_frame_this_id: Assertion `frame_id_p (*this_id)' failed.
Summary: internal-error: inline_frame_this_id: Assertion `frame_id_p (*this_id)' failed.
Status: NEW
Alias: None
Product: gdb
Classification: Unclassified
Component: gdb (show other bugs)
Version: 7.9
: P2 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2015-08-25 23:07 UTC by Andy Lutomirski
Modified: 2019-07-02 16:41 UTC (History)
4 users (show)

See Also:
Host:
Target:
Build:
Last reconfirmed:


Attachments
Full gdb session with extra debug information when gdb crash (12.68 KB, text/plain)
2018-09-27 17:12 UTC, Dawid
Details

Note You need to log in before you can comment on or make changes to this bug.
Description Andy Lutomirski 2015-08-25 23:07:15 UTC
Build this awful hack with -m32 -O2 -g.

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <sys/user.h>
#include <unistd.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <err.h>
#include <string.h>
#include <sys/auxv.h>

static void *vsyscall32;

static void do_full_vsyscall32(unsigned long *nr,
			       unsigned long *arg0, unsigned long *arg1,
			       unsigned long *arg2, unsigned long *arg3,
			       unsigned long *arg4, unsigned long *arg5)
{
	/*
	 * Calls *vsyscall32 with eax=*nr, ebx=*arg0, ecx=*arg1, edx=*arg2,
	 * esi=*arg3, edi=*arg4, ebp=*arg5, then stores each register's
	 * post-call value back through the same pointer.
	 *
	 * ebx and ebp are loaded and spilled by hand inside the asm
	 * rather than through register constraints.  Author's rationale:
	 * gcc cannot be forced to generate esp-relative references to
	 * locals, so we can't use any memory operands while ebp is arg5
	 * instead of the base pointer.
	 */
	register unsigned long sp asm("sp");	/* Hack: block fp omission */
	unsigned long saved_bx;
	asm volatile (
		"movl %%ebx, %[saved_bx]\n\t"	/* stash the incoming ebx in a local */
		"pushl %%ebp\n\t"	/* save the incoming ebp on the stack */
		"movl %[arg0], %%ebx\n\t"	/* ebx = *arg0 */
		"movl %[arg5], %%ebp\n\t"	/* ebp = *arg5 */
		"call *%[vsyscall32]\n\t"	/* indirect call through the vsyscall32 pointer */
		"xchgl %%ebp, (%%esp)\n\t"	/* restore BP but keep arg5 */
		"popl %[arg5]\n\t"	/* *arg5 = ebp value left by the call */
		"movl %%ebx, %[arg0]\n\t"	/* *arg0 = ebx value left by the call */
		"movl %[saved_bx], %%ebx"	/* put the stashed ebx back */
		: "+a" (*nr), [arg0] "+m" (*arg0), "+c" (*arg1), "+d" (*arg2),	/* eax, mem, ecx, edx */
		  "+S" (*arg3), "+D" (*arg4), [arg5] "+m" (*arg5),	/* esi, edi, mem */
		  [saved_bx] "+m" (saved_bx),
		  "+r" (sp)	/* makes the asm reference sp; see the hack note above */
		: [vsyscall32] "m" (vsyscall32));
}

int main()	/* reproducer driver: issues kill(getpid(), SIGUSR1) through the AT_SYSINFO entry */
{
	unsigned long nr = 224, a0 = 10, a1 = 11, a2 = 12, a3 = 13, a4 = 14, a5 = 15;

	vsyscall32 = (void *)getauxval(AT_SYSINFO);	/* entry address taken from the aux vector */

	if (vsyscall32) {	/* do nothing when no AT_SYSINFO value was obtained */
		nr = SYS_kill;	/* nr/a0/a1 together form kill(getpid(), SIGUSR1) */
		a0 = getpid();
		a1 = SIGUSR1;  /* <-- breakpoint here */
		do_full_vsyscall32(&nr, &a0, &a1, &a2, &a3, &a4, &a5);
		printf("%ld %ld %ld %ld %ld %ld %ld\n",	/* values written back after the call */
		       nr, a0, a1, a2, a3, a4, a5);
	}
	return 0;
}

$ gdb ./a.out 
GNU gdb (GDB) Fedora 7.9.1-17.fc22
Copyright (C) 2015 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ./a.out...done.
(gdb) b foo.c:60
Breakpoint 1 at 0x8048411: file foo.c, line 60.
(gdb) r
Starting program: /home/luto/apps/linux-devel/tools/testing/selftests/x86/a.out 
Missing separate debuginfos, use: dnf debuginfo-install glibc-2.21-7.fc22.i686

Program received signal SIGUSR1, User defined signal 1.
0xf7fdac10 in __kernel_vsyscall ()
(gdb) bt
../../gdb/inline-frame.c:167: internal-error: inline_frame_this_id: Assertion `frame_id_p (*this_id)' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n)
Comment 1 Avi Kivity 2018-03-11 13:34:04 UTC
Seeing something similar with a plain C++ application:

#0  0x00007f60b101b66b in raise () from /lib64/libc.so.6
#1  0x00007f60b101d381 in abort () from /lib64/libc.so.6
#2  0x000055bbcd2e96b6 in dump_core() ()
#3  0x000055bbcd2ec2d4 in internal_vproblem(internal_problem*, char const*, int, char const*, __va_list_tag*) ()
#4  0x000055bbcd2ec43b in internal_verror(char const*, int, char const*, __va_list_tag*) ()
#5  0x000055bbcd0e893f in internal_error(char const*, int, char const*, ...) ()
#6  0x000055bbcd20901b in inline_frame_this_id(frame_info*, void**, frame_id*) ()
#7  0x000055bbcd1cea7c in compute_frame_id(frame_info*) ()
#8  0x000055bbcd1cf09a in get_prev_frame_if_no_cycle(frame_info*) ()
#9  0x000055bbcd1d1348 in get_prev_frame_always_1(frame_info*) ()
#10 0x000055bbcd1d192d in get_prev_frame_always(frame_info*) ()
#11 0x000055bbcd1d1d78 in get_prev_frame(frame_info*) ()
#12 0x000055bbcd182374 in value_of_dwarf_reg_entry(type*, frame_info*, call_site_parameter_kind, call_site_parameter_u) ()
#13 0x000055bbcd1824ab in value_of_dwarf_block_entry(type*, frame_info*, unsigned char const*, unsigned long) ()
#14 0x000055bbcd1825cb in loclist_read_variable_at_entry(symbol*, frame_info*) ()
#15 0x000055bbcd293e98 in read_frame_arg(symbol*, frame_info*, frame_arg*, frame_arg*) ()
#16 0x000055bbcd294b13 in print_frame_args(symbol*, frame_info*, int, ui_file*) ()
#17 0x000055bbcd295506 in print_frame_info(frame_info*, int, print_what, int, int) ()
#18 0x000055bbcd296412 in backtrace_command(char const*, int) ()
#19 0x000055bbcd05cb42 in cmd_func(cmd_list_element*, char const*, int) ()
#20 0x000055bbcd2d0c23 in execute_command(char const*, int) ()
#21 0x000055bbcd1c57bc in command_handler(char const*) ()
#22 0x000055bbcd1c5b7a in command_line_handler(char*) ()
#23 0x000055bbcd1c498c in gdb_rl_callback_handler(char*) ()
#24 0x00007f60b42ca8be in rl_callback_read_char () from /lib64/libreadline.so.7
#25 0x000055bbcd1c48b6 in gdb_rl_callback_read_char_wrapper_noexcept() ()
#26 0x000055bbcd1c4939 in gdb_rl_callback_read_char_wrapper(void*) ()
#27 0x000055bbcd1c4ed0 in stdin_event_handler(int, void*) ()
#28 0x000055bbcd1c3c5d in gdb_wait_for_event(int) ()
#29 0x000055bbcd1c3dd2 in gdb_do_one_event() [clone .part.3] ()
#30 0x000055bbcd1c3f3e in start_event_loop() ()
#31 0x000055bbcd21ecc8 in captured_command_loop() ()
#32 0x000055bbcd21ff3d in gdb_main(captured_main_args*) ()
#33 0x000055bbccf9a3ab in main ()
Comment 2 Simon Marchi 2018-03-12 00:08:19 UTC
(In reply to Avi Kivity from comment #1)
> Seeing something similar with a plain C++ application:
> 
> #0  0x00007f60b101b66b in raise () from /lib64/libc.so.6
> #1  0x00007f60b101d381 in abort () from /lib64/libc.so.6
> #2  0x000055bbcd2e96b6 in dump_core() ()
> #3  0x000055bbcd2ec2d4 in internal_vproblem(internal_problem*, char const*,
> int, char const*, __va_list_tag*) ()
> #4  0x000055bbcd2ec43b in internal_verror(char const*, int, char const*,
> __va_list_tag*) ()
> #5  0x000055bbcd0e893f in internal_error(char const*, int, char const*, ...)
> ()
> #6  0x000055bbcd20901b in inline_frame_this_id(frame_info*, void**,
> frame_id*) ()
> #7  0x000055bbcd1cea7c in compute_frame_id(frame_info*) ()
> #8  0x000055bbcd1cf09a in get_prev_frame_if_no_cycle(frame_info*) ()
> #9  0x000055bbcd1d1348 in get_prev_frame_always_1(frame_info*) ()
> #10 0x000055bbcd1d192d in get_prev_frame_always(frame_info*) ()
> #11 0x000055bbcd1d1d78 in get_prev_frame(frame_info*) ()
> #12 0x000055bbcd182374 in value_of_dwarf_reg_entry(type*, frame_info*,
> call_site_parameter_kind, call_site_parameter_u) ()
> #13 0x000055bbcd1824ab in value_of_dwarf_block_entry(type*, frame_info*,
> unsigned char const*, unsigned long) ()
> #14 0x000055bbcd1825cb in loclist_read_variable_at_entry(symbol*,
> frame_info*) ()
> #15 0x000055bbcd293e98 in read_frame_arg(symbol*, frame_info*, frame_arg*,
> frame_arg*) ()
> #16 0x000055bbcd294b13 in print_frame_args(symbol*, frame_info*, int,
> ui_file*) ()
> #17 0x000055bbcd295506 in print_frame_info(frame_info*, int, print_what,
> int, int) ()
> #18 0x000055bbcd296412 in backtrace_command(char const*, int) ()
> #19 0x000055bbcd05cb42 in cmd_func(cmd_list_element*, char const*, int) ()
> #20 0x000055bbcd2d0c23 in execute_command(char const*, int) ()
> #21 0x000055bbcd1c57bc in command_handler(char const*) ()
> #22 0x000055bbcd1c5b7a in command_line_handler(char*) ()
> #23 0x000055bbcd1c498c in gdb_rl_callback_handler(char*) ()
> #24 0x00007f60b42ca8be in rl_callback_read_char () from
> /lib64/libreadline.so.7
> #25 0x000055bbcd1c48b6 in gdb_rl_callback_read_char_wrapper_noexcept() ()
> #26 0x000055bbcd1c4939 in gdb_rl_callback_read_char_wrapper(void*) ()
> #27 0x000055bbcd1c4ed0 in stdin_event_handler(int, void*) ()
> #28 0x000055bbcd1c3c5d in gdb_wait_for_event(int) ()
> #29 0x000055bbcd1c3dd2 in gdb_do_one_event() [clone .part.3] ()
> #30 0x000055bbcd1c3f3e in start_event_loop() ()
> #31 0x000055bbcd21ecc8 in captured_command_loop() ()
> #32 0x000055bbcd21ff3d in gdb_main(captured_main_args*) ()
> #33 0x000055bbccf9a3ab in main ()

Hi Avi,

Would it be possible to share a small reproducer (source code and gdb commands) that lead to this?

Simon
Comment 3 Avi Kivity 2018-03-12 07:21:38 UTC
A small reproducer would be hard. But I can easily share a large binary (built from open source code) that reproduces the problem.
Comment 4 Simon Marchi 2018-03-12 14:08:56 UTC
I guess it would be better than nothing.  Not sure you'll be able to attach it here if it's big though.

Does the problem happen if you save a core dump (gcore command) and do the backtrace with the core loaded?  If so, it would be easier for others to reproduce (and they wouldn't have to execute a random binary downloaded from the internet).
Comment 5 Avi Kivity 2018-03-12 14:18:46 UTC
Binary and core are in http://scratch.scylladb.com/avi/gdb-18869.tar.gz. Binary was built on updated Fedora 27. The backtrace command is sufficient to trigger the bug.
Comment 6 Dawid 2018-09-27 17:12:49 UTC
Created attachment 11281 [details]
Full gdb session with extra debug information when gdb crash
Comment 7 Dawid 2018-09-27 17:14:20 UTC
Hi,

I can easily reproduce this issue with gdb 8.2 after loading one of the (quite large) ScyllaDB binaries. The backtrace is as follows:

top-gdb) thread apply all bt
(top-gdb)

Thread 1 (Thread 0x7f3724befac0 (LWP 26109)):
#0  0x00007f3725494d7f in raise () from /usr/lib/libc.so.6
#1  0x00007f372547f672 in abort () from /usr/lib/libc.so.6
#2  0x0000562c1df268d6 in dump_core () at utils.c:255
#3  0x0000562c1df2934d in internal_vproblem(internal_problem *, const char *, int, const char *, typedef __va_list_tag __va_list_tag *) (problem=0x562c1e366e00 <internal_error_problem>, file=<optimized out>, line=160, fmt=<optimized out>, ap=<optimized out>)
    at utils.c:465
#4  0x0000562c1df2949b in internal_verror (file=<optimized out>, line=<optimized out>, fmt=<optimized out>, ap=ap@entry=0x7ffc03f05e20) at utils.c:490
#5  0x0000562c1dd7f31f in internal_error (file=file@entry=0x562c1e0f2424 "inline-frame.c", line=line@entry=160, fmt=<optimized out>) at common/errors.c:55
#6  0x0000562c1de363e6 in inline_frame_this_id (this_frame=0x562c69604970, this_cache=<optimized out>, this_id=0x562c696049d0) at inline-frame.c:169
#7  0x0000562c1ddfaa4c in compute_frame_id (fi=fi@entry=0x562c69604970) at frame.c:521
#8  0x0000562c1ddfb020 in get_prev_frame_if_no_cycle (this_frame=0x562c67de0380) at frame.c:1902
#9  0x0000562c1ddfcba0 in get_prev_frame_always_1 (this_frame=this_frame@entry=0x562c67de0380) at frame.c:2084
#10 0x0000562c1ddfd1ed in get_prev_frame_always (this_frame=0x562c67de0380) at frame.c:2100
#11 0x0000562c1ddfd498 in get_prev_frame (this_frame=this_frame@entry=0x562c67de0380) at frame.c:2353
#12 0x0000562c1ddb9584 in value_of_dwarf_reg_entry (type=type@entry=0x562c56fb4c10, frame=frame@entry=0x562c67de0380, kind=kind@entry=CALL_SITE_PARAMETER_DWARF_REG, kind_u=...) at dwarf2loc.c:1411
#13 0x0000562c1ddb96bb in value_of_dwarf_block_entry (type=0x562c56fb4c10, frame=0x562c67de0380, block=0x7f36a0a47af3 "U\366\310\\\003", block_len=<optimized out>) at dwarf2loc.c:1464
#14 0x0000562c1ddb97db in loclist_read_variable_at_entry (symbol=0x562c6048ee50, frame=0x562c67de0380) at dwarf2loc.c:4483
#15 0x0000562c1dec7bc8 in read_frame_arg (sym=0x562c6048ee50, frame=0x562c67de0380, argp=0x7ffc03f062c0, entryargp=0x7ffc03f062e0) at stack.c:370
#16 0x0000562c1dec8613 in print_frame_args (func=<optimized out>, frame=frame@entry=0x562c67de0380, num=num@entry=-1, stream=0x562c204b0a30) at stack.c:681
#17 0x0000562c1dec9031 in print_frame (print_args=<optimized out>, print_what=LOCATION, print_level=<optimized out>, frame=0x562c67de0380, sal=...) at stack.c:1203
#18 print_frame_info (frame=0x562c67de0380, print_level=<optimized out>, print_what=LOCATION, print_args=<optimized out>, set_current_sal=0) at stack.c:860
#19 0x0000562c1dec9c30 in backtrace_command_1 (from_tty=1, no_filters=<optimized out>, flags=..., count_exp=<optimized out>) at stack.c:1790
#20 backtrace_command (arg=<optimized out>, from_tty=1) at stack.c:1865
#21 0x0000562c1dc85752 in cmd_func (cmd=<optimized out>, args=<optimized out>, from_tty=<optimized out>) at cli/cli-decode.c:1857
#22 0x0000562c1df0c4a1 in execute_command (p=<optimized out>, p@entry=0x562c2204e730 "bt", from_tty=1) at top.c:630
#23 0x0000562c1ddf149c in command_handler (command=0x562c2204e730 "bt") at event-top.c:583
#24 0x0000562c1ddf20aa in command_line_handler (rl=<optimized out>) at event-top.c:774
#25 0x0000562c1ddf0cac in gdb_rl_callback_handler (rl=0x562c204a1110 "") at event-top.c:213
#26 0x0000562c1df678df in rl_callback_read_char () at callback.c:220
#27 0x0000562c1ddf0bd6 in gdb_rl_callback_read_char_wrapper_noexcept () at event-top.c:175
#28 0x0000562c1ddf0c59 in gdb_rl_callback_read_char_wrapper (client_data=<optimized out>) at event-top.c:191
#29 0x0000562c1ddf1100 in stdin_event_handler (error=<optimized out>, client_data=0x562c1fd816f0) at event-top.c:511
#30 0x0000562c1ddefff5 in gdb_wait_for_event (block=<optimized out>) at event-loop.c:859
#31 0x0000562c1ddf0198 in gdb_do_one_event () at event-loop.c:347
#32 0x0000562c1ddf02ae in gdb_do_one_event () at event-loop.c:371
#33 start_event_loop () at event-loop.c:371
#34 0x0000562c1de520db in captured_command_loop () at main.c:330
#35 0x0000562c1de5308d in captured_main (data=<optimized out>) at main.c:1157
#36 gdb_main (args=<optimized out>) at main.c:1173
#37 0x0000562c1dc34c9b in main (argc=<optimized out>, argv=<optimized out>) at gdb.c:32

As far as I can see, the crash comes from the unwinder (frame.c), so I used "set debug frame 1" to produce more debug output during the unwinding process.
Full gdb dump is attached.
Hope it helps.
Comment 8 Tim Small 2019-07-02 16:35:44 UTC
Not sure if this is the same issue or not.

I'm seeing this on 8.2.1 and git HEAD when debugging a trivial Rust application on a microcontroller.  I'm seeing the bug both on real hardware (connected with openocd), and also when running under qemu.  I've boiled it down to essentially an empty program (I was previously seeing it on a several hundred line interrupt-driven embedded app).

Steps to reproduce:

Build/debug host: Debian 10 x86-64

Rust 1.35.0 (also nightly)

gdb: 8.2.1, also 8.3.50.20190702-git

Simplest testcase (see below) gives:

~/prog/rust/rust-embedded-book-examples/gdb-testcase$ /usr/local/bin/gdb-arm-none-eabi -q target/thumbv7m-none-eabi/release/gdb-testcase
Reading symbols from target/thumbv7m-none-eabi/release/gdb-testcase...
(gdb) target remote :3333
Remote debugging using :3333
Reset () at /home/tim/.cargo/registry/src/github.com-1ecc6299db9ec823/cortex-m-rt-0.6.8/src/lib.rs:485
485         __pre_init();
(gdb) cont
Continuing.
^C
Program received signal SIGINT, Interrupt.
main () at src/main.rs:13
13              asm::nop(); // To not have main optimize to abort in release mode, remove when you add code
(gdb) bt
inline-frame.c:156: internal-error: void inline_frame_this_id(frame_info*, void**, frame_id*): Assertion `frame_id_p (*this_id)' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n) y

This is a bug, please report it.  For instructions, see:
<http://www.gnu.org/software/gdb/bugs/>.

inline-frame.c:156: internal-error: void inline_frame_this_id(frame_info*, void**, frame_id*): Assertion `frame_id_p (*this_id)' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Create a core file of GDB? (y or n) y
Aborted (core dumped)


Follow the instructions here:

https://rust-embedded.github.io/book/start/qemu.html

... with the following exceptions:

When prompted by:

cargo generate --git https://github.com/rust-embedded/cortex-m-quickstart

... give the name "gdb-testcase"

Use this code in src/main.rs :

#![no_std]
#![no_main]

// pick a panicking behavior
extern crate panic_halt; // you can put a breakpoint on `rust_begin_unwind` to catch panics

use cortex_m::asm;
use cortex_m_rt::entry;

#[entry]
fn main() -> ! { // never returns: spins forever executing a nop
    loop {
        asm::nop(); // To not have main optimize to abort in release mode, remove when you add code
        // your code goes here
    }
}



Set this in .cargo/config :
[target.thumbv7m-none-eabi]
runner = "qemu-system-arm -cpu cortex-m3 -machine lm3s6965evb -gdb tcp::3333 -S -nographic -semihosting-config enable=on,target=native -kernel"




Execute with:

cargo run --release

... This starts the code in qemu, busy-looping on a nop instruction.


Attach gdb to qemu, e.g.

/usr/local/bin/gdb-arm-none-eabi -q target/thumbv7m-none-eabi/release/gdb-testcase

Execute the following commands:

target remote :3333
cont
<ctrl-c>
bt
Comment 9 Tim Small 2019-07-02 16:41:33 UTC
Some background which might be useful...

This is a common debug case for embedded rust because:

It's common practice to debug code which has been built in "release mode", since this enables code-size related optimisations which can be critical in storage constrained embedded code (the debug symbols of course just stay on the host, so there is no storage penalty from this).

Many common embedded processors switch off their debug hardware when in low power sleep modes, so that when debugging you must make them busy-loop (e.g. for event-driven programs which would otherwise just sleep between timer or external interrupts) in order to be able to debug.

Others have hit this problem in similar environments e.g.

https://mozilla.logbot.info/rust-embedded/20190702#c16431411