How to backtrace a separate stack?

Stefan Hajnoczi stefanha@redhat.com
Tue Mar 8 09:47:31 GMT 2022


On Mon, Mar 07, 2022 at 10:49:47AM +0000, Pedro Alves wrote:
> On 2022-03-03 11:22, Stefan Hajnoczi wrote:
> > Hi,
> > The QEMU emulator uses coroutines with separate stacks. It can be
> > challenging to debug coroutines that have yielded because GDB is not
> > aware of them (no thread is currently executing them).
> > 
> > QEMU has a GDB Python script that helps. It "creates" a stack frame for
> > a given coroutine by temporarily setting register values and then using
> > the "bt" command. This works on a live process under ptrace control but
> > not for coredumps where registers can't be set.
> > 
> > Here is the script (or see the bottom of this email for an inline copy
> > of the relevant code):
> > https://gitlab.com/qemu-project/qemu/-/blob/master/scripts/qemugdb/coroutine.py
> > 
> > I hoped that "select-frame address ADDRESS" could be used instead so
> > this would work on coredumps too. Unfortunately "select-frame" only
> > searches stack frames that GDB is already aware of, so it cannot be used
> > to backtrace coroutine stacks.
> > 
> > Is there a way to backtrace a stack at an arbitrary address in GDB?
> 
> I don't think there's an easy/great answer.  Maybe it could
> be done with a Python unwinder [1]?  See gdb.python/py-unwind-user-regs.py
> in the GDB testsuite for an example you could probably start with.

I tried writing an unwinder that returns the topmost coroutine stack
frame. "info threads" + "bt" shows the main stack though:

  (gdb) qemu coroutine 0x55be3c592120
    Id   Target Id                         Frame
  * 1    Thread 0x7f7abbdd4f00 (LWP 58989) Returning a frame with rip 0x55be3ae19ff4
  0x00007f7abcd2489e in __ppoll (fds=0x21, nfds=6717500806073509987, timeout=<optimized out>, sigmask=0x1f000) at ../sysdeps/unix/sysv/linux/ppoll.c:43
  ...
  #0  0x00007f7abcd2489e in __ppoll (fds=0x55be3c78a9f0, nfds=43, timeout=<optimized out>, timeout@entry=0x7ffef27cc040, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:43
  #1  0x000055be3ae26435 in ppoll (__ss=0x0, __timeout=0x7ffef27cc040, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/bits/poll2.h:81

I was hoping that frame #1 would be the coroutine stack since the debug
message "Returning a frame with rip 0x55be3ae19ff4" shows the unwinder
was invoked.

I've included the code below in case anyone has suggestions for making
the unwinder work. See bt_jmpbuf() and the Unwinder class.

The idea is that bt_jmpbuf() passes the registers of the coroutine to
the unwinder and invokes "info threads" + "bt". The unwinder only returns
a stack frame the first time it's invoked. It cannot unwind successive
stack frames so it disables itself after returning the topmost one (I
was hoping GDB's built-in unwinder would take over from there).

Thanks,
Stefan
---
#
# GDB debugging support
#
# Copyright 2012 Red Hat, Inc. and/or its affiliates
#
# Authors:
#  Avi Kivity <avi@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2
# or later.  See the COPYING file in the top-level directory.

import gdb
import gdb.unwinder

# GDB type object for 'void *'.  The qemu_coroutine_sp and qemu_coroutine_pc
# functions below cast their results to this type before returning them.
VOID_PTR = gdb.lookup_type('void').pointer()


class FrameId(object):
    '''Frame identifier holding a stack address and a program counter.

    Unwinder.__call__ builds one of these and hands it to
    pending_frame.create_unwind_info().
    '''

    def __init__(self, sp, pc):
        # Both values are stored as-is under the attribute names 'sp'
        # and 'pc'.
        self.sp, self.pc = sp, pc


class Unwinder(gdb.unwinder.Unwinder):
    '''One-shot unwinder that reports a caller-supplied register set.

    arm() stores a mapping of register name -> value.  The next time GDB
    calls the unwinder, __call__ turns that mapping into unwind info and
    disarms itself, so every later call returns None until arm() is
    called again.
    '''

    def __init__(self):
        super(Unwinder, self).__init__('QEMU coroutine unwinder')
        # Registers to report on the next call; None while disarmed.
        self._regs = None

    def arm(self, regs):
        '''Store regs (register name -> value) for the next __call__.

        Passing None, or an empty mapping, leaves the unwinder disarmed.
        '''
        self._regs = regs

    def __call__(self, pending_frame):
        '''Return unwind info built from the armed registers, or None.

        pending_frame is the object GDB passes to unwinders; only its
        create_unwind_info() method is used here.  None is returned
        whenever the unwinder is not armed.
        '''
        if not self._regs:
            return None

        # Disarm before doing anything else so the registers are reported
        # at most once per arm() call.
        regs, self._regs = self._regs, None

        # NOTE(review): the frame ID's stack address is taken from rbp, not
        # rsp -- confirm this is what GDB expects for these frames.
        frame_id = FrameId(regs['rbp'], regs['rip'])
        unwind_info = pending_frame.create_unwind_info(frame_id)
        for reg_name, value in regs.items():
            unwind_info.add_saved_register(reg_name, value)

        print('Returning a frame with rip 0x%x' % regs['rip'])
        return unwind_info


# Single module-level unwinder instance; bt_jmpbuf() arms it on demand.
unwinder = Unwinder()
# Registered with a locus of None.  replace=True lets this script be sourced
# again in the same GDB session: without it, registering a second unwinder
# under the same name raises an exception.
gdb.unwinder.register_unwinder(None, unwinder, replace=True)


def pthread_self():
    '''Fetch pthread_self() from the glibc start_thread function.

    Walks from the newest frame towards older ones until a frame named
    'start_thread' is found and returns its "arg" variable.  $fs_base is
    evaluated and returned instead when no such frame exists or when
    reading "arg" raises ValueError.
    '''
    fs_base_expr = '$fs_base'

    frame = gdb.newest_frame()
    while True:
        if frame.name() == 'start_thread':
            break
        frame = frame.older()
        if frame is None:
            # Ran off the end of the stack without finding start_thread.
            return gdb.parse_and_eval(fs_base_expr)

    try:
        thread_arg = frame.read_var("arg")
    except ValueError:
        return gdb.parse_and_eval(fs_base_expr)
    return thread_arg

def get_glibc_pointer_guard():
    '''Fetch glibc pointer guard value

    Reads the 64-bit value located 0x30 bytes past the address returned
    by pthread_self().
    '''
    thread_base = pthread_self()
    guard_expr = '*(uint64_t*)((uint64_t)%s + 0x30)' % thread_base
    return gdb.parse_and_eval(guard_expr)

def glibc_ptr_demangle(val, pointer_guard):
    '''Undo effect of glibc's PTR_MANGLE()

    Asks GDB to evaluate a 64-bit rotate-right of val by 0x11 bits,
    XORed with pointer_guard, and returns the result.
    '''
    operands = (val, val, pointer_guard)
    demangle_expr = '(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % operands
    return gdb.parse_and_eval(demangle_expr)

def get_jmpbuf_regs(jmpbuf):
    '''Decode a jmpbuf into a dict of register name -> value.

    rbx and r12-r15 are copied straight from their slots; rbp, rsp and
    rip are passed through glibc_ptr_demangle() with the pointer guard
    from get_glibc_pointer_guard().
    '''
    # Index of each register's slot inside jmpbuf.
    slot_rbx, slot_rbp, slot_r12, slot_r13 = 0, 1, 2, 3
    slot_r14, slot_r15, slot_rsp, slot_pc = 4, 5, 6, 7

    guard = get_glibc_pointer_guard()

    def demangled(slot):
        return glibc_ptr_demangle(jmpbuf[slot], guard)

    # Insertion order: rbx, rbp, rsp, r12, r13, r14, r15, rip.
    regs = {}
    regs['rbx'] = jmpbuf[slot_rbx]
    regs['rbp'] = demangled(slot_rbp)
    regs['rsp'] = demangled(slot_rsp)
    regs['r12'] = jmpbuf[slot_r12]
    regs['r13'] = jmpbuf[slot_r13]
    regs['r14'] = jmpbuf[slot_r14]
    regs['r15'] = jmpbuf[slot_r15]
    regs['rip'] = demangled(slot_pc)
    return regs

def bt_jmpbuf(jmpbuf):
    '''Backtrace a jmpbuf

    Decodes the registers stored in jmpbuf, arms the module-level
    unwinder with them and runs the GDB commands "info threads" and
    "bt".  The unwinder is always disarmed again before returning.
    '''
    regs = get_jmpbuf_regs(jmpbuf)
    unwinder.arm(regs)
    try:
        gdb.execute('info threads')
        gdb.execute('bt')
    finally:
        # If a command raised, or GDB never consulted the unwinder, the
        # registers would otherwise stay armed and be reported during some
        # later, unrelated unwind.
        unwinder.arm(None)

def co_cast(co):
    '''Return co cast to a pointer to CoroutineUContext.'''
    ucontext_ptr_type = gdb.lookup_type('CoroutineUContext').pointer()
    return co.cast(ucontext_ptr_type)

def coroutine_to_jmpbuf(co):
    '''Return the env.__jmpbuf member of co, viewed through co_cast().'''
    return co_cast(co)['env']['__jmpbuf']


class CoroutineCommand(gdb.Command):
    '''Display coroutine backtrace'''
    # NOTE(review): GDB is documented to use a command class's docstring as
    # the command's help text, so the docstring above is kept verbatim.

    def __init__(self):
        # Registers the command under the name 'qemu coroutine'.
        super(CoroutineCommand, self).__init__('qemu coroutine',
                                               gdb.COMMAND_DATA,
                                               gdb.COMPLETE_NONE)

    def invoke(self, arg, from_tty):
        # Exactly one argument is accepted: an expression that evaluates
        # to the coroutine pointer.
        argv = gdb.string_to_argv(arg)
        if len(argv) == 1:
            coroutine = gdb.parse_and_eval(argv[0])
            bt_jmpbuf(coroutine_to_jmpbuf(coroutine))
            return

        gdb.write('usage: qemu coroutine <coroutine-pointer>\n')

class CoroutineBt(gdb.Command):
    '''Display backtrace including coroutine switches'''
    # NOTE(review): GDB is documented to use a command class's docstring as
    # the command's help text, so the docstring above is kept verbatim.

    def __init__(self):
        # Registers the command under the name 'qemu bt'.
        super(CoroutineBt, self).__init__('qemu bt',
                                          gdb.COMMAND_STACK,
                                          gdb.COMPLETE_NONE)

    def invoke(self, arg, from_tty):
        # Plain GDB backtrace first.
        gdb.execute("bt")

        # Nothing more to show unless the inferior reports that it is
        # currently inside a coroutine.
        in_coroutine = gdb.parse_and_eval("qemu_in_coroutine()")
        if in_coroutine == False:
            return

        # Follow the base.caller links, backtracing each caller in turn,
        # until a caller pointer compares equal to 0.
        current = gdb.parse_and_eval("qemu_coroutine_self()")
        caller = co_cast(current)["base"]["caller"]
        while caller != 0:
            gdb.write("Coroutine at " + str(caller) + ":\n")
            bt_jmpbuf(coroutine_to_jmpbuf(caller))
            caller = co_cast(caller)["base"]["caller"]

class CoroutineSPFunction(gdb.Function):
    # GDB function registered under the name 'qemu_coroutine_sp'.  Given a
    # coroutine pointer it returns the 'rsp' entry decoded from that
    # coroutine's jmpbuf, cast to VOID_PTR.

    def __init__(self):
        super(CoroutineSPFunction, self).__init__('qemu_coroutine_sp')

    def invoke(self, addr):
        jmpbuf = coroutine_to_jmpbuf(addr)
        stack_pointer = get_jmpbuf_regs(jmpbuf)['rsp']
        return stack_pointer.cast(VOID_PTR)

class CoroutinePCFunction(gdb.Function):
    # GDB function registered under the name 'qemu_coroutine_pc'.  Given a
    # coroutine pointer it returns the 'rip' entry decoded from that
    # coroutine's jmpbuf, cast to VOID_PTR.

    def __init__(self):
        super(CoroutinePCFunction, self).__init__('qemu_coroutine_pc')

    def invoke(self, addr):
        jmpbuf = coroutine_to_jmpbuf(addr)
        program_counter = get_jmpbuf_regs(jmpbuf)['rip']
        return program_counter.cast(VOID_PTR)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: not available
URL: <https://sourceware.org/pipermail/gdb/attachments/20220308/c6714801/attachment-0001.sig>


More information about the Gdb mailing list