This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
[PR6580; patch for review] first step of unwind_cache enhancement
- From: Serguei Makarov <smakarov at redhat dot com>
- To: systemtap at sourceware dot org
- Cc: "Frank Ch. Eigler" <fche at redhat dot com>, Mark Wielaard <mjw at redhat dot com>
- Date: Wed, 5 Sep 2012 11:43:07 -0400 (EDT)
- Subject: [PR6580; patch for review] first step of unwind_cache enhancement
The first incremental step to the runtime tweaks required to sanely implement PR6580. Namely, the kernel-side backtrace can be unwound one step at a time using _stp_stack_kernel_get, with the already unwound portion of the backtrace cached within the probe context.
This allows us to implement stack(n) (which returns the PC n levels deep within the stack) without having to resort to tokenizing a backtrace string each time.
The patch itself seems to be sound, but testing it drew attention to memory corruption errors in the existing DWARF unwinder (see PR14546).
diff --git a/runtime/common_probe_context.h b/runtime/common_probe_context.h
index df07b90..61c1b26 100644
--- a/runtime/common_probe_context.h
+++ b/runtime/common_probe_context.h
@@ -118,5 +118,12 @@ cycles_t cycles_sum;
/* Current state of the unwinder (as used in the unwind.c dwarf unwinder). */
#if defined(STP_NEED_UNWIND_DATA)
-struct unwind_context uwcontext;
+// Invariants (uwcontext currently refers to uwcontext_kernel):
+// uwcache.depth == 0 --> uwcontext needs to be initialized
+// uwcache.depth > 0 --> uwcontext is state after uwcache.depth unwind steps
+// uwcache.depth >= n --> uwcache.data[n] contains PC at depth n
+// EXCEPT WHEN n == 0, uwcache.data[n] == 0 (current PC not yet retrieved)
+struct unwind_cache uwcache;
+struct unwind_context uwcontext_user;
+struct unwind_context uwcontext_kernel;
#endif
diff --git a/runtime/stack-dwarf.c b/runtime/stack-dwarf.c
index 9c55997..b00ac1d 100644
--- a/runtime/stack-dwarf.c
+++ b/runtime/stack-dwarf.c
@@ -35,6 +35,55 @@ static int _stp_valid_pc_addr(unsigned long addr, struct task_struct *tsk)
#endif
}
+/* NB: Returns 0 for depth == 0 -- DWARF unwinder does not fetch
+ * current PC. Moreover, uwcontext is assumed to have been
+ * initialized by the caller. */
+unsigned long __stp_dwarf_stack_kernel_get(struct pt_regs *regs, int depth,
+ struct unwind_context *uwcontext,
+ struct unwind_cache *uwcache)
+{
+ struct unwind_frame_info *info = &uwcontext->info;
+ int ret;
+
+ /* check if answer is already defined in cache; this is probably
+ * redundant, being already handled for us by the caller */
+ if (unlikely(uwcache->depth >= depth))
+ return uwcache->data[depth];
+
+ /* check if unwind cannot proceed further */
+ if (uwcache->done)
+ return 0;
+
+ if (uwcache->depth == 0) /* need to initialize uwcontext->info */
+ arch_unw_init_frame_info(info, regs, 0);
+
+ dbug_unwind(1, "RESUMING from %d for %d more levels",
+ uwcache->depth, depth - uwcache->depth);
+
+ while (uwcache->depth < depth) {
+ ret = unwind(uwcontext, 0);
+ dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
+
+ /* check if unwind hit an error */
+ if (ret || ! _stp_valid_pc_addr(UNW_PC(info), NULL)) {
+ uwcache->done = 1; break;
+ }
+
+ uwcache->depth++;
+ uwcache->data[uwcache->depth] = UNW_PC(info);
+
+ if (UNW_PC(info) == _stp_kretprobe_trampoline) {
+ uwcache->done = 1; break;
+ /* XXX: if there a way to unwind across kretprobe trampolines? PR9999 */
+ }
+ }
+
+ /* unwind may not have reached the desired depth */
+ return uwcache->depth >= depth ? uwcache->data[depth] : 0;
+}
+
+
+// XXX: rewrite in terms of _get
static void __stp_dwarf_stack_kernel_print(struct pt_regs *regs, int verbose,
int levels, struct unwind_context *uwcontext)
{
diff --git a/runtime/stack.c b/runtime/stack.c
index 413ef1e..1834298 100644
--- a/runtime/stack.c
+++ b/runtime/stack.c
@@ -165,7 +165,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
else if (c->uregs != NULL && c->kregs != NULL
&& ! (c->probe_flags & _STP_PROBE_STATE_USER_MODE))
{
- struct unwind_frame_info *info = &c->uwcontext.info;
+ struct unwind_frame_info *info = &c->uwcontext_user.info;
int ret = 0;
int levels;
@@ -189,7 +189,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
while (levels > 0 && ret == 0 && UNW_PC(info) != REG_IP(c->uregs))
{
levels--;
- ret = unwind(&c->uwcontext, 0);
+ ret = unwind(&c->uwcontext_user, 0);
dbug_unwind(1, "unwind levels: %d, ret: %d, pc=0x%lx\n",
levels, ret, UNW_PC(info));
}
@@ -213,6 +213,66 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
return c->uregs;
}
+/** Returns the PC at given depth in the stack.
+ * @param depth Depth to which to descend in the stack. Depth 0 gives the current PC.
+ *
+ * Unwinds the backtrace at most once and uses a cache for subsequent
+ * queries. Returns 0 on error, or beyond the end of the visible
+ * stack.
+ */
+static unsigned long _stp_stack_kernel_get(struct context *c, unsigned depth)
+{
+ /* TODOXXX add appropriate dbug_unwind messages */
+ struct pt_regs *regs = NULL;
+
+ if (depth >= MAXBACKTRACE)
+ return 0; /* special case which cannot query the cache */
+
+ /* We always start by fetching the current PC if it's not yet known. */
+ if (c->uwcache.depth == 0 && c->uwcache.data[0] == 0) {
+ if (! c->kregs) {
+ /* Even the current PC is unknown; so we have absolutely no data
+ * at any depth. Note that unlike _stp_stack_kernel_print(), we
+ * can't fall back to calling dump_trace() to obtain the
+ * backtrace. */
+ c->uwcache.done = 1; return 0;
+ } else if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE
+ && c->ips.krp.pi) {
+ c->uwcache.data[0] = (unsigned long)_stp_ret_addr_r(c->ips.krp.pi);
+ } else {
+ c->uwcache.data[0] = REG_IP(c->kregs);
+ }
+ }
+
+ /* check if answer is already defined in cache */
+ if (c->uwcache.depth >= depth)
+ return c->uwcache.data[depth];
+
+ /* check if unwind cannot proceed further */
+ if (c->uwcache.done)
+ return 0;
+
+#ifdef STP_USE_DWARF_UNWINDER
+ if (unlikely (! c->kregs)) { /* XXX a bit of extra paranoia */
+ c->uwcache.done = 1; return 0;
+ }
+ regs = c->kregs;
+
+ if (c->uwcache.depth == 0) { /* need to initialize uwcontext */
+ if (c->uwcache.depth == 0 && c->uregs == &c->uwcontext_kernel.info.regs) {
+ /* Unwinder needs the reg state, clear uregs ref. */
+ c->uregs = NULL;
+ c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+ }
+ }
+ return __stp_dwarf_stack_kernel_get(regs, depth, &c->uwcontext_kernel, &c->uwcache);
+#else
+ /* don't use arch specific fallbacks */
+ c->uwcache.done = 1;
+ return 0;
+#endif
+}
+
/** Prints the stack backtrace
* @param regs A pointer to the struct pt_regs.
* @param verbose _STP_SYM_FULL or _STP_SYM_BRIEF
@@ -220,10 +280,14 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
static void _stp_stack_kernel_print(struct context *c, int sym_flags)
{
- struct pt_regs *regs = NULL;
+ unsigned n, remaining;
+ unsigned long l;
+
+ if (! c->kregs) {
+ /* This is a fatal block for _stp_stack_kernel_get,
+ but when printing a backtrace we can use this
+ inexact fallback.
- if (! c->kregs) {
- /* For the kernel we can use an inexact fallback.
When compiled with frame pointers we can do
a pretty good guess at the stack value,
otherwise let dump_stack guess it
@@ -248,36 +312,33 @@ static void _stp_stack_kernel_print(struct context *c, int sym_flags)
_stp_print("\n");
#endif
return;
- } else {
- regs = c->kregs;
- }
+ }
+
+ /* print the current address */
+ if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi
+ && (sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
+ _stp_print("Returning from: ");
+ _stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
+ sym_flags, NULL);
+ _stp_print("Returning to : ");
+ }
+ _stp_print_addr(_stp_stack_kernel_get(c, 0), sym_flags, NULL);
- /* print the current address */
- if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi) {
- if ((sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
- _stp_print("Returning from: ");
- _stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
- sym_flags, NULL);
- _stp_print("Returning to : ");
- }
- _stp_print_addr((unsigned long)_stp_ret_addr_r(c->ips.krp.pi),
- sym_flags, NULL);
- } else {
- _stp_print_addr(REG_IP(regs), sym_flags, NULL);
- }
-
- /* print rest of stack... */
#ifdef STP_USE_DWARF_UNWINDER
- if (c->uregs == &c->uwcontext.info.regs) {
- /* Unwinder needs the reg state, clear uregs ref. */
- c->uregs = NULL;
- c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
- }
- __stp_dwarf_stack_kernel_print(regs, sym_flags, MAXBACKTRACE,
- &c->uwcontext);
+ for (n = 1; n < MAXBACKTRACE; n++) {
+ l = _stp_stack_kernel_get(c, n);
+ if (l == 0) {
+ remaining = MAXBACKTRACE - n;
+ _stp_stack_print_fallback(UNW_SP(&c->uwcontext_kernel.info),
+ sym_flags, remaining, 0);
+ break;
+ } else {
+ _stp_print_addr(l, sym_flags, NULL);
+ }
+ }
#else
/* Arch specific fallback for kernel backtraces. */
- __stp_stack_print(regs, sym_flags, MAXBACKTRACE);
+ __stp_stack_print(regs, sym_flags, MAXBACKTRACE);
#endif
}
@@ -326,13 +387,13 @@ static void _stp_stack_user_print(struct context *c, int sym_flags)
/* print rest of stack... */
#ifdef STP_USE_DWARF_UNWINDER
- if (c->uregs == &c->uwcontext.info.regs) {
+ if (c->uregs == &c->uwcontext_user.info.regs) {
/* Unwinder needs the reg state, clear uregs ref. */
c->uregs = NULL;
c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
}
__stp_dwarf_stack_user_print(regs, sym_flags, MAXBACKTRACE,
- &c->uwcontext, ri, uregs_valid);
+ &c->uwcontext_user, ri, uregs_valid);
#else
/* User stack traces only supported for arches with dwarf unwinder. */
if (sym_flags & _STP_SYM_SYMBOL)
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
index a11dc48..2d367cb 100644
--- a/runtime/unwind/unwind.h
+++ b/runtime/unwind/unwind.h
@@ -336,4 +336,15 @@ struct unwind_context {
static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
+#ifndef MAXBACKTRACE
+#define MAXBACKTRACE 20
+#endif
+
+struct unwind_cache {
+ // XXX use more compact representation?
+ unsigned depth;
+ unsigned done; // if nonzero, unwind cannot proceed
+ unsigned long data[MAXBACKTRACE];
+};
+
#endif /*_STP_UNWIND_H_*/
diff --git a/tapset/linux/context-symbols.stp b/tapset/linux/context-symbols.stp
index 78a0ae3..d488c5f 100644
--- a/tapset/linux/context-symbols.stp
+++ b/tapset/linux/context-symbols.stp
@@ -12,13 +12,19 @@
//processor.
// </tapsetdescription>
+function __stack_raw (n:long) %{ /* pragma:unwind */
+ STAP_RETVALUE = _stp_stack_kernel_get (CONTEXT, (unsigned)STAP_ARG_n);
+%}
/**
* sfunction stack - return PC at stack unwind level n
* @n: number of levels to descend in the stack.
*/
function stack:long (n:long) {
- /* XXX this is the naive method */
+ r = __stack_raw(n);
+ if (r != 0) return r
+
+ /* fallback: parse backtrace() to go deeper in the stack */
b = backtrace (); orig_n = n;
sym = tokenize (b, " ");
if (sym == "") @__context_unwind_error(orig_n);
diff --git a/tapsets.cxx b/tapsets.cxx
index 5070485..196c87d 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -183,6 +183,12 @@ common_probe_entryfn_prologue (translator_output* o, string statestr,
o->newline() << "c->cycles_base = 0;";
o->newline() << "#endif";
*/
+
+ o->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
+ o->newline() << "c->uwcache.depth = 0;";
+ o->newline() << "c->uwcache.done = 0;";
+ o->newline() << "c->uwcache.data[0] = 0;";
+ o->newline() << "#endif";
}