From 6a8fe809a8c4cd59b51cfceefd032c8bb2f526d7 Mon Sep 17 00:00:00 2001 From: Stan Cox Date: Fri, 14 Dec 2012 13:59:42 -0500 Subject: [PATCH] Add process("NAME") to perf probe. * NEWS: Added. * stapprobes.3stap: Added. * perf.c (_stp_perf_init,_stp_perf_del): Add support for task. * perf.h (stap_perf_probe): Add per_thread, per_thread_event, tgt. * tapset-perfmon.cxx (TOK_PROCESS): New. (perf_derived_probe): Add has_process, process_name. (join_group): Enable the task finder. (emit_module_decls): Add perf_event.h. perf.h is needed sooner. Emit task finder callback: _stp_perf_probe_cb. Set up task finder. * perf.exp, towers.c: New. * unprivileged_probes.exp (restricted_probe_types): Added perf.type(number).config(number).process and perf.type(number).config(number).process(string). --- NEWS | 5 + man/stapprobes.3stap | 6 +- runtime/linux/perf.c | 106 +++++++----- runtime/linux/perf.h | 22 ++- tapset-perfmon.cxx | 81 ++++++++- testsuite/systemtap.base/perf.exp | 81 +++++++++ testsuite/systemtap.base/towers.c | 155 ++++++++++++++++++ .../unprivileged_probes.exp | 2 + 8 files changed, 403 insertions(+), 55 deletions(-) create mode 100644 testsuite/systemtap.base/perf.exp create mode 100644 testsuite/systemtap.base/towers.c diff --git a/NEWS b/NEWS index 8d833f0dc..d62f215ce 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ * What's new in version 2.1 +- Perf event probes may now be bound to a specific task using the + process-name part: probe perf.type(0).config(0).process("NAME") { } + If the probed process name is not specified, then it is inferred + from the -c CMD argument. + - Some error messages now refer to additional information that is found in man pages. 
These are generally named error::FOO(7stap) and may be read via diff --git a/man/stapprobes.3stap b/man/stapprobes.3stap index db2b0f22d..a8b5ff81b 100644 --- a/man/stapprobes.3stap +++ b/man/stapprobes.3stap @@ -1133,6 +1133,7 @@ the following syntax: .SAMPLE probe perf.type(NN).config(MM).sample(XX) probe perf.type(NN).config(MM) +probe perf.type(NN).config(MM).process("NAME") .ESAMPLE The systemtap probe handler is called once per XX increments of the underlying performance counter. The default sampling @@ -1144,7 +1145,10 @@ system call, and/or the file. Invalid combinations or exhausted hardware counter resources result in errors during systemtap script startup. Systemtap does not sanity-check the values: it merely passes them through to -the kernel for error- and safety-checking. +the kernel for error- and safety-checking. The perf event probe is +system-wide unless .process is specified, which binds the +probe to a specific process. If the process name is omitted then it +is inferred from the stap -c CMD argument. .SH EXAMPLES .PP diff --git a/runtime/linux/perf.c b/runtime/linux/perf.c index 77aa75b85..b631007d6 100644 --- a/runtime/linux/perf.c +++ b/runtime/linux/perf.c @@ -1,7 +1,6 @@ /* -*- linux-c -*- * Perf Functions - * Copyright (C) 2006 Red Hat Inc. - * Copyright (C) 2010 Red Hat Inc. + * Copyright (C) 2006-2012 Red Hat Inc. * * This file is part of systemtap, and is free software. You can * redistribute it and/or modify it under the terms of the GNU General @@ -26,43 +25,60 @@ * * @param stp Handle for the event to be registered. 
*/ -static long _stp_perf_init (struct stap_perf_probe *stp) +static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task) { int cpu; - /* allocate space for the event descriptor for each cpu */ - stp->events = _stp_alloc_percpu (sizeof(struct perf_event*)); - if (stp->events == NULL) { - return -ENOMEM; + if (stp->per_thread) { + if (task == 0) /* need to setup later when we know the task */ + return 0; + else { + if (stp->per_thread_event != 0) /* already setup */ + return 0; + stp->per_thread_event = perf_event_create_kernel_counter(&stp->attr, + -1, task, + stp->callback +#ifdef STAPCONF_PERF_COUNTER_CONTEXT + , NULL +#endif + ); + } } + else { + /* allocate space for the event descriptor for each cpu */ + stp->events = _stp_alloc_percpu (sizeof(struct perf_event*)); + if (stp->events == NULL) { + return -ENOMEM; + } - /* initialize event on each processor */ - for_each_possible_cpu(cpu) { - struct perf_event **event = per_cpu_ptr (stp->events, cpu); - if (cpu_is_offline(cpu)) { - *event = NULL; - continue; - } - *event = perf_event_create_kernel_counter(&stp->attr, - cpu, + /* initialize event on each processor */ + for_each_possible_cpu(cpu) { + struct perf_event **event = per_cpu_ptr (stp->events, cpu); + if (cpu_is_offline(cpu)) { + *event = NULL; + continue; + } + *event = perf_event_create_kernel_counter(&stp->attr, + cpu, #if defined(STAPCONF_PERF_STRUCTPID) || defined (STAPCONF_PERF_COUNTER_CONTEXT) - NULL, + NULL, #else - -1, + -1, #endif - stp->callback + stp->callback #ifdef STAPCONF_PERF_COUNTER_CONTEXT - , NULL + , NULL #endif - ); + ); - if (IS_ERR(*event)) { - long rc = PTR_ERR(*event); - *event = NULL; - _stp_perf_del(stp); - return rc; - } - } + if (IS_ERR(*event)) { + long rc = PTR_ERR(*event); + *event = NULL; + _stp_perf_del(stp); + return rc; + } + } + } /* (stp->per_thread) */ return 0; } @@ -73,18 +89,28 @@ static long _stp_perf_init (struct stap_perf_probe *stp) */ static void _stp_perf_del (struct stap_perf_probe 
*stp) { - if (stp && stp->events) { - int cpu; - /* shut down performance event sampling */ - for_each_possible_cpu(cpu) { - struct perf_event **event = per_cpu_ptr (stp->events, cpu); - if (*event) { - perf_event_release_kernel(*event); - } - } - _stp_free_percpu (stp->events); - stp->events = NULL; - } + int cpu; + if (! stp || !stp->events) + return; + + /* shut down performance event sampling */ + if (stp->per_thread) { + if (stp->per_thread_event) { + perf_event_release_kernel(stp->per_thread_event); + } + stp->per_thread_event = NULL; + } + else { + for_each_possible_cpu(cpu) { + struct perf_event **event = per_cpu_ptr (stp->events, cpu); + if (*event) { + perf_event_release_kernel(*event); + } + } + _stp_free_percpu (stp->events); + stp->events = NULL; + } } + #endif /* _PERF_C_ */ diff --git a/runtime/linux/perf.h b/runtime/linux/perf.h index 69c9a2d35..c4417ef92 100644 --- a/runtime/linux/perf.h +++ b/runtime/linux/perf.h @@ -1,7 +1,6 @@ /* -*- linux-c -*- * Perf Header File - * Copyright (C) 2006 Red Hat Inc. - * Copyright (C) 2010 Red Hat Inc. + * Copyright (C) 2006-2012 Red Hat Inc. * * This file is part of systemtap, and is free software. You can * redistribute it and/or modify it under the terms of the GNU General @@ -17,15 +16,24 @@ */ struct stap_perf_probe { - struct perf_event_attr attr; + struct perf_event_attr attr; perf_overflow_handler_t callback; const struct stap_probe * const probe; - - /* per-cpu data. allocated with _stp_alloc_percpu() */ - struct perf_event **events; + int per_thread; + union + { + /* per-cpu data. allocated with _stp_alloc_percpu() */ + struct perf_event **events; + struct + { + /* per-task data. 
allocated by perf_event_create_kernel_counter */ + struct perf_event *per_thread_event; + struct stap_task_finder_target tgt; + }; + }; }; -static long _stp_perf_init (struct stap_perf_probe *stp); +static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task); static void _stp_perf_del (struct stap_perf_probe *stp); diff --git a/tapset-perfmon.cxx b/tapset-perfmon.cxx index 53f74dc4f..35237da2f 100644 --- a/tapset-perfmon.cxx +++ b/tapset-perfmon.cxx @@ -1,5 +1,5 @@ // tapset for HW performance monitoring -// Copyright (C) 2005-2010 Red Hat Inc. +// Copyright (C) 2005-2012 Red Hat Inc. // Copyright (C) 2005-2007 Intel Corporation. // Copyright (C) 2008 James.Bottomley@HansenPartnership.com // @@ -10,10 +10,12 @@ #include "session.h" #include "tapsets.h" +#include "task_finder.h" #include "translate.h" #include "util.h" #include +#include extern "C" { #define __STDC_FORMAT_MACROS @@ -28,6 +30,7 @@ static const string TOK_PERF("perf"); static const string TOK_TYPE("type"); static const string TOK_CONFIG("config"); static const string TOK_SAMPLE("sample"); +static const string TOK_PROCESS("process"); // ------------------------------------------------------------------------ @@ -41,7 +44,9 @@ struct perf_derived_probe: public derived_probe int64_t event_type; int64_t event_config; int64_t interval; - perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, int64_t i); + bool has_process; + string process_name; + perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, int64_t i, bool pp, string pn); virtual void join_group (systemtap_session& s); }; @@ -57,9 +62,13 @@ struct perf_derived_probe_group: public generic_dpg perf_derived_probe::perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, - int64_t i): + int64_t i, + bool process_p, + string process_n): + derived_probe (p, l, true /* .components soon rewritten */), - event_type (type), event_config (config), interval (i) + event_type 
(type), event_config (config), interval (i), + has_process (process_p), process_name (process_n) { vector& comps = this->sole_location()->components; comps.clear(); @@ -67,6 +76,7 @@ perf_derived_probe::perf_derived_probe (probe* p, probe_point* l, comps.push_back (new probe_point::component (TOK_TYPE, new literal_number(type))); comps.push_back (new probe_point::component (TOK_CONFIG, new literal_number (config))); comps.push_back (new probe_point::component (TOK_SAMPLE, new literal_number (interval))); + comps.push_back (new probe_point::component (TOK_PROCESS, new literal_string (process_name))); } @@ -76,6 +86,9 @@ perf_derived_probe::join_group (systemtap_session& s) if (! s.perf_derived_probes) s.perf_derived_probes = new perf_derived_probe_group (); s.perf_derived_probes->enroll (this); + + enable_task_finder(s); + } @@ -85,7 +98,8 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s) if (probes.empty()) return; s.op->newline() << "/* ---- perf probes ---- */"; - s.op->newline() << "#include \"linux/perf.c\""; + s.op->newline() << "#include "; + s.op->newline() << "#include \"linux/perf.h\""; s.op->newline(); /* declarations */ @@ -106,6 +120,23 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s) } s.op->newline(); + // Output task finder callback routine that gets called for all + // perf probe types. 
+ s.op->newline() << "static int _stp_perf_probe_cb(struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {"; + s.op->indent(1); + s.op->newline() << "int rc = 0;"; + s.op->newline() << "struct stap_perf_probe *p = container_of(tgt, struct stap_perf_probe, tgt);"; + + s.op->newline() << "_stp_printf(\"XXX %d %d\\n\", current->pid, task_pid_nr_ns(tsk,task_active_pid_ns(current->parent)));"; + s.op->newline() << "if (register_p) "; + s.op->indent(1); + + s.op->newline() << "rc = _stp_perf_init(p, tsk);"; + s.op->newline(-1) << "else"; + s.op->newline(1) << "_stp_perf_del(p);"; + s.op->newline(-1) << "return rc;"; + s.op->newline(-1) << "}"; + /* data structures */ s.op->newline() << "static struct stap_perf_probe stap_perf_probes [" << probes.size() << "] = {"; @@ -119,6 +150,31 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s) << "{ .sample_period=" << probes[i]->interval << "ULL }},"; s.op->newline() << ".callback=enter_perf_probe_" << i << ", "; s.op->newline() << ".probe=" << common_probe_init (probes[i]) << ", "; + + string l_process_name; + if (probes[i]->has_process) + { + if (probes[i]->process_name.length() == 0) + { + wordexp_t words; + int rc = wordexp(s.cmd.c_str(), &words, WRDE_NOCMD|WRDE_UNDEF); + if (rc || words.we_wordc <= 0) + throw semantic_error(_("unspecified process probe is invalid without a -c COMMAND")); + l_process_name = words.we_wordv[0]; + wordfree (& words); + } + else + l_process_name = probes[i]->process_name; + + s.op->line() << " .tgt={"; + s.op->line() << " .procname=\"" << l_process_name << "\","; + s.op->line() << " .pid=0,"; + s.op->line() << " .callback=&_stp_perf_probe_cb,"; + s.op->line() << " },"; + s.op->newline() << ".per_thread=" << "1, "; + } + else + s.op->newline() << ".per_thread=" << "0, "; s.op->newline(-1) << "},"; } s.op->newline(-1) << "};"; @@ -159,6 +215,10 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s) s.op->newline() << 
"(*stp->probe->ph) (c);"; common_probe_entryfn_epilogue (s, true); s.op->newline(-1) << "}"; + + s.op->newline(); + s.op->newline() << "#include \"linux/perf.c\""; + s.op->newline(); } @@ -169,7 +229,7 @@ perf_derived_probe_group::emit_module_init (systemtap_session& s) s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {"; s.op->newline(1) << "struct stap_perf_probe* stp = & stap_perf_probes [i];"; - s.op->newline() << "rc = _stp_perf_init(stp);"; + s.op->newline() << "rc = _stp_perf_init(stp, 0);"; s.op->newline() << "if (rc) {"; s.op->newline(1) << "probe_point = stp->probe->pp;"; s.op->newline() << "for (j=0; jnewline(-1) << "}"; // for unwind loop s.op->newline() << "break;"; s.op->newline(-1) << "}"; // if-error + s.op->newline() << "rc = stap_register_task_finder_target(&stp->tgt);"; s.op->newline(-1) << "}"; // for loop } @@ -232,12 +293,16 @@ perf_builder::build(systemtap_session & sess, else if (period < 1) throw semantic_error(_("invalid perf sample period ") + lex_cast(period), parameters.find(TOK_SAMPLE)->second->tok); + bool proc_p; + string proc_n; + proc_p = has_null_param(parameters, TOK_PROCESS) + || get_param(parameters, TOK_PROCESS, proc_n); if (sess.verbose > 1) clog << _F("perf probe type=%" PRId64 " config=%" PRId64 " period=%" PRId64, type, config, period) << endl; finished_results.push_back - (new perf_derived_probe(base, location, type, config, period)); + (new perf_derived_probe(base, location, type, config, period, proc_p, proc_n)); } @@ -252,6 +317,8 @@ register_tapset_perf(systemtap_session& s) match_node* event = perf->bind_num(TOK_TYPE)->bind_num(TOK_CONFIG); event->bind(builder); event->bind_num(TOK_SAMPLE)->bind(builder); + event->bind_str(TOK_PROCESS)->bind(builder); + event->bind(TOK_PROCESS)->bind(builder); } /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */ diff --git a/testsuite/systemtap.base/perf.exp b/testsuite/systemtap.base/perf.exp new file mode 100644 index 000000000..39fdab037 --- /dev/null +++ 
b/testsuite/systemtap.base/perf.exp @@ -0,0 +1,81 @@ +set test "perf" + +proc cleanup_handler { verbose } { + catch {exec rm -f towers.x} +} + +set stap_path $env(SYSTEMTAP_PATH)/stap +set exepath "[pwd]/towers.x" +set flags "" + +set subtest "process()" + +set res [target_compile $srcdir/$subdir/towers.c $exepath executable $flags] +if { $res != "" } { + verbose "target_compile failed: $res" 2 + fail "$test compiling towers.c" + cleanup_handler $verbose + return +} else { + pass "$test compiling towers.c" +} + +spawn $stap_path -c $exepath -e " +global towers_n +global XXX_n +probe perf.type(0).config(0).process(\"$exepath\") +{ + towers_n += 1 +} + +probe perf.type(0).config(0).process(\"XXX\") +{ + XXX_n += 1 +} +" + +# there is no "XXX" process so this probe should have been ignored +set ok 0 +expect { + -timeout 180 + -re {towers_n=0x[0-9a-f][0-9a-f]} { incr ok; exp_continue } + -re {XXX_n=0x0} { incr ok; exp_continue } + timeout { fail "$test (timeout)" } + eof { } +} + +catch {close}; catch {wait} + +if {$ok == 2} { + pass "$test $subtest" +} else { + fail "$test $subtest ($ok)" +} + +set subtest "process" + +spawn $stap_path -c $exepath -e " +global towers_n +probe perf.type(0).config(0).process +{ + towers_n += 1 +} +" + +set ok 0 +expect { + -timeout 180 + -re {towers_n=0x[0-9a-f][0-9a-f]} { incr ok; exp_continue } + timeout { fail "$test (timeout)" } + eof { } +} + +catch {close}; catch {wait} + +if {$ok == 1} { + pass "$test $subtest" +} else { + fail "$test $subtest ($ok)" +} + +cleanup_handler $verbose diff --git a/testsuite/systemtap.base/towers.c b/testsuite/systemtap.base/towers.c new file mode 100644 index 000000000..6465bc422 --- /dev/null +++ b/testsuite/systemtap.base/towers.c @@ -0,0 +1,155 @@ +# include +# include + +#define towersbase 2.39 + +/* Towers */ +#define maxcells 18 +#define stackrange 3 +#define true 1 +#define false 0 + +struct element +{ + int discsize; + int next; +}; + +/* Towers */ +int stack[stackrange + 1]; +struct element 
cellspace[maxcells + 1]; +int freelist, movesdone; + +/* Program to Solve the Towers of Hanoi */ + +void +error (emsg) + char *emsg; +{ + printf ("Error in Towers: %s\n", emsg); +} + +void +makenull (s) +{ + stack[s] = 0; +} + +int +get_element () +{ + int temp; + if (freelist > 0) + { + temp = freelist; + freelist = cellspace[freelist].next; + } + else + error ("out of space "); + return (temp); +} + +void +push (i, s) + int i, s; +{ + int errorfound, localel; + errorfound = false; + if (stack[s] > 0) + if (cellspace[stack[s]].discsize <= i) + { + errorfound = true; + error ("disc size error"); + }; + if (!errorfound) + { + localel = get_element (); + cellspace[localel].next = stack[s]; + stack[s] = localel; + cellspace[localel].discsize = i; + } +} + +void +init (s, n) + int s, n; +{ + int discctr; + makenull (s); + for (discctr = n; discctr >= 1; discctr--) + push (discctr, s); +} + +int +pop (s) + int s; +{ + int temp, temp1; + if (stack[s] > 0) + { + temp1 = cellspace[stack[s]].discsize; + temp = cellspace[stack[s]].next; + cellspace[stack[s]].next = freelist; + freelist = stack[s]; + stack[s] = temp; + return (temp1); + } + else + error ("nothing to pop "); + return 0; +} + +void +move (s1, s2) + int s1, s2; +{ + push (pop (s1), s2); + movesdone = movesdone + 1; +} + +void +tower (i, j, k) + int i, j, k; +{ + int other; + if (k == 1) + move (i, j); + else + { + other = 6 - i - j; + tower (i, other, k - 1); + move (i, j); + tower (other, j, k - 1); + } +} + + +void +towers () +{ + int i; + for (i = 1; i <= maxcells; i++) + cellspace[i].next = i - 1; + freelist = maxcells; + init (1, 14); + makenull (2); + makenull (3); + movesdone = 0; + tower (1, 2, 14); + if (movesdone != 16383) + printf (" error in Towers.\n"); +} + +#ifndef LOOP +#define LOOP 500 +#endif + +int +main () +{ + int i; + for (i= 0; i < LOOP; i++) + towers(); + return 0; +} + diff --git a/testsuite/systemtap.unprivileged/unprivileged_probes.exp 
b/testsuite/systemtap.unprivileged/unprivileged_probes.exp index 451b0be0c..8b630d121 100644 --- a/testsuite/systemtap.unprivileged/unprivileged_probes.exp +++ b/testsuite/systemtap.unprivileged/unprivileged_probes.exp @@ -183,6 +183,8 @@ set restricted_probe_types [list \ "module(string).statement(string)" \ "perf.type(number).config(number)" \ "perf.type(number).config(number).sample(number)" \ + "perf.type(number).config(number).process" \ + "perf.type(number).config(number).process(string)" \ "process(number).insn" \ "process(number).insn.block" \ "process(string).insn.block" \ -- 2.43.5