This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: proposed perf.counter enhancement
# Example for the proposed @perf() operator: read the per-thread counter "z"
# in the probe on main() of ./bench.x and report the readings as a histogram.
global A
probe perf.type(0).config(0).thread("z") {
# this probe, even if it is never hit, sets up the counter "z"
}
probe process("./bench.x").function("main")
{
printf("In %s\n",pp())
# read counter "z" and add the value to the statistics aggregate A
A <<< @perf("z")
}
probe end
{
# print a logarithmic histogram of the values collected in A
print (@hist_log(A))
}
Add @perf support
-elaborate.cxx
-elaborate.h
-parse.cxx
-staptree.cxx
-staptree.h
translate time list of perfs in a probe
-elaborate.h perf_counter_idx
Associate a perf with a task
-_stp_perf_init add task parm
-_stp_perf_read_init
-perf_counter_idx in uprobes-common.h
-perf_counters in uprobes-inode.c (runtime perf list)
Initialize a perf for a task at runtime
-stapiu_target_reg
List of perf counters encountered
-session.h
Add thread suffix to perf probe
-tapset-perfmon.cxx
Emit perf reading code (_stp_perf_read above)
-tapsets.cxx::visit_perf_op
map of perfs for a probe
-dwarf_derived_probe
runtime list of perfs in a probe
-perf_counter_idx in uprobes-common.h
~/work/src ~/work/bld ~/work/stap/perf
diff --git a/elaborate.cxx b/elaborate.cxx
index 633e0c9..1579577 100644
--- a/elaborate.cxx
+++ b/elaborate.cxx
@@ -4472,6 +4472,14 @@ typeresolution_info::visit_entry_op (entry_op* e)
void
+typeresolution_info::visit_perf_op (perf_op* e)
+{
+ e->type = pe_long;
+ // throw semantic_error(_("unexpected @perf"), e->tok);
+}
+
+
+void
typeresolution_info::visit_cast_op (cast_op* e)
{
// Like target_symbol, a cast_op shouldn't survive this far
diff --git a/elaborate.h b/elaborate.h
index 403f507..087d5d1 100644
--- a/elaborate.h
+++ b/elaborate.h
@@ -121,6 +121,7 @@ struct typeresolution_info: public visitor
void visit_cast_op (cast_op* e);
void visit_defined_op (defined_op* e);
void visit_entry_op (entry_op* e);
+ void visit_perf_op (perf_op* e);
};
@@ -189,6 +190,10 @@ public:
// Location of semaphores to activate sdt probes
Dwarf_Addr sdt_semaphore_addr;
+ // Index of desired perf counter, -1 if none
+ std::vector<long> perf_counter_idx;
+ // int perf_counter_idx;
+
// index into session.probes[], set and used during translation
unsigned session_index;
};
diff --git a/parse.cxx b/parse.cxx
index a36f32d..a56e3a4 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -180,6 +180,7 @@ private: // nonterminals
target_symbol *parse_target_symbol (const token* t);
expression* parse_entry_op (const token* t);
expression* parse_defined_op (const token* t);
+ expression* parse_perf_op (const token* t);
expression* parse_expression ();
expression* parse_assignment ();
expression* parse_ternary ();
@@ -3325,6 +3326,9 @@ expression* parser::parse_symbol ()
if (name == "@entry")
return parse_entry_op (t);
+ if (name == "@perf")
+ return parse_perf_op (t);
+
if (name.size() > 0 && name[0] == '@')
{
stat_op *sop = new stat_op;
@@ -3618,6 +3622,18 @@ expression* parser::parse_entry_op (const token* t)
}
+// Parse a @perf(). Given head token has already been consumed.
+expression* parser::parse_perf_op (const token* t)
+{
+ perf_op* pop = new perf_op;
+ pop->tok = t;
+ expect_op("(");
+ pop->operand = parse_expression ();
+ expect_op(")");
+ return pop;
+}
+
+
void
parser::parse_target_symbol_components (target_symbol* e)
diff --git a/runtime/linux/perf.c b/runtime/linux/perf.c
index 77aa75b..f5c6903 100644
--- a/runtime/linux/perf.c
+++ b/runtime/linux/perf.c
@@ -26,29 +26,49 @@
*
* @param stp Handle for the event to be registered.
*/
-static long _stp_perf_init (struct stap_perf_probe *stp)
+static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task)
{
int cpu;
+ struct task_struct * perf_task;
+
+ if (stp->attr.sample_period == 0 && task == 0)
+ return 0;
+ if (stp->per_thread && task != 0) {
+ if (stp->events != 0) /* already setup */
+ return 0;
+ else {
+ stp->events = _stp_kmalloc(sizeof(struct perf_event*));
+ stp->events[0] = perf_event_create_kernel_counter(&stp->attr,
+ -1,
+ (struct task_struct *)task,
+ stp->callback
+#ifdef STAPCONF_PERF_COUNTER_CONTEXT
+ , NULL
+#endif
+ );
- /* allocate space for the event descriptor for each cpu */
- stp->events = _stp_alloc_percpu (sizeof(struct perf_event*));
- if (stp->events == NULL) {
- return -ENOMEM;
+ }
}
+ else {
+ /* allocate space for the event descriptor for each cpu */
+ stp->events = _stp_alloc_percpu (sizeof(struct perf_event*));
+ if (stp->events == NULL) {
+ return -ENOMEM;
+ }
- /* initialize event on each processor */
- for_each_possible_cpu(cpu) {
- struct perf_event **event = per_cpu_ptr (stp->events, cpu);
- if (cpu_is_offline(cpu)) {
- *event = NULL;
- continue;
- }
+ /* initialize event on each processor */
+ for_each_possible_cpu(cpu) {
+ struct perf_event **event = per_cpu_ptr (stp->events, cpu);
+ if (cpu_is_offline(cpu)) {
+ *event = NULL;
+ continue;
+ }
*event = perf_event_create_kernel_counter(&stp->attr,
cpu,
#if defined(STAPCONF_PERF_STRUCTPID) || defined (STAPCONF_PERF_COUNTER_CONTEXT)
- NULL,
+ NULL,
#else
- -1,
+ -1,
#endif
stp->callback
#ifdef STAPCONF_PERF_COUNTER_CONTEXT
@@ -57,12 +77,13 @@ static long _stp_perf_init (struct stap_perf_probe *stp)
);
if (IS_ERR(*event)) {
- long rc = PTR_ERR(*event);
- *event = NULL;
- _stp_perf_del(stp);
- return rc;
+ long rc = PTR_ERR(*event);
+ *event = NULL;
+ _stp_perf_del(stp);
+ return rc;
}
- }
+ }
+ } /* if (task != 0) else */
return 0;
}
@@ -73,18 +94,82 @@ static long _stp_perf_init (struct stap_perf_probe *stp)
*/
static void _stp_perf_del (struct stap_perf_probe *stp)
{
- if (stp && stp->events) {
int cpu;
- /* shut down performance event sampling */
- for_each_possible_cpu(cpu) {
- struct perf_event **event = per_cpu_ptr (stp->events, cpu);
- if (*event) {
- perf_event_release_kernel(*event);
- }
- }
- _stp_free_percpu (stp->events);
- stp->events = NULL;
+ if (! stp || !stp->events)
+ return;
+
+ /* shut down performance event sampling */
+ if (stp->per_thread) {
+ struct perf_event *event = stp->events[0];
+ if (event) {
+ perf_event_release_kernel(event);
+ }
+ _stp_kfree (stp->events);
+ stp->events = NULL;
}
+ else {
+ for_each_possible_cpu(cpu) {
+ struct perf_event **event = per_cpu_ptr (stp->events, cpu);
+ if (*event) {
+ perf_event_release_kernel(*event);
+ }
+ }
+ _stp_free_percpu (stp->events);
+ stp->events = NULL;
+ }
+}
+
+
+/*
+The first call to _stp_perf_init, via systemtap_module_init at runtime, is for
+setting up aggregate counters. Per-thread counters need to be set up when the
+thread is known. This is done by calling _stp_perf_init later when the thread
+is known. A per-thread perf counter is defined by a thread("var") suffix on
+the perf probe. It is defined by perf_builder. This counter is read on demand
+via the @perf("var") builtin, which is treated as a right-hand-side expression
+that reads the previously defined perf counter.
+It is expanded by dwarf_var_expanding_visitor.
+*/
+
+static int _stp_perf_read_init (unsigned i, void* task)
+{
+ /* Choose the stap_perf_probes entry */
+ struct stap_perf_probe* stp = & stap_perf_probes[i];
+
+ _stp_perf_init (stp, (struct task_struct*)task);
+ return 0;
}
+
+long _stp_perf_read (int ncpu, unsigned i)
+{
+ /* Choose the stap_perf_probes entry */
+ struct stap_perf_probe* stp = & stap_perf_probes[i];
+ u64 enabled, running;
+ struct perf_event *event;
+
+ if (stp == NULL)
+ {
+ _stp_printf ("perf_event_read_value: stp is null\n");
+ return 0;
+ }
+
+ if (stp->events == NULL)
+ {
+ _stp_printf ("perf_event_read_value: stp->events is null\n");
+ return 0;
+ }
+ else
+ event = stp->events[0];
+
+ if (event == NULL)
+ {
+ _stp_printf ("perf_event_read_value: event is null\n");
+ return 0;
+ }
+
+ return perf_event_read_value (event, &enabled, &running);
+}
+
+
#endif /* _PERF_C_ */
diff --git a/runtime/linux/perf.h b/runtime/linux/perf.h
index 2d7266c..4a27e06 100644
--- a/runtime/linux/perf.h
+++ b/runtime/linux/perf.h
@@ -21,12 +21,18 @@ struct stap_perf_probe {
perf_overflow_handler_t callback;
struct stap_probe * const probe;
- /* per-cpu data. allocated with _stp_alloc_percpu() */
+ /* ! per thread: per-cpu data. allocated with _stp_alloc_percpu() */
+ /* per thread: one event allocated with kmalloc */
+ int per_thread;
struct perf_event **events;
};
-static long _stp_perf_init (struct stap_perf_probe *stp);
+static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task);
static void _stp_perf_del (struct stap_perf_probe *stp);
+// moved to runtime_defines.h
+// static int _stp_perf_read_init (unsigned i);
+// static long _stp_perf_read (int ncpu, unsigned i);
+
#endif /* _PERF_H_ */
diff --git a/runtime/linux/uprobes-common.h b/runtime/linux/uprobes-common.h
index 8ec1856..6377dd4 100644
--- a/runtime/linux/uprobes-common.h
+++ b/runtime/linux/uprobes-common.h
@@ -26,6 +26,7 @@ struct stap_uprobe_spec {
unsigned return_p:1;
unsigned long address;
unsigned long sdt_sem_offset;
+ unsigned long perf_counter_idx;
struct stap_probe * const probe;
};
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index 9d4d867..5de2ae0 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -86,6 +86,8 @@ struct stapiu_consumer {
loff_t offset; /* the probe offset within the inode */
loff_t sdt_sem_offset; /* the semaphore offset from process->base */
+ long perf_counters_dim;
+ long (*perf_counters) [];
struct stap_probe * const probe;
};
@@ -249,13 +251,18 @@ stapiu_target_unreg(struct stapiu_target *target)
/* Register all uprobe consumers of a target. */
static int
-stapiu_target_reg(struct stapiu_target *target)
+stapiu_target_reg(struct stapiu_target *target, struct task_struct* task)
{
int ret = 0;
struct stapiu_consumer *c;
list_for_each_entry(c, &target->consumers, target_consumer) {
if (! c->registered) {
+ int i;
+ for (i=0; i < c->perf_counters_dim; i++) {
+ if ((*(c->perf_counters))[i] > -1)
+ _stp_perf_read_init ((*(c->perf_counters))[i], task);
+ }
ret = uprobe_register(target->inode, c->offset, &c->consumer);
if (ret) {
c->registered = 0;
@@ -388,7 +395,7 @@ stapiu_change_plus(struct stapiu_target* target, struct task_struct *task,
/* OK, we've checked the target's buildid. Now
* register all its consumers. */
- rc = stapiu_target_reg(target);
+ rc = stapiu_target_reg(target, task);
if (rc) {
/* Be sure to release the inode on failure. */
iput(target->inode);
diff --git a/runtime/runtime_defines.h b/runtime/runtime_defines.h
index 676cf5b..c93cb3b 100644
--- a/runtime/runtime_defines.h
+++ b/runtime/runtime_defines.h
@@ -111,3 +111,9 @@ enum stp_probe_type {
/* netfilter probe, triggered on network trafic */
stp_probe_type_netfilter,
};
+
+
+// not the right place for this, but dcl is needed before use by probe_*
+static long _stp_perf_read (int ncpu, unsigned i);
+
+static int _stp_perf_read_init (unsigned i, void* pid);
diff --git a/session.h b/session.h
index 4ba7e5c..f6c18d0 100644
--- a/session.h
+++ b/session.h
@@ -282,6 +282,7 @@ public:
std::vector<stapfile*> files;
std::vector<vardecl*> globals;
std::map<std::string,functiondecl*> functions;
+ std::map<std::string,long> perf_counters;
std::vector<derived_probe*> probes; // see also *_probes groups below
std::vector<embeddedcode*> embeds;
std::map<std::string, statistic_decl> stat_decls;
diff --git a/staptree.cxx b/staptree.cxx
index e8a4298..5f4e402 100644
--- a/staptree.cxx
+++ b/staptree.cxx
@@ -434,6 +434,12 @@ void entry_op::print (ostream& o) const
}
+void perf_op::print (ostream& o) const
+{
+ o << "@perf(" << *operand << ")";
+}
+
+
void vardecl::print (ostream& o) const
{
o << name;
@@ -1470,6 +1476,13 @@ entry_op::visit (visitor* u)
void
+perf_op::visit (visitor* u)
+{
+ u->visit_perf_op(this);
+}
+
+
+void
arrayindex::visit (visitor* u)
{
u->visit_arrayindex (this);
@@ -1877,6 +1890,13 @@ traversing_visitor::visit_entry_op (entry_op* e)
void
+traversing_visitor::visit_perf_op (perf_op* e)
+{
+ e->operand->visit (this);
+}
+
+
+void
traversing_visitor::visit_arrayindex (arrayindex* e)
{
for (unsigned i=0; i<e->indexes.size(); i++)
@@ -2075,6 +2095,13 @@ varuse_collecting_visitor::visit_entry_op (entry_op *e)
void
+varuse_collecting_visitor::visit_perf_op (perf_op *e)
+{
+ functioncall_traversing_visitor::visit_perf_op (e);
+}
+
+
+void
varuse_collecting_visitor::visit_print_format (print_format* e)
{
// NB: Instead of being top-level statements, "print" and "printf"
@@ -2514,6 +2541,13 @@ throwing_visitor::visit_entry_op (entry_op* e)
void
+throwing_visitor::visit_perf_op (perf_op* e)
+{
+ throwone (e->tok);
+}
+
+
+void
throwing_visitor::visit_arrayindex (arrayindex* e)
{
throwone (e->tok);
@@ -2786,6 +2820,13 @@ update_visitor::visit_entry_op (entry_op* e)
}
void
+update_visitor::visit_perf_op (perf_op* e)
+{
+ replace (e->operand);
+ provide (e);
+}
+
+void
update_visitor::visit_arrayindex (arrayindex* e)
{
replace (e->base);
@@ -3047,6 +3088,12 @@ deep_copy_visitor::visit_entry_op (entry_op* e)
}
void
+deep_copy_visitor::visit_perf_op (perf_op* e)
+{
+ update_visitor::visit_perf_op(new perf_op(*e));
+}
+
+void
deep_copy_visitor::visit_arrayindex (arrayindex* e)
{
update_visitor::visit_arrayindex(new arrayindex(*e));
diff --git a/staptree.h b/staptree.h
index c2d201a..085231f 100644
--- a/staptree.h
+++ b/staptree.h
@@ -307,6 +307,14 @@ struct entry_op: public expression
};
+struct perf_op: public expression
+{
+ expression *operand;
+ void print (std::ostream& o) const;
+ void visit (visitor* u);
+};
+
+
struct arrayindex: public expression
{
std::vector<expression*> indexes;
@@ -501,6 +509,7 @@ struct vardecl: public symboldecl
literal *init; // for global scalars only
bool synthetic; // for probe locals only, don't init on entry
bool wrap;
+ bool perf; // Implicit variable for perf counter use
};
@@ -508,7 +517,6 @@ struct vardecl_builtin: public vardecl
{
};
-
struct statement;
struct functiondecl: public symboldecl
{
@@ -786,6 +794,7 @@ struct visitor
virtual void visit_cast_op (cast_op* e) = 0;
virtual void visit_defined_op (defined_op* e) = 0;
virtual void visit_entry_op (entry_op* e) = 0;
+ virtual void visit_perf_op (perf_op* e) = 0;
};
@@ -831,8 +840,9 @@ struct traversing_visitor: public visitor
void visit_cast_op (cast_op* e);
void visit_defined_op (defined_op* e);
void visit_entry_op (entry_op* e);
+ void visit_perf_op (perf_op* e);
};
-
+
// A kind of traversing visitor, which also follows function calls.
// It uses an internal set object to prevent infinite recursion.
@@ -880,7 +890,7 @@ struct varuse_collecting_visitor: public functioncall_traversing_visitor
void visit_cast_op (cast_op* e);
void visit_defined_op (defined_op* e);
void visit_entry_op (entry_op* e);
-
+ void visit_perf_op (perf_op* e);
bool side_effect_free ();
bool side_effect_free_wrt (const std::set<vardecl*>& vars);
};
@@ -934,6 +944,7 @@ struct throwing_visitor: public visitor
void visit_cast_op (cast_op* e);
void visit_defined_op (defined_op* e);
void visit_entry_op (entry_op* e);
+ void visit_perf_op (perf_op* e);
};
// A visitor similar to a traversing_visitor, but with the ability to rewrite
@@ -1004,6 +1015,7 @@ struct update_visitor: public visitor
virtual void visit_cast_op (cast_op* e);
virtual void visit_defined_op (defined_op* e);
virtual void visit_entry_op (entry_op* e);
+ virtual void visit_perf_op (perf_op* e);
private:
std::stack<void *> targets;
@@ -1063,6 +1075,7 @@ struct deep_copy_visitor: public update_visitor
virtual void visit_cast_op (cast_op* e);
virtual void visit_defined_op (defined_op* e);
virtual void visit_entry_op (entry_op* e);
+ virtual void visit_perf_op (perf_op* e);
};
#endif // STAPTREE_H
diff --git a/tapset-perfmon.cxx b/tapset-perfmon.cxx
index 53f74dc..4f9b2a7 100644
--- a/tapset-perfmon.cxx
+++ b/tapset-perfmon.cxx
@@ -28,6 +28,7 @@ static const string TOK_PERF("perf");
static const string TOK_TYPE("type");
static const string TOK_CONFIG("config");
static const string TOK_SAMPLE("sample");
+static const string TOK_THREAD("thread");
// ------------------------------------------------------------------------
@@ -41,7 +42,8 @@ struct perf_derived_probe: public derived_probe
int64_t event_type;
int64_t event_config;
int64_t interval;
- perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, int64_t i);
+ string counter_var;
+ perf_derived_probe (probe* p, probe_point* l, int64_t type, int64_t config, int64_t i, string v);
virtual void join_group (systemtap_session& s);
};
@@ -57,9 +59,10 @@ struct perf_derived_probe_group: public generic_dpg<perf_derived_probe>
perf_derived_probe::perf_derived_probe (probe* p, probe_point* l,
int64_t type,
int64_t config,
- int64_t i):
+ int64_t i,
+ string v):
derived_probe (p, l, true /* .components soon rewritten */),
- event_type (type), event_config (config), interval (i)
+ event_type (type), event_config (config), interval (i), counter_var (v)
{
vector<probe_point::component*>& comps = this->sole_location()->components;
comps.clear();
@@ -67,6 +70,7 @@ perf_derived_probe::perf_derived_probe (probe* p, probe_point* l,
comps.push_back (new probe_point::component (TOK_TYPE, new literal_number(type)));
comps.push_back (new probe_point::component (TOK_CONFIG, new literal_number (config)));
comps.push_back (new probe_point::component (TOK_SAMPLE, new literal_number (interval)));
+ comps.push_back (new probe_point::component (TOK_THREAD, new literal_string (counter_var)));
}
@@ -85,7 +89,8 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s)
if (probes.empty()) return;
s.op->newline() << "/* ---- perf probes ---- */";
- s.op->newline() << "#include \"linux/perf.c\"";
+ s.op->newline() << "#include <linux/perf_event.h>";
+ s.op->newline() << "#include \"linux/perf.h\"";
s.op->newline();
/* declarations */
@@ -119,6 +124,10 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s)
<< "{ .sample_period=" << probes[i]->interval << "ULL }},";
s.op->newline() << ".callback=enter_perf_probe_" << i << ", ";
s.op->newline() << ".probe=" << common_probe_init (probes[i]) << ", ";
+ if (probes[i]->counter_var.length() == 0)
+ s.op->newline() << ".per_thread=" << "0, ";
+ else
+ s.op->newline() << ".per_thread=" << "1, ";
s.op->newline(-1) << "},";
}
s.op->newline(-1) << "};";
@@ -159,6 +168,9 @@ perf_derived_probe_group::emit_module_decls (systemtap_session& s)
s.op->newline() << "(*stp->probe->ph) (c);";
common_probe_entryfn_epilogue (s, true);
s.op->newline(-1) << "}";
+ s.op->newline();
+ s.op->newline() << "#include \"linux/perf.c\"";
+ s.op->newline();
}
@@ -169,7 +181,8 @@ perf_derived_probe_group::emit_module_init (systemtap_session& s)
s.op->newline() << "for (i=0; i<" << probes.size() << "; i++) {";
s.op->newline(1) << "struct stap_perf_probe* stp = & stap_perf_probes [i];";
- s.op->newline() << "rc = _stp_perf_init(stp);";
+ s.op->newline() << "rc = _stp_perf_init(stp, 0);";
+// s.op->newline() << "_stp_perf_read_init(stp);";
s.op->newline() << "if (rc) {";
s.op->newline(1) << "probe_point = stp->probe->pp;";
s.op->newline() << "for (j=0; j<i; j++) {";
@@ -233,11 +246,38 @@ perf_builder::build(systemtap_session & sess,
throw semantic_error(_("invalid perf sample period ") + lex_cast(period),
parameters.find(TOK_SAMPLE)->second->tok);
+ string var;
+ get_param(parameters, TOK_THREAD, var);
+ if (var.length() > 0)
+ {
+ period = 0; // perf_event_attr.sample_freq should be 0
+ int perf_n = 0;
+ for (unsigned i=0; i<sess.globals.size(); i++)
+ if (sess.globals[i]->name == var)
+ throw parse_error (_("duplicate global name"));
+ else if (sess.globals[i]->perf)
+ perf_n += 1;
+
+ vardecl* d = new vardecl;
+ token* tok = new token();
+ tok->type = tok_identifier;
+ tok->content = var;
+ tok->location = base->tok->location;
+ d->name = var;
+ d->tok = tok;
+ d->type = pe_long;
+ d->set_arity(0, tok);
+ d->perf = true;
+ // stap_perf_probes index
+ d->init = new literal_number (perf_n);
+ sess.globals.push_back (d);
+ }
+
if (sess.verbose > 1)
clog << _F("perf probe type=%" PRId64 " config=%" PRId64 " period=%" PRId64, type, config, period) << endl;
finished_results.push_back
- (new perf_derived_probe(base, location, type, config, period));
+ (new perf_derived_probe(base, location, type, config, period, var));
}
@@ -252,6 +292,7 @@ register_tapset_perf(systemtap_session& s)
match_node* event = perf->bind_num(TOK_TYPE)->bind_num(TOK_CONFIG);
event->bind(builder);
event->bind_num(TOK_SAMPLE)->bind(builder);
+ event->bind_str(TOK_THREAD)->bind(builder);
}
/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
diff --git a/tapsets.cxx b/tapsets.cxx
index c697749..b3d7249 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -2237,6 +2237,7 @@ struct dwarf_var_expanding_visitor: public var_expanding_visitor
void visit_target_symbol (target_symbol* e);
void visit_cast_op (cast_op* e);
void visit_entry_op (entry_op* e);
+ void visit_perf_op (perf_op* e);
private:
vector<Dwarf_Die>& getcuscope(target_symbol *e);
vector<Dwarf_Die>& getscopes(target_symbol *e);
@@ -2522,6 +2523,7 @@ dwarf_pretty_print::expand ()
// function pretty_print_X([pointer], [arg1, arg2, ...]) {
// try {
+
// return sprintf("{.foo=...}", (ts)->foo, ...)
// } catch {
// return "ERROR"
@@ -3793,6 +3795,67 @@ dwarf_var_expanding_visitor::visit_entry_op (entry_op *e)
provide (repl);
}
+void
+dwarf_var_expanding_visitor::visit_perf_op (perf_op *e)
+{
+ expression *repl = e;
+ printf ("XXX");
+ repl->print(cout);
+ printf ("\n");
+ token* t = new token;
+ t->location = e->tok->location;
+ t->type = tok_identifier;
+ t->content = ((literal_string*)e->operand)->value;
+
+ add_block = new block;
+
+ vardecl* c = NULL;
+ vector<vardecl*>::iterator it;
+ systemtap_session &s = this->q.sess;
+
+ // find perf_var: perf.type(N).config(N).thread("perf_var")
+ for ( it=s.globals.begin() ; it < s.globals.end(); it++ )
+ if ((*it)->name == ((literal_string*)e->operand)->value)
+ c = *it;
+ if (c == NULL)
+ throw semantic_error (_("perf.thread not defined"), t);
+
+ s.perf_counters[c->name] = ((literal_number*)c->init)->value;
+ embedded_expr *spri = new embedded_expr;
+ spri->tok = t;
+ spri->code = string("_stp_perf_read_init(" + lex_cast(((literal_number*)c->init)->value) + ")");
+
+ expr_statement* spri_s = new expr_statement;
+ spri_s->value = spri;
+ spri_s->tok = t;
+ // ((struct block*)add_block)->statements.push_back(spri_s);
+
+ // perf_var = _stp_perf_read (which_cpu, stap_perf_probes_idx)
+ symbol* c_s = new symbol;
+ c_s->name = c->name;
+ c_s->tok = t;
+ c_s->referent = c;
+
+ embedded_expr *spr = new embedded_expr;
+ spr->tok = t;
+ spr->code = string("_stp_perf_read(smp_processor_id()," + lex_cast(((literal_number*)c->init)->value) + ")");
+
+ assignment* ceqspr = new assignment;
+ ceqspr->left = c_s;
+ ceqspr->op = "=";
+ ceqspr->right = spr;
+ ceqspr->tok = t;
+
+ expr_statement* ceqspr_s = new expr_statement;
+ ceqspr_s->value = ceqspr;
+ ceqspr_s->tok = t;
+
+ add_block->print(cout);
+ ((struct block*)add_block)->statements.push_back(ceqspr_s);
+ add_block->print(cout);
+ provide (c_s);
+}
+
vector<Dwarf_Die>&
dwarf_var_expanding_visitor::getcuscope(target_symbol *e)
{
@@ -4292,6 +4355,15 @@ dwarf_derived_probe::dwarf_derived_probe(const string& funcname,
// XXX: user-space deref's for q.has_process!
dwarf_var_expanding_visitor v (q, scope_die, dwfl_addr);
v.replace (this->body);
+ std::map<std::string,long>::iterator pci;
+ for (pci = q.dw.sess.perf_counters.begin();
+ pci != q.dw.sess.perf_counters.end(); pci++)
+ {
+ this->perf_counter_idx.push_back((*pci).second);
+ // this->perf_counter_idx = (*pci).second;
+ q.dw.sess.perf_counters.erase(pci);
+ }
+ // this->perf_counter_idx = ((struct derived_probe*)(q.base_probe))->perf_counter_idx; // need a way to percolate this forward
if (!q.has_process)
access_vars = v.visited;
@@ -7405,6 +7477,10 @@ uprobe_derived_probe_group::emit_module_utrace_decls (systemtap_session& s)
s.op->line() << " .sdt_sem_offset=(unsigned long)0x"
<< hex << p->sdt_semaphore_addr << dec << "ULL,";
+ s.op->line() << " .perf_counter_idx[]={"
+ << "(unsigned long)"
+ << p->perf_counter_idx[0]
+ << "},";
if (p->has_return)
s.op->line() << " .return_p=1,";
s.op->line() << " },";
@@ -7664,6 +7740,22 @@ uprobe_derived_probe_group::emit_module_inode_decls (systemtap_session& s)
s.op->assert_0_indent();
// Declare the actual probes.
+ s.op->newline() << "long perf_counters[] = {";
+ unsigned pci;
+ for (pci=0; pci<probes.size(); pci++)
+ {
+ // this->perf_counter_idx.push_back((*pci).second);
+ uprobe_derived_probe *p = probes[pci];
+ std::vector<long>::iterator pcii;
+ for (pcii = p->perf_counter_idx.begin();
+ pcii != p->perf_counter_idx.end(); pcii++)
+ {
+ if (*pcii >= 0)
+ s.op->line() << *pcii << ", ";
+ }
+ }
+ s.op->newline() << "};";
+
s.op->newline() << "static struct stapiu_consumer "
<< "stap_inode_uprobe_consumers[] = {";
s.op->indent(1);
@@ -7678,6 +7770,8 @@ uprobe_derived_probe_group::emit_module_inode_decls (systemtap_session& s)
if (p->sdt_semaphore_addr)
s.op->line() << " .sdt_sem_offset=(loff_t)0x"
<< hex << p->sdt_semaphore_addr << dec << "ULL,";
+ s.op->line() << " .perf_counters_dim=" << pci << ",";
+ s.op->line() << " .perf_counters=&perf_counters,";
s.op->line() << " .probe=" << common_probe_init (p) << ",";
s.op->line() << " },";
}
diff --git a/testsuite/buildok/systemtap_privilege.stp b/testsuite/buildok/systemtap_privilege.stp
old mode 100755
new mode 100644
diff --git a/testsuite/semok/entry04.stp b/testsuite/semok/entry04.stp
old mode 100755
new mode 100644
diff --git a/testsuite/semok/pretty-uprobes.stp b/testsuite/semok/pretty-uprobes.stp
old mode 100755
new mode 100644
diff --git a/testsuite/semok/thirtysix-utrace.stp b/testsuite/semok/thirtysix-utrace.stp
old mode 100755
new mode 100644
diff --git a/translate.cxx b/translate.cxx
index 0bf50ed..47bdf50 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -196,6 +196,7 @@ struct c_unparser: public unparser, public visitor
void visit_cast_op (cast_op* e);
void visit_defined_op (defined_op* e);
void visit_entry_op (entry_op* e);
+ void visit_perf_op (perf_op* e);
};
// A shadow visitor, meant to generate temporary variable declarations
@@ -4327,6 +4328,13 @@ c_unparser::visit_entry_op (entry_op* e)
void
+c_unparser::visit_perf_op (perf_op* e)
+{
+ // throw semantic_error(_("cannot translate general @perf expression"), e->tok);
+}
+
+
+void
c_tmpcounter::load_map_indices(arrayindex *e)
{
symbol *array;