1 // tapset for HW performance monitoring
2 // Copyright (C) 2005-2009 Red Hat Inc.
3 // Copyright (C) 2005-2007 Intel Corporation.
4 // Copyright (C) 2008 James.Bottomley@HansenPartnership.com
6 // This file is part of systemtap, and is free software. You can
7 // redistribute it and/or modify it under the terms of the GNU General
8 // Public License (GPL); either version 2, or (at your option) any
18 #include <perfmon/pfmlib.h>
19 #include <perfmon/perfmon.h>
24 using namespace __gnu_cxx
;
28 // ------------------------------------------------------------------------
29 // perfmon derived probes
30 // ------------------------------------------------------------------------
31 // This is a new interface to the perfmon hw.
35 struct perfmon_var_expanding_visitor
: public var_expanding_visitor
37 systemtap_session
& sess
;
38 unsigned counter_number
;
39 perfmon_var_expanding_visitor(systemtap_session
& s
, unsigned c
):
40 sess(s
), counter_number(c
) {}
41 void visit_target_symbol (target_symbol
* e
);
// Handles a `$`-prefixed target symbol found in a perfmon probe body by
// synthesizing an embedded-C function and a call to it.  Only "$counter" is
// accepted; any other name raises a semantic_error.
//
// NOTE(review): this listing appears to be missing lines of the definition
// (for example the conditions guarding the first and third throw, and the
// assignments of fdecl->name / fdecl->body).  The comments below describe
// only what is visible -- confirm against the complete source.
46 perfmon_var_expanding_visitor::visit_target_symbol (target_symbol
*e
)
// The symbol name must be non-empty and begin with '$'.
48 assert(e
->base_name
.size() > 0 && e
->base_name
[0] == '$');
50 // Synthesize a function.
51 functiondecl
*fdecl
= new functiondecl
;
52 fdecl
->synthetic
= true;
54 embeddedcode
*ec
= new embeddedcode
;
56 bool lvalue
= is_active_lvalue(e
);
// NOTE(review): presumably guarded by `if (lvalue)`; the guard is not visible.
59 throw semantic_error("writes to $counter not permitted");
// Generated function name: "_perfmon_tvar_get_<name without '$'>_<counter_number>".
61 string fname
= string("_perfmon_tvar_get")
62 + "_" + e
->base_name
.substr(1)
63 + "_" + lex_cast(counter_number
);
65 if (e
->base_name
!= "$counter")
66 throw semantic_error ("target variables not available to perfmon probes");
// NOTE(review): the condition for this throw is not visible in this listing.
69 throw semantic_error("cannot take address of perfmon variable", e
->tok
);
71 e
->assert_no_components("perfmon");
// Embedded C body: returns the reg_num field of entry `counter_number` of
// _pfm_pmd_x, then appends the "/* pure */" marker text.
73 ec
->code
= "THIS->__retvalue = _pfm_pmd_x[" +
74 lex_cast(counter_number
) + "].reg_num;";
75 ec
->code
+= "/* pure */";
78 fdecl
->type
= pe_long
;
// Record the synthesized function in the session's function table.
79 sess
.functions
[fdecl
->name
]=fdecl
;
81 // Synthesize a functioncall.
82 functioncall
* n
= new functioncall
;
85 n
->referent
= 0; // NB: must not resolve yet, to ensure inclusion in session
// Derived probe type for perfmon probes.  Visible here: a session
// reference, the constructor declaration, and the join_group override.
//
// NOTE(review): the constructor's initializer list elsewhere in this file
// also initializes `event` and `mode`, whose declarations are not visible in
// this listing; the class braces are likewise missing.
98 struct perfmon_derived_probe
: public derived_probe
100 systemtap_session
& sess
;
// Arguments: base probe, probe point, session, event string, perfmon mode.
104 perfmon_derived_probe (probe
* p
, probe_point
* l
, systemtap_session
&s
,
105 string e
, perfmon_mode m
);
106 virtual void join_group (systemtap_session
& s
);
110 struct perfmon_derived_probe_group
: public generic_dpg
<perfmon_derived_probe
>
113 void emit_module_decls (systemtap_session
&) {}
114 void emit_module_init (systemtap_session
&) {}
115 void emit_module_exit (systemtap_session
&) {}
// Probe builder for perfmon probe points.  build() reads the "counter"
// parameter into `event` (raising a semantic_error when it is absent) and
// appends a newly allocated perfmon_derived_probe to finished_results.
//
// NOTE(review): this listing is incomplete -- the class braces, one build()
// parameter (`base` is used below but never declared here), the declaration
// of `event`, and the tail of the constructor call are not visible.
119 struct perfmon_builder
: public derived_probe_builder
122 virtual void build(systemtap_session
& sess
,
124 probe_point
* location
,
125 literal_map_t
const & parameters
,
126 vector
<derived_probe
*> & finished_results
)
// The "counter" parameter is mandatory.
129 if (!get_param (parameters
, "counter", event
))
130 throw semantic_error("perfmon requires an event");
134 // XXX: need to revise when doing sampling
135 finished_results
.push_back(new perfmon_derived_probe(base
, location
,
142 perfmon_derived_probe::perfmon_derived_probe (probe
* p
, probe_point
* l
,
143 systemtap_session
&s
,
144 string e
, perfmon_mode m
)
145 : derived_probe (p
, l
), sess(s
), event(e
), mode(m
)
147 static unsigned probes_allocated
= 0;
149 // Now expand the local variables in the probe body
150 perfmon_var_expanding_visitor
v (sess
, probes_allocated
++);
151 v
.replace (this->body
);
153 if (sess
.verbose
> 1)
154 clog
<< "perfmon-based probe" << endl
;
159 perfmon_derived_probe::join_group (systemtap_session
& s
)
161 throw semantic_error ("incomplete", this->tok
);
163 if (! s
.perfmon_derived_probes
)
164 s
.perfmon_derived_probes
= new perfmon_derived_probe_group ();
165 s
.perfmon_derived_probes
->enroll (this);
// Emits, for every probe location i, a generated-code call of the form
// "enter_<name>_<i> ();" on its own output line.
//
// NOTE(review): the remainder of the parameter list and the function braces
// are not visible in this listing.
171 perfmon_derived_probe::emit_registrations_start (translator_output
* o
,
174 for (unsigned i
=0; i
<locations
.size(); i
++)
175 o
->newline() << "enter_" << name
<< "_" << i
<< " ();";
// NOTE(review): only the beginning of this definition's signature is visible
// in this listing; the remaining parameter(s) and the body are missing, so
// nothing can be stated about its behavior from here.
180 perfmon_derived_probe::emit_registrations_end (translator_output
* o
,
// NOTE(review): only the signature of this definition is visible in this
// listing; its body is missing, so nothing can be stated about its behavior
// from here.
187 perfmon_derived_probe::emit_deregistrations (translator_output
* o
)
// Writes the generated C entry code for this probe to `o`: an
// STP_TIMING-guarded Stat declaration, then for each location i a function
// "static void enter_<name>_<i> (void)" that invokes the probe handler
// "<name> (c);" between emit_probe_prologue and emit_probe_epilogue.
//
// NOTE(review): several lines of this definition are not visible in this
// listing (loop braces, the remaining arguments of the emit_probe_prologue
// call).  Comments describe only what is shown.
193 perfmon_derived_probe::emit_probe_entries (translator_output
* o
)
195 o
->newline() << "#ifdef STP_TIMING";
196 // NB: This variable may be multiply (but identically) defined.
197 o
->newline() << "static __cacheline_aligned Stat " << "time_" << basest()->name
<< ";";
198 o
->newline() << "#endif";
// One generated entry function per probe location.
200 for (unsigned i
=0; i
<locations
.size(); i
++)
202 probe_point
*l
= locations
[i
];
203 o
->newline() << "/* location " << i
<< ": " << *l
<< " */";
204 o
->newline() << "static void enter_" << name
<< "_" << i
<< " (void) {";
// The probe point text is emitted as a quoted C string.
207 o
->newline() << "const char* probe_point = "
208 << lex_cast_qstring(*l
) << ";";
// Emit static declarations for the perfmon context, descriptor, and the
// PMC/PMD table pointers and counts used by generated code.
210 o
->newline() << "static struct pfarg_ctx _pfm_context;";
211 o
->newline() << "static void *_pfm_desc;";
212 o
->newline() << "static struct pfarg_pmc *_pfm_pmc_x;";
213 o
->newline() << "static int _pfm_num_pmc_x;";
214 o
->newline() << "static struct pfarg_pmd *_pfm_pmd_x;";
215 o
->newline() << "static int _pfm_num_pmd_x;";
// The session-state string handed to the prologue depends on mode:
// "STAP_SESSION_STARTING" for perfmon_count, otherwise "STAP_SESSION_RUNNING".
217 emit_probe_prologue (o
,
218 (mode
== perfmon_count
?
219 "STAP_SESSION_STARTING" :
220 "STAP_SESSION_RUNNING"),
223 // NB: locals are initialized by probe function itself
224 o
->newline() << name
<< " (c);";
226 emit_probe_epilogue (o
);
228 o
->newline(-1) << "}\n";
235 void no_pfm_event_error (string s
)
237 string
msg(string("Cannot find event:" + s
));
238 throw semantic_error(msg
);
242 void no_pfm_mask_error (string s
)
244 string
msg(string("Cannot find mask:" + s
));
245 throw semantic_error(msg
);
// Splits `s` into the non-empty tokens delimited by any character of
// `separator` and appends them, in order, to `v`.
//
//  - `v` is appended to, never cleared.
//  - Runs of separator characters and leading/trailing separators produce no
//    empty tokens; an empty or all-separator `s` appends nothing.
static void
split(const std::string& s, std::vector<std::string>& v,
      const std::string& separator)
{
  // Start of the first token (npos when there is none).
  std::string::size_type last_pos = s.find_first_not_of(separator, 0);
  // First separator after that token (npos when the token runs to the end).
  std::string::size_type pos = s.find_first_of(separator, last_pos);

  while (std::string::npos != pos || std::string::npos != last_pos) {
    // When pos is npos the length wraps to a huge value; substr clamps it to
    // the end of the string, so the final token is taken whole.
    v.push_back(s.substr(last_pos, pos - last_pos));
    // Skip the separator run, then locate the end of the next token.
    last_pos = s.find_first_not_of(separator, pos);
    pos = s.find_first_of(separator, last_pos);
  }
}
// Asks the unparser `up` to emit each probe held in `probes`, in index order.
//
// NOTE(review): part of the loop body and the function braces are not
// visible in this listing; `op` is not used by any visible line.
264 perfmon_derived_probe_group::emit_probes (translator_output
* op
, unparser
* up
)
266 for (unsigned i
=0; i
< probes
.size(); i
++)
269 up
->emit_probe (probes
[i
]);
// Resolves every enrolled probe's event through libpfm at translation time,
// then writes two generated C functions to `o`:
//   - "register_perfmon_probes", containing the computed PMC/PMD tables and
//     the call to _stp_perfmon_setup;
//   - "unregister_perfmon_probes", which calls _stp_perfmon_shutdown.
// Failures (libpfm unavailable, unknown event or mask, too many events,
// dispatch failure) are raised as semantic_error.
//
// NOTE(review): this listing is incomplete -- the declarations of `ret`,
// `ctx` and `masks`, several statement tails, and most braces are not
// visible.  Comments below describe only what is shown.
275 perfmon_derived_probe_group::emit_module_init (translator_output
* o
)
// libpfm input/output parameter blocks and local PMD/PMC staging arrays.
278 pfmlib_input_param_t inp
;
279 pfmlib_output_param_t outp
;
280 pfarg_pmd_t pd
[PFMLIB_MAX_PMDS
];
281 pfarg_pmc_t pc
[PFMLIB_MAX_PMCS
];
283 pfarg_load_t load_args
;
284 pfmlib_options_t pfmlib_options
;
285 unsigned int max_counters
;
// NOTE(review): the statement controlled by this `if` is not visible.
287 if ( probes
.size() == 0)
// Initialize libpfm; anything other than PFMLIB_SUCCESS is fatal.
289 ret
= pfm_initialize();
290 if (ret
!= PFMLIB_SUCCESS
)
291 throw semantic_error("Unable to generate performance monitoring events (no libpfm)");
// Counter limit used for the "Too many ... events" check further down.
293 pfm_get_num_counters(&max_counters
);
// Library options: zeroed, with debug and verbose output switched off.
295 memset(&pfmlib_options
, 0, sizeof(pfmlib_options
));
296 pfmlib_options
.pfm_debug
= 0; /* set to 1 for debug */
297 pfmlib_options
.pfm_verbose
= 0; /* set to 1 for debug */
298 pfm_set_options(&pfmlib_options
);
// Zero all staging structures before use.
300 memset(pd
, 0, sizeof(pd
));
301 memset(pc
, 0, sizeof(pc
));
302 memset(&ctx
, 0, sizeof(ctx
));
303 memset(&load_args
, 0, sizeof(load_args
));
306 * prepare parameters to library.
308 memset(&inp
,0, sizeof(inp
));
309 memset(&outp
,0, sizeof(outp
));
311 /* figure out the events */
// Event i of the libpfm request corresponds to probe i.
312 for (unsigned i
=0; i
<probes
.size(); ++i
)
// "cycles" and "instructions" are resolved through dedicated libpfm lookups
// rather than by name search.
314 if (probes
[i
]->event
== "cycles") {
315 if (pfm_get_cycle_event( &inp
.pfp_events
[i
].event
) != PFMLIB_SUCCESS
)
316 no_pfm_event_error(probes
[i
]->event
);
317 } else if (probes
[i
]->event
== "instructions") {
318 if (pfm_get_inst_retired_event( &inp
.pfp_events
[i
].event
) !=
320 no_pfm_event_error(probes
[i
]->event
);
// Any other event string is split on ':'; element 0 is looked up as the
// event name and the following elements as unit-mask names.
322 unsigned int event_id
= 0;
323 unsigned int mask_id
= 0;
324 vector
<string
> event_spec
;
325 split(probes
[i
]->event
, event_spec
, ":");
326 int num
= event_spec
.size();
// NOTE(review): the condition guarding this throw is not visible.
330 throw semantic_error("No events found");
333 if (pfm_find_event(event_spec
[0].c_str(), &event_id
) != PFMLIB_SUCCESS
)
334 no_pfm_event_error(event_spec
[0]);
335 inp
.pfp_events
[i
].event
= event_id
;
// NOTE(review): `masks` is not declared in the visible lines; presumably the
// number of elements after the event name -- confirm.
338 if (masks
> PFMLIB_MAX_MASKS_PER_EVENT
)
339 throw semantic_error("Too many unit masks specified");
// Resolve each unit-mask name (event_spec[j+1]) into unit_masks[j].
341 for (int j
=0; j
< masks
; j
++) {
342 if (pfm_find_event_mask(event_id
, event_spec
[j
+1].c_str(),
343 &mask_id
) != PFMLIB_SUCCESS
)
344 no_pfm_mask_error(string(event_spec
[j
+1]));
345 inp
.pfp_events
[i
].unit_masks
[j
] = mask_id
;
347 inp
.pfp_events
[i
].num_masks
= masks
;
351 /* number of counters in use */
352 inp
.pfp_event_count
= probes
.size();
354 // XXX: no elimination of duplicated counters
355 if (inp
.pfp_event_count
>max_counters
)
356 throw semantic_error("Too many performance monitoring events.");
358 /* count events both in kernel and user-space */
359 inp
.pfp_dfl_plm
= PFM_PLM0
| PFM_PLM3
;
361 /* XXX: some cases a perfmon register might be used of watch dog
362 this code doesn't handle that case */
364 /* figure out the pmcs for the events */
365 if ((ret
=pfm_dispatch_events(&inp
, NULL
, &outp
, NULL
)) != PFMLIB_SUCCESS
)
366 throw semantic_error("Cannot configure events");
// Copy the PMC register numbers and values chosen by libpfm into pc[].
368 for (unsigned i
=0; i
< outp
.pfp_pmc_count
; i
++) {
369 pc
[i
].reg_num
= outp
.pfp_pmcs
[i
].reg_num
;
370 pc
[i
].reg_value
= outp
.pfp_pmcs
[i
].reg_value
;
374 * There could be more pmc settings than pmd.
375 * Figure out the actual pmds to use.
// For event i, take the PMD number from PMC entry j, then advance j past the
// PMC entries whose reg_evt_idx equals i.
377 for (unsigned i
=0, j
=0; i
< inp
.pfp_event_count
; i
++) {
378 pd
[i
].reg_num
= outp
.pfp_pmcs
[j
].reg_pmd_num
;
379 for(; j
< outp
.pfp_pmc_count
; j
++)
380 if (outp
.pfp_pmcs
[j
].reg_evt_idx
!= i
) break;
383 // Output the be probes create function
384 o
->newline() << "static int register_perfmon_probes (void) {";
385 o
->newline(1) << "int rc = 0;";
387 o
->newline() << "/* data for perfmon */";
388 o
->newline() << "static int _pfm_num_pmc = " << outp
.pfp_pmc_count
<< ";";
389 o
->newline() << "static struct pfarg_pmc _pfm_pmc[" << outp
.pfp_pmc_count
391 /* output the needed bits for pmc here */
// One designated-initializer entry per PMC; the value is emitted via
// lex_cast_hex.
392 for (unsigned i
=0; i
< outp
.pfp_pmc_count
; i
++) {
393 o
->newline() << "{.reg_num=" << pc
[i
].reg_num
<< ", "
394 << ".reg_value=" << lex_cast_hex(pc
[i
].reg_value
)
398 o
->newline() << "};";
399 o
->newline() << "static int _pfm_num_pmd = " << inp
.pfp_event_count
<< ";";
400 o
->newline() << "static struct pfarg_pmd _pfm_pmd[" << inp
.pfp_event_count
402 /* output the needed bits for pmd here */
// One designated-initializer entry per PMD (one PMD per event).
403 for (unsigned i
=0; i
< inp
.pfp_event_count
; i
++) {
404 o
->newline() << "{.reg_num=" << pd
[i
].reg_num
<< ", "
405 << ".reg_value=" << pd
[i
].reg_value
<< "},";
407 o
->newline() << "};";
// Generated code publishes the tables through the _pfm_*_x variables.
410 o
->newline() << "_pfm_pmc_x=_pfm_pmc;";
411 o
->newline() << "_pfm_num_pmc_x=_pfm_num_pmc;";
412 o
->newline() << "_pfm_pmd_x=_pfm_pmd;";
413 o
->newline() << "_pfm_num_pmd_x=_pfm_num_pmd;";
415 // call all the function bodies associated with perfcounters
416 for (unsigned i
=0; i
< probes
.size (); i
++)
417 probes
[i
]->emit_registrations_start (o
,i
);
419 /* generate call to turn on instrumentation */
// The emitted "rc = rc || ..." leaves an already non-zero rc in place and
// skips the setup call in that case.
420 o
->newline() << "_pfm_context.ctx_flags |= PFM_FL_SYSTEM_WIDE;";
421 o
->newline() << "rc = rc || _stp_perfmon_setup(&_pfm_desc, &_pfm_context,";
422 o
->newline(1) << "_pfm_pmc, _pfm_num_pmc,";
423 o
->newline() << "_pfm_pmd, _pfm_num_pmd);";
426 o
->newline() << "return rc;";
427 o
->newline(-1) << "}\n";
429 // Output the be probes destroy function
430 o
->newline() << "static void unregister_perfmon_probes (void) {";
431 o
->newline(1) << "_stp_perfmon_shutdown(_pfm_desc);";
432 o
->newline(-1) << "}\n";
438 register_tapset_perfmon(systemtap_session
& s
)
440 s
.pattern_root
->bind("perfmon")->bind_str("counter")
441 ->bind(new perfmon_builder());
444 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */