2 // Copyright (C) 2005-2009 Red Hat Inc.
3 // Copyright (C) 2005-2007 Intel Corporation.
4 // Copyright (C) 2008 James.Bottomley@HansenPartnership.com
6 // This file is part of systemtap, and is free software. You can
7 // redistribute it and/or modify it under the terms of the GNU General
8 // Public License (GPL); either version 2, or (at your option) any
14 #include "translate.h"
22 using namespace __gnu_cxx
;
// Root keyword under which every timer probe pattern is bound.
static const string TOK_TIMER = "timer";
28 // ------------------------------------------------------------------------
29 // timer derived probes
30 // ------------------------------------------------------------------------
33 struct timer_derived_probe
: public derived_probe
35 int64_t interval
, randomize
;
36 bool time_is_msecs
; // NB: hrtimers get ms-based probes on modern kernels instead
37 timer_derived_probe (probe
* p
, probe_point
* l
,
38 int64_t i
, int64_t r
, bool ms
=false);
39 virtual void join_group (systemtap_session
& s
);
43 struct timer_derived_probe_group
: public generic_dpg
<timer_derived_probe
>
45 void emit_interval (translator_output
* o
);
47 void emit_module_decls (systemtap_session
& s
);
48 void emit_module_init (systemtap_session
& s
);
49 void emit_module_exit (systemtap_session
& s
);
53 timer_derived_probe::timer_derived_probe (probe
* p
, probe_point
* l
,
54 int64_t i
, int64_t r
, bool ms
):
55 derived_probe (p
, l
), interval (i
), randomize (r
), time_is_msecs(ms
)
57 if (interval
<= 0 || interval
> 1000000) // make i and r fit into plain ints
58 throw semantic_error ("invalid interval for jiffies timer");
59 // randomize = 0 means no randomization
60 if (randomize
< 0 || randomize
> interval
)
61 throw semantic_error ("invalid randomize for jiffies timer");
63 if (locations
.size() != 1)
64 throw semantic_error ("expect single probe point");
65 // so we don't have to loop over them in the other functions
70 timer_derived_probe::join_group (systemtap_session
& s
)
72 if (! s
.timer_derived_probes
)
73 s
.timer_derived_probes
= new timer_derived_probe_group ();
74 s
.timer_derived_probes
->enroll (this);
// Write, onto the output `o`, C text that computes the next timer delay
// for the probe record named "stp" in the generated code: it starts from
// stp->intrv, adds _stp_random_pm(stp->rnd) when stp->rnd is nonzero, and
// yields msecs_to_jiffies(i) when stp->ms is set, otherwise i itself.
// NOTE(review): the emitted text ends with "})", so a matching opening
// "({" is presumably emitted before the first statement shown here --
// confirm against the full file.
79 timer_derived_probe_group::emit_interval (translator_output
* o
)
82 o
->newline(1) << "unsigned i = stp->intrv;";
83 o
->newline() << "if (stp->rnd != 0)";
84 o
->newline(1) << "i += _stp_random_pm(stp->rnd);";
// The last emitted expression is the value of the whole construct.
85 o
->newline(-1) << "stp->ms ? msecs_to_jiffies(i) : i;";
86 o
->newline(-1) << "})";
// Emit the file-scope C declarations for timer_list based probes:
//   * the stap_timer_probes[] table, with one initialized record per
//     enrolled probe;
//   * the enter_timer_probe() callback, which re-arms the timer and then
//     runs the probe handler.
// Emits nothing when no probes are enrolled.
91 timer_derived_probe_group::emit_module_decls (systemtap_session
& s
)
93 if (probes
.empty()) return;
95 s
.op
->newline() << "/* ---- timer probes ---- */";
// Record layout: the kernel timer, the probe point string, the handler
// function pointer, and the interval / is-milliseconds / randomize values.
97 s
.op
->newline() << "static struct stap_timer_probe {";
98 s
.op
->newline(1) << "struct timer_list timer_list;";
99 s
.op
->newline() << "const char *pp;";
100 s
.op
->newline() << "void (*ph) (struct context*);";
101 s
.op
->newline() << "unsigned intrv, ms, rnd;";
102 s
.op
->newline(-1) << "} stap_timer_probes [" << probes
.size() << "] = {";
// One designated-initializer entry per probe, in enrollment order.
104 for (unsigned i
=0; i
< probes
.size(); i
++)
106 s
.op
->newline () << "{";
107 s
.op
->line() << " .pp="
108 << lex_cast_qstring (*probes
[i
]->sole_location()) << ",";
109 s
.op
->line() << " .ph=&" << probes
[i
]->name
<< ",";
110 s
.op
->line() << " .intrv=" << probes
[i
]->interval
<< ",";
111 s
.op
->line() << " .ms=" << probes
[i
]->time_is_msecs
<< ",";
112 s
.op
->line() << " .rnd=" << probes
[i
]->randomize
;
113 s
.op
->line() << " },";
115 s
.op
->newline(-1) << "};";
// Timer callback: `val` is used as the index into stap_timer_probes[].
118 s
.op
->newline() << "static void enter_timer_probe (unsigned long val) {";
119 s
.op
->newline(1) << "struct stap_timer_probe* stp = & stap_timer_probes [val];";
// Re-arm the timer only while the session is starting or running.
120 s
.op
->newline() << "if ((atomic_read (&session_state) == STAP_SESSION_STARTING) ||";
121 s
.op
->newline() << " (atomic_read (&session_state) == STAP_SESSION_RUNNING))";
122 s
.op
->newline(1) << "mod_timer (& stp->timer_list, jiffies + ";
123 emit_interval (s
.op
);
124 s
.op
->line() << ");";
// Nested block holding the common probe prologue, the handler call and
// the common epilogue.
// NOTE(review): the indentation bookkeeping between this "{" and the two
// closing newline(-1) calls below relies on a line not visible here.
125 s
.op
->newline(-1) << "{";
127 common_probe_entryfn_prologue (s
.op
, "STAP_SESSION_RUNNING", "stp->pp");
128 s
.op
->newline() << "(*stp->ph) (c);";
129 common_probe_entryfn_epilogue (s
.op
);
130 s
.op
->newline(-1) << "}";
131 s
.op
->newline(-1) << "}";
// Emit the module-init C code for timer_list based probes: a loop over
// stap_timer_probes[] that initializes each timer, points it at
// enter_timer_probe, computes its first expiry and adds it.
// Emits nothing when no probes are enrolled.
136 timer_derived_probe_group::emit_module_init (systemtap_session
& s
)
138 if (probes
.empty()) return;
140 s
.op
->newline() << "for (i=0; i<" << probes
.size() << "; i++) {";
141 s
.op
->newline(1) << "struct stap_timer_probe* stp = & stap_timer_probes [i];";
142 s
.op
->newline() << "probe_point = stp->pp;";
143 s
.op
->newline() << "init_timer (& stp->timer_list);";
144 s
.op
->newline() << "stp->timer_list.function = & enter_timer_probe;";
145 s
.op
->newline() << "stp->timer_list.data = i;"; // NB: important! enter_timer_probe uses this value as the stap_timer_probes[] index
146 // copy timer renew calculations from above :-(
// NOTE(review): the statement terminator for the "expires" assignment is
// presumably emitted on a line not visible here, after emit_interval.
147 s
.op
->newline() << "stp->timer_list.expires = jiffies + ";
148 emit_interval (s
.op
);
150 s
.op
->newline() << "add_timer (& stp->timer_list);";
151 // note: no partial failure rollback is needed: add_timer cannot fail.
152 s
.op
->newline(-1) << "}"; // for loop
// Emit the module-exit C code for timer_list based probes: a loop that
// calls del_timer_sync() on every entry of stap_timer_probes[].
// Emits nothing when no probes are enrolled.
// NOTE(review): the loop body is emitted with newline(1); the matching
// indentation decrease is presumably on a line not visible here.
157 timer_derived_probe_group::emit_module_exit (systemtap_session
& s
)
159 if (probes
.empty()) return;
161 s
.op
->newline() << "for (i=0; i<" << probes
.size() << "; i++)";
162 s
.op
->newline(1) << "del_timer_sync (& stap_timer_probes[i].timer_list);";
168 // ------------------------------------------------------------------------
169 // hrtimer derived probes
170 // ------------------------------------------------------------------------
// This is a new timer interface that provides more flexibility in specifying
// intervals, and uses the hrtimer APIs when available for greater precision.
// While hrtimers were added in 2.6.16, the APIs weren't exported until
// 2.6.17, so we must check the kernel version before attempting to use them.
//
// * hrtimer_derived_probe: creates a probe point based on the hrtimer APIs.
// A probe backed by a kernel hrtimer.  `interval` and `randomize` are
// compared against the *_ns_interval bounds below, i.e. they are held in
// nanoseconds.
180 struct hrtimer_derived_probe
: public derived_probe
182 // set a (generous) maximum of one day in ns
183 static const int64_t max_ns_interval
= 1000000000LL * 60LL * 60LL * 24LL;
185 // 100us seems like a reasonable minimum
186 static const int64_t min_ns_interval
= 100000LL;
188 int64_t interval
, randomize
;
// Constructor: validates the interval `i` and the randomization `r`, and
// throws semantic_error when either is out of range.
// NOTE(review): the end of the parameter list is not visible here; the
// body uses a `scale` value that is presumably the last parameter (the
// number of ns per user-visible unit), used only to express the allowed
// range in the user's unit inside the error message -- confirm.
190 hrtimer_derived_probe (probe
* p
, probe_point
* l
, int64_t i
, int64_t r
,
192 derived_probe (p
, l
), interval (i
), randomize (r
)
194 if ((i
< min_ns_interval
) || (i
> max_ns_interval
))
195 throw semantic_error(string("interval value out of range (")
196 + lex_cast
<string
>(scale
< min_ns_interval
197 ? min_ns_interval
/scale
: 1)
199 + lex_cast
<string
>(max_ns_interval
/scale
) + ")");
201 // randomize = 0 means no randomization
202 if ((r
< 0) || (r
> i
))
203 throw semantic_error("randomization value out of range");
// Enrolls this probe in the session's hrtimer probe group.
206 void join_group (systemtap_session
& s
);
210 struct hrtimer_derived_probe_group
: public generic_dpg
<hrtimer_derived_probe
>
212 void emit_interval (translator_output
* o
);
214 void emit_module_decls (systemtap_session
& s
);
215 void emit_module_init (systemtap_session
& s
);
216 void emit_module_exit (systemtap_session
& s
);
221 hrtimer_derived_probe::join_group (systemtap_session
& s
)
223 if (! s
.hrtimer_derived_probes
)
224 s
.hrtimer_derived_probes
= new hrtimer_derived_probe_group ();
225 s
.hrtimer_derived_probes
->enroll (this);
// Write, onto the output `o`, C text that computes the next hrtimer delay
// for the probe record named "stp" in the generated code.  The emitted
// code starts from stp->intrv, adds a random offset derived from
// get_random_bytes() when stp->rnd is nonzero, raises the result to
// stap_hrtimer_resolution if it is smaller, and finally calls ktime_set()
// with the value split by do_div(i, NSEC_PER_SEC).
// NOTE(review): the emitted text ends with "})", so a matching opening
// "({" is presumably emitted before the first statement shown here, and
// the indentation after the resolution clamp is presumably restored on a
// line not visible here -- confirm against the full file.
230 hrtimer_derived_probe_group::emit_interval (translator_output
* o
)
233 o
->newline(1) << "unsigned long nsecs;";
234 o
->newline() << "int64_t i = stp->intrv;";
235 o
->newline() << "if (stp->rnd != 0) {";
236 // XXX: why not use stp_random_pm instead of this?
237 o
->newline(1) << "int64_t r;";
238 o
->newline() << "get_random_bytes(&r, sizeof(r));";
239 // ensure that r is positive
240 o
->newline() << "r &= ((uint64_t)1 << (8*sizeof(r) - 1)) - 1;";
// Reduce r modulo (2*rnd+1), then shift it down by rnd.
241 o
->newline() << "r = _stp_mod64(NULL, r, (2*stp->rnd+1));";
242 o
->newline() << "r -= stp->rnd;";
243 o
->newline() << "i += r;";
244 o
->newline(-1) << "}";
// Never ask for a delay shorter than the measured timer resolution.
245 o
->newline() << "if (unlikely(i < stap_hrtimer_resolution))";
246 o
->newline(1) << "i = stap_hrtimer_resolution;";
248 o
->newline() << "nsecs = do_div(i, NSEC_PER_SEC);";
249 o
->newline() << "ktime_set(i, nsecs);";
250 o
->newline(-1) << "})";
// Emit the file-scope C declarations for hrtimer based probes:
//   * the stap_hrtimer_resolution variable (assigned by the init code);
//   * the stap_hrtimer_probes[] table, one initialized record per probe;
//   * compatibility macros selected by STAPCONF_* autoconf symbols;
//   * the enter_hrtimer_probe() callback, which re-arms the timer and then
//     runs the probe handler.
// Emits nothing when no probes are enrolled.
255 hrtimer_derived_probe_group::emit_module_decls (systemtap_session
& s
)
257 if (probes
.empty()) return;
259 s
.op
->newline() << "/* ---- hrtimer probes ---- */";
261 s
.op
->newline() << "static unsigned long stap_hrtimer_resolution;"; // init later
// Record layout: the kernel hrtimer, the probe point string, the handler
// function pointer, and the interval / randomize values.
262 s
.op
->newline() << "static struct stap_hrtimer_probe {";
263 s
.op
->newline(1) << "struct hrtimer hrtimer;";
264 s
.op
->newline() << "const char *pp;";
265 s
.op
->newline() << "void (*ph) (struct context*);";
266 s
.op
->newline() << "int64_t intrv, rnd;";
267 s
.op
->newline(-1) << "} stap_hrtimer_probes [" << probes
.size() << "] = {";
// One designated-initializer entry per probe; the numeric values get an
// "LL" suffix in the generated C.
269 for (unsigned i
=0; i
< probes
.size(); i
++)
271 s
.op
->newline () << "{";
272 s
.op
->line() << " .pp=" << lex_cast_qstring (*probes
[i
]->sole_location()) << ",";
273 s
.op
->line() << " .ph=&" << probes
[i
]->name
<< ",";
274 s
.op
->line() << " .intrv=" << probes
[i
]->interval
<< "LL,";
275 s
.op
->line() << " .rnd=" << probes
[i
]->randomize
<< "LL";
276 s
.op
->line() << " },";
278 s
.op
->newline(-1) << "};";
281 // autoconf: add get/set expires if missing (pre 2.6.28-rc1)
282 s
.op
->newline() << "#ifndef STAPCONF_HRTIMER_GETSET_EXPIRES";
283 s
.op
->newline() << "#define hrtimer_get_expires(timer) ((timer)->expires)";
284 s
.op
->newline() << "#define hrtimer_set_expires(timer, time) (void)((timer)->expires = (time))";
285 s
.op
->newline() << "#endif";
287 // autoconf: adapt to HRTIMER_REL -> HRTIMER_MODE_REL renaming near 2.6.21
288 s
.op
->newline() << "#ifdef STAPCONF_HRTIMER_REL";
289 s
.op
->newline() << "#define HRTIMER_MODE_REL HRTIMER_REL";
290 s
.op
->newline() << "#endif";
292 // The function signature changed in 2.6.21.
// The callback's return type is chosen by the same autoconf symbol.
293 s
.op
->newline() << "#ifdef STAPCONF_HRTIMER_REL";
294 s
.op
->newline() << "static int ";
295 s
.op
->newline() << "#else";
296 s
.op
->newline() << "static enum hrtimer_restart ";
297 s
.op
->newline() << "#endif";
298 s
.op
->newline() << "enter_hrtimer_probe (struct hrtimer *timer) {";
// rc stays HRTIMER_NORESTART unless the session is starting or running.
300 s
.op
->newline(1) << "int rc = HRTIMER_NORESTART;";
// Recover the owning probe record from the embedded hrtimer member.
301 s
.op
->newline() << "struct stap_hrtimer_probe *stp = container_of(timer, struct stap_hrtimer_probe, hrtimer);";
302 s
.op
->newline() << "if ((atomic_read (&session_state) == STAP_SESSION_STARTING) ||";
303 s
.op
->newline() << " (atomic_read (&session_state) == STAP_SESSION_RUNNING)) {";
304 // Compute next trigger time
305 s
.op
->newline(1) << "hrtimer_set_expires(timer, ktime_add (hrtimer_get_expires(timer),";
306 emit_interval (s
.op
);
307 s
.op
->line() << "));";
308 s
.op
->newline() << "rc = HRTIMER_RESTART;";
309 s
.op
->newline(-1) << "}";
// Nested block holding the common probe prologue, the handler call and
// the common epilogue.
// NOTE(review): the indentation bookkeeping between this "{" and the
// closing newline(-1) calls below relies on a line not visible here.
310 s
.op
->newline() << "{";
312 common_probe_entryfn_prologue (s
.op
, "STAP_SESSION_RUNNING", "stp->pp");
313 s
.op
->newline() << "(*stp->ph) (c);";
314 common_probe_entryfn_epilogue (s
.op
);
315 s
.op
->newline(-1) << "}";
316 s
.op
->newline() << "return rc;";
317 s
.op
->newline(-1) << "}";
322 hrtimer_derived_probe_group::emit_module_init (systemtap_session
& s
)
324 if (probes
.empty()) return;
326 s
.op
->newline() << "{";
327 s
.op
->newline(1) << "struct timespec res;";
328 s
.op
->newline() << "hrtimer_get_res (CLOCK_MONOTONIC, &res);";
329 s
.op
->newline() << "stap_hrtimer_resolution = timespec_to_ns (&res);";
330 s
.op
->newline(-1) << "}";
332 s
.op
->newline() << "for (i=0; i<" << probes
.size() << "; i++) {";
333 s
.op
->newline(1) << "struct stap_hrtimer_probe* stp = & stap_hrtimer_probes [i];";
334 s
.op
->newline() << "probe_point = stp->pp;";
335 s
.op
->newline() << "hrtimer_init (& stp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);";
336 s
.op
->newline() << "stp->hrtimer.function = & enter_hrtimer_probe;";
337 // There is no hrtimer field to identify *this* (i-th) probe handler
338 // callback. So instead we'll deduce it at entry time.
339 s
.op
->newline() << "(void) hrtimer_start (& stp->hrtimer, ";
340 emit_interval (s
.op
);
341 s
.op
->line() << ", HRTIMER_MODE_REL);";
342 // Note: no partial failure rollback is needed: hrtimer_start only
343 // "fails" if the timer was already active, which cannot be.
344 s
.op
->newline(-1) << "}"; // for loop
// Emit the module-exit C code for hrtimer based probes: a loop that calls
// hrtimer_cancel() on every entry of stap_hrtimer_probes[].
// Emits nothing when no probes are enrolled.
// NOTE(review): the loop body is emitted with newline(1); the matching
// indentation decrease is presumably on a line not visible here.
349 hrtimer_derived_probe_group::emit_module_exit (systemtap_session
& s
)
351 if (probes
.empty()) return;
353 s
.op
->newline() << "for (i=0; i<" << probes
.size() << "; i++)";
354 s
.op
->newline(1) << "hrtimer_cancel (& stap_hrtimer_probes[i].hrtimer);";
360 // ------------------------------------------------------------------------
361 // profile derived probes
362 // ------------------------------------------------------------------------
363 // On kernels < 2.6.10, this uses the register_profile_notifier API to
364 // generate the timed events for profiling; on kernels >= 2.6.10 this
365 // uses the register_timer_hook API. The latter doesn't currently allow
366 // simultaneous users, so insertion will fail if the profiler is busy.
367 // (Conflicting users may include OProfile, other SystemTap probes, etc.)
370 struct profile_derived_probe
: public derived_probe
372 profile_derived_probe (systemtap_session
&s
, probe
* p
, probe_point
* l
);
373 void join_group (systemtap_session
& s
);
377 struct profile_derived_probe_group
: public generic_dpg
<profile_derived_probe
>
380 void emit_module_decls (systemtap_session
& s
);
381 void emit_module_init (systemtap_session
& s
);
382 void emit_module_exit (systemtap_session
& s
);
// Constructor for a timer.profile probe.  The session parameter is left
// unnamed, so it is not used by the constructor.
// NOTE(review): the member-initializer list and the body are not visible
// here; presumably `p` and `l` are forwarded to the derived_probe base --
// confirm against the full file.
386 profile_derived_probe::profile_derived_probe (systemtap_session
&, probe
* p
, probe_point
* l
):
393 profile_derived_probe::join_group (systemtap_session
& s
)
395 if (! s
.profile_derived_probes
)
396 s
.profile_derived_probes
= new profile_derived_probe_group ();
397 s
.profile_derived_probes
->enroll (this);
// timer.profile probe handlers are hooked up in an entertaining way
// to the underlying kernel facility.  The fact that the 2.6.10+ era
// "register_timer_hook" API allows only one consumer *system-wide*
// will give a hint.  We will have a single entry function (and thus
// trivial registration / unregistration), and it will call all probe
// handler functions in sequence.
// Emit the file-scope C declarations for timer.profile probes:
//   * enter_all_profile_probes(), which runs every enrolled probe handler
//     in sequence on one shared context;
//   * enter_profile_probes(), the kernel-facing entry point whose
//     signature (and, on old kernels, accompanying notifier_block) is
//     selected by a LINUX_VERSION_CODE check in the generated code.
// Emits nothing when no probes are enrolled.
409 profile_derived_probe_group::emit_module_decls (systemtap_session
& s
)
411 if (probes
.empty()) return;
413 // kernels < 2.6.10: use register_profile_notifier API
414 // kernels >= 2.6.10: use register_timer_hook API
415 s
.op
->newline() << "/* ---- profile probes ---- */";
417 // This function calls all the profiling probe handlers in sequence.
418 // The only tricky thing is that the context will be reused amongst
419 // them. A simple sequence of calls to the individual probe
420 // handlers is unlikely to go terribly wrong (with c->last_error
421 // being set causing an early return), but for extra assurance, we
422 // open-code the same logic here.
424 s
.op
->newline() << "static void enter_all_profile_probes (struct pt_regs *regs) {";
426 string pp
= lex_cast_qstring("timer.profile"); // hard-coded for convenience
427 common_probe_entryfn_prologue (s
.op
, "STAP_SESSION_RUNNING", pp
);
428 s
.op
->newline() << "c->regs = regs;";
// One guarded handler call per probe.
// NOTE(review): several lines of this loop body are not visible here, so
// the condition under which the actionremaining reset is emitted cannot
// be stated from this view.
430 for (unsigned i
=0; i
<probes
.size(); i
++)
434 // Some lightweight inter-probe context resetting
435 // XXX: not quite right: MAXERRORS not respected
436 s
.op
->newline() << "c->actionremaining = MAXACTION;";
// Skip the remaining handlers once an error has been recorded.
438 s
.op
->newline() << "if (c->last_error == NULL) " << probes
[i
]->name
<< " (c);";
440 common_probe_entryfn_epilogue (s
.op
);
441 s
.op
->newline(-1) << "}";
443 s
.op
->newline() << "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)"; // == using_rpn of yore
// Old kernels: notifier-style entry point; pt_regs arrive through `data`.
445 s
.op
->newline() << "static int enter_profile_probes (struct notifier_block *self,"
446 << " unsigned long val, void *data) {";
447 s
.op
->newline(1) << "(void) self; (void) val;";
448 s
.op
->newline() << "enter_all_profile_probes ((struct pt_regs *) data);";
449 s
.op
->newline() << "return 0;";
450 s
.op
->newline(-1) << "}";
451 s
.op
->newline() << "struct notifier_block stap_profile_notifier = {"
452 << " .notifier_call = & enter_profile_probes };";
454 s
.op
->newline() << "#else";
// Newer kernels: timer-hook style entry point taking pt_regs directly.
456 s
.op
->newline() << "static int enter_profile_probes (struct pt_regs *regs) {";
457 s
.op
->newline(1) << "enter_all_profile_probes (regs);";
458 s
.op
->newline() << "return 0;";
459 s
.op
->newline(-1) << "}";
461 s
.op
->newline() << "#endif";
466 profile_derived_probe_group::emit_module_init (systemtap_session
& s
)
468 if (probes
.empty()) return;
470 s
.op
->newline() << "probe_point = \"timer.profile\";"; // NB: hard-coded for convenience
471 s
.op
->newline() << "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)"; // == using_rpn of yore
472 s
.op
->newline() << "rc = register_profile_notifier (& stap_profile_notifier);";
473 s
.op
->newline() << "#else";
474 s
.op
->newline() << "rc = register_timer_hook (& enter_profile_probes);";
475 s
.op
->newline() << "#endif";
480 profile_derived_probe_group::emit_module_exit (systemtap_session
& s
)
482 if (probes
.empty()) return;
484 s
.op
->newline() << "for (i=0; i<" << probes
.size() << "; i++)";
485 s
.op
->newline(1) << "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10)"; // == using_rpn of yore
486 s
.op
->newline() << "unregister_profile_notifier (& stap_profile_notifier);";
487 s
.op
->newline() << "#else";
488 s
.op
->newline() << "unregister_timer_hook (& enter_profile_probes);";
489 s
.op
->newline() << "#endif";
495 // ------------------------------------------------------------------------
496 // unified probe builder for timer probes
497 // ------------------------------------------------------------------------
500 struct timer_builder
: public derived_probe_builder
502 virtual void build(systemtap_session
& sess
,
503 probe
* base
, probe_point
* location
,
504 literal_map_t
const & parameters
,
505 vector
<derived_probe
*> & finished_results
);
507 static void register_patterns(systemtap_session
& s
);
// Turn one matched timer.* probe point into a derived probe appended to
// finished_results:
//   * "profile"            -> profile_derived_probe;
//   * "jiffies"            -> timer_derived_probe (jiffies-based);
//   * "hz" and the s/sec, ms/msec, us/usec, ns/nsec units -> wallclock
//     probes, which become a ms-based timer_derived_probe when the kernel
//     release compares older than "2.6.17", and an hrtimer_derived_probe
//     in the remaining case.
// Throws semantic_error for a bad frequency or an unrecognized variant.
// NOTE(review): many lines of this function are not visible here (the
// `base` parameter, early returns, the per-unit scale/period adjustments,
// and the `else` joining the last two push_back calls); the summary above
// is limited to what the visible statements show -- confirm the rest
// against the full file.
511 timer_builder::build(systemtap_session
& sess
,
513 probe_point
* location
,
514 literal_map_t
const & parameters
,
515 vector
<derived_probe
*> & finished_results
)
517 int64_t scale
=1, period
, rand
=0;
// timer.profile: also request unwind data for the "kernel" module.
519 if (has_null_param(parameters
, "profile"))
521 sess
.unwindsym_modules
.insert ("kernel");
522 finished_results
.push_back
523 (new profile_derived_probe(sess
, base
, location
));
// Optional randomize(N) parameter; `rand` keeps its value when absent.
527 if (!get_param(parameters
, "randomize", rand
))
530 if (get_param(parameters
, "jiffies", period
))
532 // always use basic timers for jiffies
533 finished_results
.push_back
534 (new timer_derived_probe(base
, location
, period
, rand
, false));
// timer.hz(N): convert the frequency to a period, rounding up.
537 else if (get_param(parameters
, "hz", period
))
540 throw semantic_error ("frequency must be greater than 0");
541 period
= (1000000000 + period
- 1)/period
;
543 else if (get_param(parameters
, "s", period
) ||
544 get_param(parameters
, "sec", period
))
550 else if (get_param(parameters
, "ms", period
) ||
551 get_param(parameters
, "msec", period
))
557 else if (get_param(parameters
, "us", period
) ||
558 get_param(parameters
, "usec", period
))
564 else if (get_param(parameters
, "ns", period
) ||
565 get_param(parameters
, "nsec", period
))
570 throw semantic_error ("unrecognized timer variant");
572 // Redirect wallclock-time based probes to hrtimer code on recent kernels.
574 if (strverscmp(sess
.kernel_base_release
.c_str(), "2.6.17") < 0)
576 // hrtimers didn't exist, so use the old-school timers
// Divide by 1000000, rounding up, before handing the values to the
// ms-based timer_derived_probe.
577 period
= (period
+ 1000000 - 1)/1000000;
578 rand
= (rand
+ 1000000 - 1)/1000000;
580 finished_results
.push_back
581 (new timer_derived_probe(base
, location
, period
, rand
, true));
584 finished_results
.push_back
585 (new hrtimer_derived_probe(base
, location
, period
, rand
, scale
));
589 register_tapset_timers(systemtap_session
& s
)
591 match_node
* root
= s
.pattern_root
;
592 derived_probe_builder
*builder
= new timer_builder();
594 root
= root
->bind(TOK_TIMER
);
596 root
->bind_num("s")->bind(builder
);
597 root
->bind_num("s")->bind_num("randomize")->bind(builder
);
598 root
->bind_num("sec")->bind(builder
);
599 root
->bind_num("sec")->bind_num("randomize")->bind(builder
);
601 root
->bind_num("ms")->bind(builder
);
602 root
->bind_num("ms")->bind_num("randomize")->bind(builder
);
603 root
->bind_num("msec")->bind(builder
);
604 root
->bind_num("msec")->bind_num("randomize")->bind(builder
);
606 root
->bind_num("us")->bind(builder
);
607 root
->bind_num("us")->bind_num("randomize")->bind(builder
);
608 root
->bind_num("usec")->bind(builder
);
609 root
->bind_num("usec")->bind_num("randomize")->bind(builder
);
611 root
->bind_num("ns")->bind(builder
);
612 root
->bind_num("ns")->bind_num("randomize")->bind(builder
);
613 root
->bind_num("nsec")->bind(builder
);
614 root
->bind_num("nsec")->bind_num("randomize")->bind(builder
);
616 root
->bind_num("jiffies")->bind(builder
);
617 root
->bind_num("jiffies")->bind_num("randomize")->bind(builder
);
619 root
->bind_num("hz")->bind(builder
);
621 root
->bind("profile")->bind(builder
);
626 /* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */