How often do processes wake up due to timeouts?
Problem
(Transcribed from http://dev.laptop.org/ticket/110#comment:4)
Based on http://blogs.gnome.org/view/ryanl/2006/09/17/0, this SystemTap script aims to reveal how often processes wake up as a result of setting timers for themselves. The intention is to make it extremely easy to spot poorly behaved applications.
Scripts
# pstimeouts # Copyright (C) 2006 Red Hat, Inc., Eugene Teo <eteo@redhat.com> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. # # Based on http://blogs.gnome.org/view/ryanl/2006/09/17/0 # # Tested on: # [eteo@kerndev ~]$ date # Mon Oct 30 21:12:40 SGT 2006 # [eteo@kerndev ~]$ uname -a # Linux kerndev.sin.redhat.com 2.6.18-1.2798.fc6 #1 SMP Mon Oct 16 14:37:32 EDT 2006 i686 i686 i386 GNU/Linux # uid=`id -u`; alluid=0; detail=0 while getopts ":auh" options; do case $options in a) alluid=1;; u) detail=1;; h|?) cat >&2 <<-EOF pstimeouts [-au] [pids..] -a show processes belonging to all users -u detailed listing EOF exit 1;; esac done /usr/bin/env stap -g -e ' global alluid, detail, uid global process, trace_poll, trace_select, trace_epoll, total_timeouts global poll_timeouts, epoll_timeouts, select_timeouts, itimer_timeouts, schedule_timeouts /* FIXME: remove this if it is already committed into CVS */ function kernel_long:long(addr:long) %{ /* pure */ THIS->__retvalue = deref (sizeof(long), (uintptr_t)(THIS->addr)); goto success; deref_fault: /* branched to from deref() */ { static char errmsg[40]; snprintf (errmsg, 40, "kernel long copy fault at 0x%p", (void *) (uintptr_t) THIS->addr); CONTEXT->last_error = errmsg; } success: ; %} probe begin { alluid = '$alluid' detail = '$detail' uid = '$uid' } probe kernel.statement("*@kernel/fork.c:175") { p = pid() poll_timeouts[p] = epoll_timeouts[p] = select_timeouts[p] = itimer_timeouts[p] = schedule_timeouts[p] = 0 } probe kernel.function("do_sys_poll") { trace_poll[pid()] = 1 } probe kernel.function("do_sys_poll").return { trace_poll[pid()] = 0 } /* do_sys_poll@fs/select.c */ /* Could not access do_poll directly because of a gcc bug: http://sources.redhat.com/bugzilla/show_bug.cgi?id=1155 */ probe kernel.statement("*@fs/select.c:717") { /* assume !*timeout if count == 0, see fs/select.c:622 */ if ($fdcount == 0) poll_timeouts[pid()]++ } probe kernel.function("do_select") { trace_select[pid()] = 1 } probe kernel.function("do_select").return { trace_select[pid()] = 0 } /* do_select@fs/select.c */ probe kernel.statement("*@fs/select.c:289") { if (!kernel_long($timeout)) select_timeouts[pid()]++ } /* ep_poll@fs/eventpoll.c */ probe kernel.function("*@fs/eventpoll.c:1495") { trace_epoll[pid()] = 1 } /* it_real_fn@kernel/itimer.c */ probe kernel.function("it_real_fn") { itimer_timeouts[pid()]++ } /* schedule_timeout@kernel/timer.c */ probe kernel.function("schedule_timeout").return { p = pid() process[p, uid()] = execname() if ($return <= 0) schedule_timeouts[p]++ if (trace_poll[p] && !$return) schedule_timeouts[p]-- else if (trace_select[p] && !$return) schedule_timeouts[p]-- else if (trace_epoll[p] && !$return) { epoll_timeouts[p]++ schedule_timeouts[p]-- trace_epoll[p] = 0 } } probe kernel.function("do_exit") { p = pid() u = uid() if (process[p, u] == execname()) delete process[p, u] } probe timer.ms(1000) { printf("\033[2J\033[1;1H") /* clear screen */ if (detail) { printf (" uid pid | poll select epoll itimer other| process\n"); foreach ([p+, u] in process) { if (!alluid && u != uid) continue printf ("%5d %7d |%8d %8d %8d %8d %8d| %-.38s\n", u, p, poll_timeouts[p], select_timeouts[p], epoll_timeouts[p], itimer_timeouts[p], schedule_timeouts[p], process[p, u]) } } else { printf (" pid timeouts process\n"); foreach ([p+, u] in process) { if (!alluid && u != uid) continue total_timeouts = poll_timeouts[p] + select_timeouts[p] + epoll_timeouts[p] + itimer_timeouts[p] + schedule_timeouts[p] printf ("%5d %8d %-.38s\n", p, total_timeouts, process[p, u]) } } } '
Output
[eteo@kerndev ~]$ ./pstimeouts -h pstimeouts [-au] [pids..] -a show processes belonging to all users -u detailed listing [eteo@kerndev ~]$ ./pstimeouts pid timeouts process 3485 20 gnome-power-man 3530 16 scim-panel-gtk 3532 42 scim-launcher ... [eteo@kerndev ~]$ ./pstimeouts -a pid timeouts process 2148 0 syslogd 2311 4 pcscd 2342 1 automount ... [eteo@kerndev ~]$ ./pstimeouts -u uid pid | poll select epoll itimer other| process 500 3463 | 116 0 0 0 0| gnome-settings- 500 3467 | 0 0 0 0 0| scim-launcher 500 3482 | 220 0 0 2 0| metacity ... [eteo@kerndev ~]$ ./pstimeouts -ua uid pid | poll select epoll itimer other| process 0 2311 | 0 6 0 0 0| pcscd 0 2342 | 0 0 0 0 2| automount 38 2404 | 0 0 0 0 0| ntpd ...
Lessons
SystemTap makes it so much easier to gather statistics from the kernel and display it at the same time.