5.3.4. Monitoring Polling Applications

This section describes how to identify and monitor which applications are polling. Doing so allows you to track unnecessary or excessive polling, which can help you pinpoint areas for improvement in terms of CPU usage and power savings.
timeout.stp
#! /usr/bin/env stap
# Copyright (C) 2009-2018 Red Hat, Inc.
# Written by Ulrich Drepper <drepper@redhat.com>
# Modified by William Cohen <wcohen@redhat.com>

global process, timeout_count, to
global poll_timeout, epoll_timeout, select_timeout, itimer_timeout
global nanosleep_timeout, futex_timeout, signal_timeout

probe syscall.{poll,epoll_wait} {
  if (timeout) to[pid()]=timeout
}

probe syscall.poll.return {
  if (retval == 0 && to[pid()] > 0 ) {
    poll_timeout[pid()]++
    timeout_count[pid()]++
    process[pid()] = execname()
    delete to[pid()]
  }
}

probe syscall.epoll_wait.return {
  if (retval == 0 && to[pid()] > 0 ) {
    epoll_timeout[pid()]++
    timeout_count[pid()]++
    process[pid()] = execname()
    delete to[pid()]
  }
}

probe syscall.select.return {
  if (retval == 0) {
    select_timeout[pid()]++
    timeout_count[pid()]++
    process[pid()] = execname()
  }
}

probe syscall.futex.return {
  if (errno_str(retval) == "ETIMEDOUT") {
    futex_timeout[pid()]++
    timeout_count[pid()]++
    process[pid()] = execname()
  }
}

probe syscall.nanosleep.return {
  if (retval == 0) {
    nanosleep_timeout[pid()]++
    timeout_count[pid()]++
    process[pid()] = execname()
  }
}

probe kernel.function("it_real_fn") {
  itimer_timeout[pid()]++
  timeout_count[pid()]++
  process[pid()] = execname()
}

probe syscall.rt_sigtimedwait.return {
  if (errno_str(retval) == "EAGAIN") {
    signal_timeout[pid()]++
    timeout_count[pid()]++
    process[pid()] = execname()
  }
}

probe syscall.exit {
  if (pid() in process) {
    delete process[pid()]
    delete timeout_count[pid()]
    delete poll_timeout[pid()]
    delete epoll_timeout[pid()]
    delete select_timeout[pid()]
    delete itimer_timeout[pid()]
    delete futex_timeout[pid()]
    delete nanosleep_timeout[pid()]
    delete signal_timeout[pid()]
  }
}

probe timer.s(1) {
  ansi_clear_screen()
  printf ("  pid |   poll  select   epoll  itimer   futex nanosle  signal| process\n")
  foreach (p in timeout_count- limit 20) {
     printf ("%5d |%7d %7d %7d %7d %7d %7d %7d| %-.38s\n", p,
              poll_timeout[p], select_timeout[p],
              epoll_timeout[p], itimer_timeout[p],
              futex_timeout[p], nanosleep_timeout[p],
              signal_timeout[p], process[p])
  }
}

global prom_arr

probe prometheus {
  foreach (p in timeout_count- limit 20) {
    prom_arr[poll_timeout[p], select_timeout[p],
             epoll_timeout[p], itimer_timeout[p],
	     futex_timeout[p], nanosleep_timeout[p],
	     signal_timeout[p]] = process[p]
  }

  @prometheus_dump_array7(prom_arr, "process_timeouts", "poll_timeout", "select_timeout",
		          "epoll_timeout", "itimer_timeout",
			  "futex_timeout", "nanosleep_timeout",
			  "signal_timeout")
  delete prom_arr
}
timeout.stp tracks how many times each of the following system calls completed due to time expiring rather than due to an actual event occurring:

Example 5.16. timeout.stp Sample Output

  uid |   poll  select   epoll  itimer   futex nanosle  signal| process
28937 | 148793       0       0    4727   37288       0       0| firefox
22945 |      0   56949       0       1       0       0       0| scim-bridge
    0 |      0       0       0   36414       0       0       0| swapper
 4275 |  23140       0       0       1       0       0       0| mixer_applet2
 4191 |      0   14405       0       0       0       0       0| scim-launcher
22941 |   7908       1       0      62       0       0       0| gnome-terminal
 4261 |      0       0       0       2       0    7622       0| escd
 3695 |      0       0       0       0       0    7622       0| gdm-binary
 3483 |      0    7206       0       0       0       0       0| dhcdbd
 4189 |   6916       0       0       2       0       0       0| scim-panel-gtk
 1863 |   5767       0       0       0       0       0       0| iscsid
 2562 |      0    2881       0       1       0    1438       0| pcscd
 4257 |   4255       0       0       1       0       0       0| gnome-power-man
 4278 |   3876       0       0      60       0       0       0| multiload-apple
 4083 |      0    1331       0    1728       0       0       0| Xorg
 3921 |   1603       0       0       0       0       0       0| gam_server
 4248 |   1591       0       0       0       0       0       0| nm-applet
 3165 |      0    1441       0       0       0       0       0| xterm
29548 |      0    1440       0       0       0       0       0| httpd
 1862 |      0       0       0       0       0    1438       0| iscsid
You can increase the sample time by editing the second probe (timer.s(1)). The output of timeout.stp contains the name and UID of the top 20 polling applications, along with how many times each application performed each polling system call (over time). Example 5.16, “timeout.stp Sample Output” contains an excerpt of the script. In this particular example firefox is doing an excessive amount of polling due to a plugin module.