--- /dev/null
+.TH accept2close-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+accept2close-nd \- trace process signals. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B accept2close-nd.stp
+[min_ms]
+.SH DESCRIPTION
+This traces socket duration from the accept() syscall to close(), and provides
+details on the lifespan of these passive connections.
+
+NOTE: This will miss sockets that are opened by one process, and then closed
+by another (eg, sshd).
+.SH REQUIREMENTS
+SystemTap.
+.SH OPTIONS
+.TP
+min_ms
+Minimum lifespan threshold, in milliseconds. This is an optional argument that
+can be used to focus on outliers.
+.SH EXAMPLES
+.TP
+Trace all passive socket connections, and show event details and a summary:
+#
+.B accept2close-nd.stp
+.TP
+Trace all passive socket connections equal to or slower than 5 milliseconds:
+#
+.B accept2close-nd.stp 5
+.SH FIELDS
+.TP
+TIME
+Time of socket close, human readable.
+.TP
+PID
+Server process ID
+.TP
+COMM
+Server process name.
+.TP
+FD
+File descriptor for socket.
+.TP
+DURATION
+Duration of socket connection (lifespan), in milliseconds.
+.SH OVERHEAD
+Should be low. Test first.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+strace(1)
--- /dev/null
+title: Show socket lifespan, from accept() to close() (non-debuginfo)
+name: accept2close-nd.stp
+author: Brendan Gregg
+keywords: net socket
+description: This traces socket duration from the accept() syscall to close(), and provides details on the lifespan of these passive connections, showing the distribution as a histogram.
+scope: system-wide
+usage: ./accept2close-nd.stp [min_ms]
+arg_1: Minimum lifespan threshold, in milliseconds. This is an optional argument that can be used to focus on outliers.
+test_support: stap -p4 accept2close-nd.stp
+test_check: stap -p4 accept2close-nd.stp
+test_installcheck: stap accept2close-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * accept2close-nd.stp Show socket duration: accept()->close().
+ *
+ * USAGE: ./accept2close.stp [min_ms]
+ *
+ * NOTE: This will miss sockets that are opened by one process, and then closed
+ * by another (eg, sshd).
+ *
+ * A minimum latency threshold, in milliseconds, can be provided as an optional
+ * argument. Without this, all events are shown.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 20-Jun-2014 Brendan Gregg Created this.
+ */
+
+global duration, ts, min_ns;
+
+probe begin
+{
+ # process arguments
+ if (argv_1 != "") {
+ min_ns = strtol(argv_1, 10) * 1000000;
+ printf("Only tracing events slower than %s ms.\n", argv_1);
+ } else {
+ min_ns = 0;
+ }
+
+ printf("%-24s %-6s %-14s %-4s %s\n", "TIME", "PID", "COMM", "FD",
+ "DURATION");
+}
+
+probe nd_syscall.accept.return,
+ nd_syscall.accept4.return
+{
+ ts[tid(), returnval()] = gettimeofday_ns();
+}
+
+probe nd_syscall.close
+{
+ last = ts[tid(), int_arg(1)];
+ if (last) {
+ delta = gettimeofday_ns() - last;
+ if (delta >= min_ns) {
+ printf("%-24s %-6d %-14s %-4d %d ms\n",
+ ctime(gettimeofday_s()),
+ pid(), execname(), int_arg(1), delta / 1000000);
+ duration <<< delta;
+ }
+ delete ts[tid(), int_arg(1)];
+ }
+}
+
+probe end
+{
+ printf("\nDuration (ns):\n");
+ if (@count(duration)) { print(@hist_log(duration)); }
+}
--- /dev/null
+Examples of accept2close-nd.stp, the Linux SystemTap version.
+
+
+This shows accepted socket lifespans, by measuring the time from syscall
+accept() to close() on the same file descriptor. Sessions are printed as they
+occur, and a histogram summary is printed at the end. For example:
+
+# ./accept2close-nd.stp
+TIME PID COMM FD DURATION
+Sat Jan 31 05:31:14 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 2 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 3 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 2 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+Sat Jan 31 05:31:15 2015 29042 node 11 1 ms
+[...]
+Duration (ns):
+ value |-------------------------------------------------- count
+ 131072 | 0
+ 262144 | 0
+ 524288 | 1
+ 1048576 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 138
+ 2097152 |@@ 6
+ 4194304 | 1
+ 8388608 | 0
+16777216 | 0
+
+Most of these sessions were between 1 and 2 milliseconds in lifespan.
+
+The socket lifespan is expected to represent the time taken by a server to
+respond to a request. This time may include cycles on-CPU, and cycles off-CPU
+blocked on another resource. Note that HTTP keep-alive sessions may have long
+durations which process multiple requests.
+
+
+accept2close-nd.stp accepts an optional argument: a minimum lifespan threshold
+in milliseconds. For example, socket sessions that are 5 milliseconds and
+longer:
+
+# ./accept2close-nd.stp 5
+Only tracing events slower than 5 ms.
+TIME PID COMM FD DURATION
+Sat Jan 31 05:36:23 2015 5236 node 11 5 ms
+Sat Jan 31 05:37:39 2015 5236 node 11 5 ms
+Sat Jan 31 05:37:50 2015 5236 node 11 9 ms
+Sat Jan 31 05:38:09 2015 5236 node 11 7 ms
+^C
+Duration (ns):
+ value |-------------------------------------------------- count
+ 1048576 | 0
+ 2097152 | 0
+ 4194304 |@@@ 3
+ 8388608 |@ 1
+16777216 | 0
+33554432 | 0
--- /dev/null
+.TH biolatency-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+biolatency-nd \- Measure block I/O latency distribution. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B biolatency-nd.stp
+[interval [count]]
+.SH DESCRIPTION
+This measures block I/O latency (storage I/O, ie, disk I/O), and shows the
+distribution as a histogram. This can be useful to identify the characteristics
+of I/O latency, beyond the averages shown by iostat(1). For example, to study
+I/O latency outliers, or multi-modal distributions.
+
+This script uses the kernel tracepoints block_rq_issue and block_rq_complete,
+to trace the time from storage request to completion. To include extra time
+spent queued in the kernel, change block_rq_issue to block_rq_insert.
+.SH REQUIREMENTS
+SystemTap.
+.SH OPTIONS
+.TP
+interval
+Interval for reports, in seconds.
+.TP
+count
+Number of reports to output.
+.SH EXAMPLES
+.TP
+Measure block I/O latency, and show a distribution histogram on Ctrl-C:
+#
+.B biolatency-nd.stp
+.TP
+Measure block I/O latency, and show a report every second, five times:
+#
+.B biolatency-nd.stp 1 5
+.SH FIELDS
+.TP
+value
+The minimum value, in nanoseconds, for this histogram bucket. Read this value
+as "I/O latency was at least xxx".
+.TP
+count
+How many I/O fell into this latency range.
+.SH OVERHEAD
+Should be low, relative to the rate of disk I/O. Test first.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+iostat(1)
--- /dev/null
+title: Measure block I/O latency distribution (non-debuginfo)
+name: biolatency-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This measures block I/O latency (storage I/O, ie, disk I/O), and shows the distribution as a histogram. This can be useful to identify the characteristics of I/O latency, beyond the averages shown by iostat(1). For example, to study I/O latency outliers, or multi-modal distributions.
+scope: system-wide
+usage: ./biolatency-nd.stp [interval [count]]
+arg_1: Output interval (seconds)
+arg_2: Output count
+test_support: stap -p4 biolatency-nd.stp
+test_check: stap -p4 biolatency-nd.stp
+test_installcheck: stap biolatency-nd.stp 1 1
--- /dev/null
+#!/usr/bin/stap
+/*
+ * biolatency-nd.stp Measure storage (bio) latency distribution.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * USAGE: ./biolatency-nd.stp [interval [count]]
+ *
+ * This script uses the kernel tracepoints block_rq_issue and block_rq_complete,
+ * to trace the time from storage request to completion. To include extra time
+ * spent queued in the kernel, change block_rq_issue to block_rq_insert.
+ *
+ * From systemtap-lwtools: https://github.com/brendangregg/systemtap-lwtools
+ *
+ * See the corresponding man page (in systemtap-lwtools) for more info.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 01-Feb-2015 Brendan Gregg Created this.
+ */
+
+global ts[65536]; # 65536 is max concurrent I/O
+global lat, interval = 0, secs = 0, count = 0, output = 0;
+
+probe begin
+{
+ printf("Tracing block I/O... ");
+ if (argv_1 != "") {
+ interval = strtol(argv_1, 10);
+ if (argv_2 != "") {
+ count = strtol(argv_2, 10);
+ }
+ printf("Output every %d secs.\n", interval);
+ } else {
+ printf("Hit Ctrl-C to end.\n");
+ }
+}
+
+probe kernel.trace("block_rq_issue") {
+ ts[$rq] = gettimeofday_ns();
+}
+
+probe kernel.trace("block_rq_complete") {
+ if (ts[$rq]) {
+ lat <<< gettimeofday_ns() - ts[$rq];
+ delete ts[$rq];
+ }
+}
+
+function print_report()
+{
+ printf("\nbio latency (ns):\n");
+ if (@count(lat)) { print(@hist_log(lat)); }
+ delete lat;
+}
+
+probe timer.s(1)
+{
+ if (interval) {
+ if (++secs == interval) {
+ print_report();
+ secs = 0;
+ count && (++output == count) && exit();
+ }
+ }
+}
+
+probe end
+{
+ !interval && print_report();
+ delete secs;
+ delete output;
+}
--- /dev/null
+Examples of biolatency-nd.stp, the Linux SystemTap version.
+
+
+Measuring block I/O (storage I/O, ie, disk I/O) latency until Ctrl-C:
+
+# ./biolatency-nd.stp
+Tracing block I/O... Hit Ctrl-C to end.
+^C
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 131072 | 0
+ 262144 | 0
+ 524288 |@@ 5
+ 1048576 |@@ 4
+ 2097152 |@@@@@@@@@@@@@@@@@@@@@@@@ 49
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@@@@@@ 55
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 82
+16777216 |@@@@@@@@@@@@@@@@@@ 37
+33554432 | 0
+67108864 | 0
+
+This shows a distribution between aout 1 and 16 milliseconds, which is expected
+for current rotational disk I/O.
+
+
+A more interesting distribution:
+
+# ./biolatency-nd.stp
+Tracing block I/O... Hit Ctrl-C to end.
+^C
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 32768 | 0
+ 65536 | 0
+ 131072 | 10
+ 262144 |@@@@@@@@@ 450
+ 524288 |@@@ 159
+ 1048576 | 26
+ 2097152 | 35
+ 4194304 | 18
+ 8388608 |@@ 112
+ 16777216 |@@@@ 195
+ 33554432 |@@@@@@@@@ 452
+ 67108864 |@@@@@@@@@@@@@@@@@@@@@@ 1065
+ 134217728 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2398
+ 268435456 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2008
+ 536870912 | 0
+1073741824 | 0
+
+This shows a bimodal distribution with a low-latency mode around 0.26
+milliseconds, which may be on-disk cache hits, and then a high-latency mode
+between around 16 and 268 milliseconds. Such high latency is likely
+caused by queueing, which can be investigated further using custom
+SystemTap invocations as well as the "avgqu-sz" column from iostat(1).
+
+
+biolatency-nd.stp accepts an optional interval and count as arguments. Measuring
+block I/O latency, and printing a report every second, five times:
+
+# ./biolatency-nd.stp 1 5
+Tracing block I/O... Output every 1 secs.
+
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 32768 | 0
+ 65536 | 0
+ 131072 |@@ 28
+ 262144 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 486
+ 524288 |@@@@@@@@@@@@@@@@@@@ 198
+ 1048576 |@@@@@@@@@ 98
+ 2097152 |@@@@@@ 61
+ 4194304 |@@@ 35
+ 8388608 |@ 17
+16777216 | 3
+33554432 | 0
+67108864 | 0
+
+
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 32768 | 0
+ 65536 | 0
+ 131072 |@@ 22
+ 262144 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 505
+ 524288 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 370
+ 1048576 |@@@@@@@@ 91
+ 2097152 |@@ 31
+ 4194304 |@ 20
+ 8388608 |@ 20
+16777216 | 4
+33554432 | 0
+67108864 | 0
+
+
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 32768 | 0
+ 65536 | 0
+ 131072 |@@@@ 70
+ 262144 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 826
+ 524288 |@@@@@@@@@@@@@@@@@@@ 333
+ 1048576 |@@@ 52
+ 2097152 | 11
+ 4194304 | 7
+ 8388608 |@ 17
+16777216 | 5
+33554432 | 0
+67108864 | 0
+
+
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 32768 | 0
+ 65536 | 0
+ 131072 |@@ 35
+ 262144 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 559
+ 524288 |@@@@@@@@@@@@@@@@@@@@@@ 270
+ 1048576 |@@@@@@ 82
+ 2097152 |@@ 34
+ 4194304 |@ 17
+ 8388608 |@ 19
+ 16777216 | 9
+ 33554432 | 2
+ 67108864 | 0
+134217728 | 0
+
+
+bio latency (ns):
+ value |-------------------------------------------------- count
+ 32768 | 0
+ 65536 | 0
+ 131072 |@@@ 57
+ 262144 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 877
+ 524288 |@@@@@@@@@@@@@@@ 270
+ 1048576 |@@@@ 85
+ 2097152 |@@ 46
+ 4194304 | 7
+ 8388608 | 15
+16777216 | 10
+33554432 | 0
+67108864 | 0
+
+These disk I/O were much faster, with a mode around 0.25 ms.
--- /dev/null
+.TH bitesize-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+bitesize-nd \- Measure block I/O size distribution. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B bitesize-nd.stp
+.SH DESCRIPTION
+This uses the kernel tracepoint block_rq_insert to read the size of I/O. The
+output includes the name of the process or thread that was on-CPU when the I/O
+request was inserted on the issue queue.
+.SH REQUIREMENTS
+SystemTap.
+.SH EXAMPLES
+.TP
+Measure block I/O sizes, and show a distribution histogram for each process name on Ctrl-C:
+#
+.B bitesize-nd.stp
+.SH FIELDS
+.TP
+value
+The minimum size, in bytes, for this histogram bucket. Read this value
+as "I/O size was at least xxx".
+.TP
+count
+How many I/O fell into this size range.
+.SH OVERHEAD
+Should be low, relative to the rate of disk I/O. Test first.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+biolatency-nd.stp(8), iostat(1)
--- /dev/null
+title: Measure block I/O size distribution (non-debuginfo)
+name: bitesize-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This uses the kernel tracepoint block_rq_insert to read the size of I/O. The output includes the name of the process or thread that was on-CPU when the I/O request was inserted on the issue queue.
+scope: system-wide
+usage: ./bitesize-nd.stp
+test_support: stap -p4 bitesize-nd.stp
+test_check: stap -p4 bitesize-nd.stp
+test_installcheck: stap bitesize-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * bitesize-nd.stp Measure storage (bio) I/O size distribution.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * USAGE: ./bitesize-nd.stp
+ *
+ * This script uses the kernel tracepoint block_rq_insert. The output includes
+ * the name of the process or thread that was on-CPU when the I/O request was
+ * inserted on the issue queue.
+ *
+ * From systemtap-lwtools: https://github.com/brendangregg/systemtap-lwtools
+ *
+ * See the corresponding man page (in systemtap-lwtools) for more info.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 01-Feb-2015 Brendan Gregg Created this.
+ */
+
+global sz;
+
+probe begin
+{
+ printf("Tracing block I/O... Hit Ctrl-C to end.\n");
+}
+
+probe kernel.trace("block_rq_insert") {
+ /*
+ * You aren't supposed to access __data_len directly as it is internal,
+ * but I don't see another way...
+ */
+ sz[execname()] <<< $rq->__data_len;
+}
+
+probe end
+{
+ printf("\nI/O size (bytes):\n\n");
+ foreach (name in sz+) {
+ printf("process name: %s\n", name);
+ print(@hist_log(sz[name]));
+ }
+ delete sz;
+}
--- /dev/null
+Examples of bitesize-nd.stp, the Linux SystemTap version.
+
+
+This shows size distributions for requested block I/O, by process name:
+
+# ./bitesize-nd.stp
+Tracing block I/O... Hit Ctrl-C to end.
+^C
+I/O size (bytes):
+
+process name: flush-202:1
+ value |-------------------------------------------------- count
+ 1024 | 0
+ 2048 | 0
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 33
+ 8192 |@@@@@@@@@@@@@@@@ 16
+ 16384 |@@@@@@@@@ 9
+ 32768 |@@@@ 4
+ 65536 |@@@ 3
+131072 | 0
+262144 | 0
+
+process name: kjournald
+ value |-------------------------------------------------- count
+ 1024 | 0
+ 2048 | 0
+ 4096 |@ 4
+ 8192 | 2
+ 16384 | 2
+ 32768 |@ 7
+ 65536 |@ 5
+131072 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 182
+262144 | 0
+524288 | 0
+
+process name: randread.pl
+ value |-------------------------------------------------- count
+ 1024 | 0
+ 2048 | 0
+ 4096 | 4
+ 8192 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1859
+ 16384 | 0
+ 32768 | 5
+ 65536 | 18
+131072 | 0
+262144 | 0
+
+process name: tar
+ value |-------------------------------------------------- count
+ 1024 | 0
+ 2048 | 0
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1645
+ 8192 |@@@@@@@@@@@@@@@@@@@@@@@@@ 836
+ 16384 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1394
+ 32768 |@@@@@@@@@@@@@@@@ 532
+ 65536 |@@@@@ 180
+131072 |@@@@@@ 219
+262144 | 0
+524288 | 0
+
+tar(1) was archiving a directory of varying sized files, and writing an output
+file, which creates the distribution seen above.
+
+The randread.pl program was performing 8 Kbyte random reads. Note that there
+were a few larger I/O: some custom SystemTap can be used to explain this. Eg,
+show kernel stack traces for I/O larger than 32 Kbytes. I'd guess it was
+file system metadata.
+
+The aim of this tool is to help characterize the disk I/O workload.
--- /dev/null
+.TH execsnoop-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+execsnoop-nd \- trace process exec() with arguments. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B execsnoop-nd.stp
+.SH DESCRIPTION
+This traces exec() from the fork()->exec() sequence, which means it will not
+catch new processes that only fork(). It will also show every exec(), including
+those if a process re-execs.
+
+This is a basic version of execsnoop.
+.SH REQUIREMENTS
+SystemTap.
+.SH EXAMPLES
+.TP
+Trace all new processes and arguments via exec():
+#
+.B execsnoop-nd.stp
+.SH FIELDS
+.TP
+TIME
+Time of the exec(), in human readable format.
+.TP
+PID
+Process ID.
+.TP
+PPID
+Parent process ID.
+.TP
+COMM
+Process name.
+.TP
+ARGS
+Command line arguments.
+.SH OVERHEAD
+This reads and processes exec() events and prints a line of output for each.
+Since the rate of exec() is expected to be low (< 500/s), the overhead is
+expected to be negligible.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+top(1)
--- /dev/null
+title: Trace process exec() with command line argument details (non-debuginfo)
+name: execsnoop-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This can identify if CPU is consumed by short-lived processes, by tracing new process execution. It works by tracing exec() from the fork()->exec() sequence, which means it will not catch new processes that only fork(). It will also show every exec(), including those if a process re-execs.
+scope: system-wide
+usage: ./execsnoop-nd.stp
+test_support: stap -p4 execsnoop-nd.stp
+test_check: stap -p4 execsnoop-nd.stp
+test_installcheck: stap execsnoop-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * execsnoop-nd.stp Trace process exec()s with arguments.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * This is useful for identifying new process creation, provided it follows the
+ * fork()->exec() sequence. New processes that only fork will not be seen; catch
+ * those by tracing fork() or using the process.begin probe.
+ *
+ * USAGE: ./execsnoop-nd.stp
+ *
+ * This is a basic version of execsnoop.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 07-Apr-2014 Brendan Gregg Created this.
+ */
+
+probe begin
+{
+ printf("%-24s %6s %6s %6s %14s %s\n", "TIME", "UID", "PPID", "PID",
+ "COMM", "ARGS");
+}
+
+probe nd_syscall.execve.return
+{
+ printf("%-24s %6d %6d %6d %14s %s\n", ctime(gettimeofday_s()), uid(),
+ ppid(), pid(), execname(), cmdline_str());
+}
--- /dev/null
+Examples of execsnoop-nd.stp, the Linux SystemTap version.
+
+
+Tracing while "man ls" was executed:
+
+# ./execsnoop-nd.stp
+TIME UID PPID PID COMM ARGS
+Sat Jan 31 00:56:02 2015 0 11962 13472 man man ls
+Sat Jan 31 00:56:02 2015 0 13472 13473 sh sh -c /usr/bin/gunzip -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13472 13473 gunzip /bin/sh /usr/bin/gunzip -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13472 13473 gzip gzip -d -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13472 13474 sh sh -c /usr/bin/gunzip -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13472 13474 gunzip /bin/sh /usr/bin/gunzip -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13472 13474 gzip gzip -d -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13472 13475 sh sh -c (cd "/usr/share/man" && (echo ".ll 15.5i"; echo ".nr LL 15.5i"; echo ".pl 1100i"; /usr/bin/gunzip -c '/usr/share/man/man1/ls.1.gz'; echo ".\\\""; echo ".pl \n(nlu+10") | /usr/bin/gtbl | /usr/bin/nroff -c -mandoc 2>/dev/null | /usr/bin/less -is)
+Sat Jan 31 00:56:02 2015 0 13476 13479 nroff /bin/bash /usr/bin/nroff -c -mandoc
+Sat Jan 31 00:56:02 2015 0 13476 13481 less /usr/bin/less -is
+Sat Jan 31 00:56:02 2015 0 13476 13478 gtbl /usr/bin/gtbl
+Sat Jan 31 00:56:02 2015 0 13477 13480 gunzip /bin/sh /usr/bin/gunzip -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13477 13480 gzip gzip -d -c /usr/share/man/man1/ls.1.gz
+Sat Jan 31 00:56:02 2015 0 13482 13483 locale locale charmap
+Sat Jan 31 00:56:02 2015 0 13484 13485 mktemp mktemp /tmp/man.XXXXXX
+Sat Jan 31 00:56:02 2015 0 13479 13486 cat cat
+Sat Jan 31 00:56:02 2015 0 13479 13487 iconv iconv -f utf-8 -t utf-8 /tmp/man.OGU85G
+Sat Jan 31 00:56:02 2015 0 13479 13488 rm rm -f /tmp/man.OGU85G
+Sat Jan 31 00:56:02 2015 0 13476 13479 groff /usr/bin/groff -mtty-char -P-c -mandoc -Tutf8
+Sat Jan 31 00:56:02 2015 0 13479 13489 troff troff -mtty-char -mandoc -Tutf8
+Sat Jan 31 00:56:02 2015 0 13479 13490 grotty grotty -c
+
+The output shows all the exec() syscall, which is a rough way to examine new
+process execution. Some of these processes have called exec() more than once;
+also, some new process that only fork() won't be seen.
--- /dev/null
+.TH fslatency-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+fslatency-nd \- Measure file system sync read and write latency distribution. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B fslatency-nd.stp
+[interval [count]]
+.SH DESCRIPTION
+fslatency-nd.stp dynamically traces two common file system functions:
+do_sync_read() and do_sync_write(), and reports a histogram distribution of
+latency. Many, but not all, file systems and
+workloads use these functions. Tracing their time provides one view of
+suffered file system latency.
+
+Other file system requests not covered by these functions and this tool include:
+asynchronous I/O, directory operations, and file handle operations.
+.SH REQUIREMENTS
+SystemTap.
+.SH OPTIONS
+.TP
+interval
+Interval for reports, in seconds.
+.TP
+count
+Number of reports to output.
+.SH EXAMPLES
+.TP
+Measure (most) file system synchronous read and write latency, and show a distribution histogram on Ctrl-C:
+#
+.B fslatency-nd.stp
+.TP
+Measure (most) file system synchronous read and write latency, and show a report every second, five times:
+#
+.B fslatency-nd.stp 1 5
+.SH FIELDS
+.TP
+value
+The minimum value, in nanoseconds, for this histogram bucket. Read this value
+as "I/O latency was at least xxx".
+.TP
+count
+How many reads/writes fell into this latency range.
+.SH OVERHEAD
+The overheads are measurable, as file system requests can be very frequent, and
+the small overhead of tracing each (eg, 4 microseconds per call) can begin to
+add up. Best case, it won't be noticeable. Worst case, the application may run
+2x slower.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+iostat(1), fsslower-nd.stp(8), biolatency-nd.stp(8)
--- /dev/null
+title: Measure the distribution of file system synchronous read and write latency (non-debuginfo)
+name: fslatency-nd.stp
+author: Brendan Gregg
+keywords: io filesystem
+description: This dynamically traces two common file system functions: do_sync_read() and do_sync_write(), and reports a histogram distribution of latency. Many, but not all, file systems and workloads use these functions. Tracing their time provides one view of suffered file system latency.
+scope: system-wide
+usage: ./fslatency-nd.stp [interval [count]]
+arg_1: Output interval (seconds)
+arg_2: Output count
+test_support: stap -p4 fslatency-nd.stp
+test_check: stap -p4 fslatency-nd.stp
+test_installcheck: stap fslatency-nd.stp 1 1
--- /dev/null
+#!/usr/bin/stap
+/*
+ * fslatency-nd.stp Measure file system I/O latency distribution.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * USAGE: ./fslatency-nd.stp [interval [count]]
+ *
+ * This script uses kernel dynamic tracing of two common file system functions:
+ * do_sync_read() and do_sync_write(). This provides a view of just two file
+ * system request types. There are typically many others: asynchronous I/O,
+ * directory operations, file handle operations, etc, that this script does
+ * not instrument.
+ *
+ * This current implementation also traces sock_aio_write(), to exclude that
+ * write time from the report.
+ *
+ * From systemtap-lwtools: https://github.com/brendangregg/systemtap-lwtools
+ *
+ * See the corresponding man page (in systemtap-lwtools) for more info.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 06-Feb-2015 Brendan Gregg Created this.
+ */
+
+global ts[65536]; # 65536 is max concurrent I/O
+global lat, interval = 0, secs = 0, count = 0, output = 0;
+
+probe begin
+{
+ printf("Tracing FS sync reads and writes... ");
+ if (argv_1 != "") {
+ interval = strtol(argv_1, 10);
+ if (argv_2 != "") {
+ count = strtol(argv_2, 10);
+ }
+ printf("Output every %d secs.\n", interval);
+ } else {
+ printf("Hit Ctrl-C to end.\n");
+ }
+}
+
+probe kprobe.function("do_sync_read"),
+ kprobe.function("do_sync_write")
+{
+ ts[tid()] = gettimeofday_ns();
+}
+
+probe kprobe.function("sock_aio_write")
+{
+ /* not a file system write */
+ ts[tid()] = 0;
+}
+
+probe kprobe.function("do_sync_read").return,
+ kprobe.function("do_sync_write").return
+{
+ if (ts[tid()]) {
+ lat[ppfunc()] <<< gettimeofday_ns() - ts[tid()];
+ delete ts[tid()];
+ }
+}
+
+function print_report()
+{
+ printf("\n");
+ if (interval) { printf("%s ", ctime(gettimeofday_s())); }
+ printf("FS latency (ns):\n\n");
+ foreach (name in lat+) {
+ printf("FS call: %s()\n", name);
+ print(@hist_log(lat[name]));
+ }
+ delete lat;
+}
+
+probe timer.s(1)
+{
+ if (interval) {
+ if (++secs == interval) {
+ print_report();
+ secs = 0;
+ count && (++output == count) && exit();
+ }
+ }
+}
+
+probe end
+{
+ !interval && print_report();
+ delete secs;
+ delete output;
+}
--- /dev/null
+Examples of fslatency-nd.stp, the Linux SystemTap version.
+
+
+fslatency-nd.stp dynamically traces two common file system functions:
+do_sync_read() and do_sync_write(). Many, but not all, file systems and
+workloads use these functions. Tracing their time provides one view of
+suffered file system latency.
+
+# ./fslatency-nd.stp
+Tracing FS sync reads and writes... Hit Ctrl-C to end.
+^C
+FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 256 | 0
+ 512 | 0
+ 1024 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 4858
+ 2048 |@@@@ 400
+ 4096 | 3
+ 8192 | 7
+ 16384 | 14
+ 32768 | 6
+ 65536 | 3
+ 131072 | 9
+ 262144 | 40
+ 524288 |@ 164
+ 1048576 | 87
+ 2097152 | 10
+ 4194304 | 5
+ 8388608 | 2
+ 16777216 | 5
+ 33554432 | 16
+ 67108864 | 2
+ 134217728 | 0
+ 268435456 | 1
+ 536870912 | 0
+1073741824 | 0
+
+FS call: do_sync_write()
+ value |-------------------------------------------------- count
+ 2048 | 0
+ 4096 | 0
+ 8192 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 5335
+ 16384 |@@ 260
+ 32768 | 32
+ 65536 | 2
+ 131072 | 2
+ 262144 | 0
+ 524288 | 0
+ 1048576 | 0
+ 2097152 | 0
+ 4194304 | 1
+ 8388608 | 0
+16777216 | 0
+
+In the above output, do_sync_read() has multiple modes. The largest, around 1
+to 4 microseconds, likely corresponds to page cache hits, and spans over 5000
+reads. Next is a much smaller mode around 0.5 ms, which is likely a disk read.
+Then a small mode around 32 ms, which would likely be disk I/O that queued.
+
+Unlike biolatency-nd.stp, fslatency-nd.stp includes cache hits.
+
+
+Here is a clear bimodal distribution from a random read workload:
+
+# ./fslatency-nd.stp
+Tracing FS sync reads and writes... Hit Ctrl-C to end.
+^C
+FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 128 | 0
+ 256 | 0
+ 512 | 2
+ 1024 |@ 6
+ 2048 |@@@@@@@@@@@ 46
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 134
+ 8192 |@@@@@ 20
+ 16384 | 2
+ 32768 | 1
+ 65536 | 0
+ 131072 | 0
+ 262144 | 1
+ 524288 |@@ 9
+ 1048576 | 0
+ 2097152 |@@@@@@@@@@@@@@ 57
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 200
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 169
+ 16777216 |@@@ 13
+ 33554432 | 3
+ 67108864 | 0
+134217728 | 0
+
+The first, around 4 us, is likely page cache hits. The second, around 4-8 ms,
+is likely page cache misses (disk reads).
+
+
+fslatency-nd.ntp takes an optional interval and count as arguments. Eg, to
+show latency every second:
+
+# ./fslatency-nd.stp 1
+Tracing FS sync reads and writes... Output every 1 secs.
+
+Fri Feb 6 21:58:48 2015 FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@@@@ 11
+ 4096 |@@@@@@@@@@@@@ 27
+ 8192 |@ 3
+ 16384 | 0
+ 32768 | 0
+ 65536 | 0
+ 131072 | 0
+ 262144 | 1
+ 524288 | 1
+ 1048576 | 0
+ 2097152 |@@@@ 9
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@ 44
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@ 52
+ 16777216 |@@ 4
+ 33554432 | 1
+ 67108864 | 0
+134217728 | 0
+
+
+Fri Feb 6 21:58:49 2015 FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@@@@@@@@ 9
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@@ 25
+ 8192 |@@@@ 4
+ 16384 | 0
+ 32768 | 0
+ 65536 | 0
+ 131072 | 0
+ 262144 |@ 1
+ 524288 |@ 1
+ 1048576 |@@ 2
+ 2097152 |@@@@@@@@@@@@@@ 14
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 49
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 46
+ 16777216 |@@@@@ 5
+ 33554432 |@@ 2
+ 67108864 | 0
+134217728 | 0
+
+
+Fri Feb 6 21:58:50 2015 FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@ 5
+ 4096 |@@@@@@@@@@@@@@@@@@@ 38
+ 8192 |@@ 4
+ 16384 | 0
+ 32768 | 0
+ 65536 | 0
+ 131072 | 0
+ 262144 |@ 2
+ 524288 | 1
+ 1048576 | 0
+ 2097152 |@@@@@@@@@@ 21
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@ 43
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@ 53
+ 16777216 |@ 2
+ 33554432 |@ 2
+ 67108864 | 0
+134217728 | 0
+
+
+Fri Feb 6 21:58:51 2015 FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@@@@@@@ 17
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@ 49
+ 8192 |@ 2
+ 16384 | 0
+ 32768 | 0
+ 65536 | 0
+ 131072 | 0
+ 262144 |@ 3
+ 524288 | 0
+ 1048576 | 1
+ 2097152 |@@@@@@@@@@@@ 24
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@@@ 48
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@ 52
+16777216 |@ 3
+33554432 | 0
+67108864 | 0
+
+
+Fri Feb 6 21:58:52 2015 FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@@@@@@@@@@@@ 13
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 34
+ 8192 |@@@@@ 5
+ 16384 | 0
+ 32768 | 0
+ 65536 | 0
+ 131072 | 0
+ 262144 |@ 1
+ 524288 |@@@@@@ 6
+ 1048576 | 0
+ 2097152 |@@@@@@@@@@@@@ 13
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 38
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 46
+16777216 |@@@@@@@@@@@ 11
+33554432 | 0
+67108864 | 0
+
+
+Fri Feb 6 21:58:53 2015 FS latency (ns):
+
+FS call: do_sync_read()
+ value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@@@@@@ 15
+ 4096 |@@@@@@@@@@@@@@@@@@@@ 41
+ 8192 |@@@ 7
+ 16384 | 0
+ 32768 | 0
+ 65536 | 0
+ 131072 | 0
+ 262144 |@ 2
+ 524288 |@ 2
+ 1048576 | 0
+ 2097152 |@@@@@@@@@@ 21
+ 4194304 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 59
+ 8388608 |@@@@@@@@@@@@@@@@@@@@@ 42
+16777216 |@@ 5
+33554432 | 0
+67108864 | 0
--- /dev/null
+.TH fsslower-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+fsslower-nd \- Trace slow file system sync reads and writes. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B fsslower-nd.stp
+[min_ms]
+.SH DESCRIPTION
+fsslower-nd.stp dynamically traces two common file system functions:
+do_sync_read() and do_sync_write(), and shows details of each call that is
+slower than a threshold. Many, but not all, file systems and
+workloads use these functions. Tracing their time provides one view of
+suffered file system latency.
+
+Other file system requests not covered by these functions and this tool include:
+asynchronous I/O, directory operations, and file handle operations.
+.SH REQUIREMENTS
+SystemTap.
+.SH OPTIONS
+.TP
+min_ms
+Minimum millisecond threshold.
+.SH EXAMPLES
+.TP
+Measure (most) file system synchronous read and write requests with latency slower than or equal to 5 ms:
+#
+.B fsslower-nd.stp 5
+.SH FIELDS
+.TP
+TIME
+Time of the event completion.
+.TP
+PID
+Process ID.
+.TP
+COMM
+Process name.
+.TP
+FUNC
+Kernel function traced.
+.TP
+SIZE
+Size of FS request, in bytes.
+.TP
+LAT(ms)
+Latency of function, in milliseconds.
+.SH OVERHEAD
+The overheads are measurable, as file system requests can be very frequent, and
+the small overhead of tracing each (eg, 4 microseconds per call) can begin to
+add up. Best case, it won't be noticeable. Worst case, the application may run
+2x slower.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+iostat(1), fslatency-nd.stp(8), biolatency-nd.stp(8)
--- /dev/null
+title: Trace slow file system synchronous reads and writes (non-debuginfo)
+name: fsslower-nd.stp
+author: Brendan Gregg
+keywords: io filesystem
+description: This dynamically traces two common file system functions: do_sync_read() and do_sync_write(), and shows details of each call that is slower than a threshold. Many, but not all, file systems and workloads use these functions. Tracing their time provides one view of suffered file system latency.
+scope: system-wide
+usage: ./fsslower-nd.stp [min_ms]
+arg_1: Minimum millisecond threshold
+test_support: stap -p4 fsslower-nd.stp
+test_check: stap -p4 fsslower-nd.stp
+test_installcheck: stap fsslower-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * fslower-nd.stp Trace slow file system sync reads and writes.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * USAGE: ./fsslower-nd.stp [min_ms]
+ *
+ * This script uses kernel dynamic tracing of two common file system functions:
+ * do_sync_read() and do_sync_write(). This provides a view of just two file
+ * system request types. There are typically many others: asynchronous I/O,
+ * directory operations, file handle operations, etc, that this script does
+ * not instrument.
+ *
+ * By default, a minimum millisecond threshold of 10 is used.
+ *
+ * From systemtap-lwtools: https://github.com/brendangregg/systemtap-lwtools
+ *
+ * See the corresponding man page (in systemtap-lwtools) for more info.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 06-Feb-2015 Brendan Gregg Created this.
+ */
+
+global ts[65536]; # 65536 is max concurrent I/O
+global sz[65536]; # "
+global min_ms = 10;
+
+probe begin
+{
+ if (argv_1 != "") {
+ min_ms = strtol(argv_1, 10);
+ }
+ printf("Tracing FS sync reads and writes slower than %d ms...", min_ms);
+ printf(" Hit Ctrl-C to end.\n");
+ printf("%-8s %-6s %-16s %-14s %-7s %8s\n", "TIME", "PID", "COMM",
+ "FUNC", "SIZE", "LAT(ms)");
+}
+
+function hhmmss() {
+ return substr(ctime(gettimeofday_s()), 11, 8);
+}
+
+probe kprobe.function("do_sync_read"),
+ kprobe.function("do_sync_write")
+{
+ sz[tid()] = int_arg(3);
+ ts[tid()] = gettimeofday_ns();
+}
+
+probe kprobe.function("sock_aio_write")
+{
+ /* not a file system write */
+ sz[tid()] = 0;
+ ts[tid()] = 0;
+}
+
+probe kprobe.function("do_sync_read").return,
+ kprobe.function("do_sync_write").return
+{
+ if (ts[tid()]) {
+ lat = (gettimeofday_ns() - ts[tid()]) / 1000000;
+ if (lat >= min_ms) {
+ printf("%-8s %-6d %-16s %-14s %-7d %8d\n", hhmmss(),
+ pid(), execname(), ppfunc(), sz[tid()], lat);
+ }
+ delete sz[tid()];
+ delete ts[tid()];
+ }
+}
--- /dev/null
+Examples of fsslower-nd.stp, the Linux SystemTap version.
+
+
+This tool prints event-by-event details for each file system synchronous read
+or write slower than a threshold. It works by dynamically traces two common
+file system functions: do_sync_read() and do_sync_write(). Many, but not all,
+file systems and workloads use these functions. Tracing their time provides one
+view of suffered file system latency.
+
+Tracing FS sync reads and writes slower than 5 ms:
+
+# ./fsslower-nd.stp 5
+Tracing FS sync reads and writes slower than 5 ms... Hit Ctrl-C to end.
+TIME PID COMM FUNC SIZE LAT(ms)
+22:27:28 2346 randread.pl do_sync_read 8192 11
+22:27:28 2346 randread.pl do_sync_read 8192 14
+22:27:28 2346 randread.pl do_sync_read 8192 6
+22:27:28 2346 randread.pl do_sync_read 8192 5
+22:27:28 2346 randread.pl do_sync_read 8192 6
+22:27:28 2346 randread.pl do_sync_read 8192 5
+22:27:28 2346 randread.pl do_sync_read 8192 24
+22:27:28 2346 randread.pl do_sync_read 8192 12
+22:27:28 2346 randread.pl do_sync_read 8192 9
+22:27:28 2346 randread.pl do_sync_read 8192 9
+22:27:28 2346 randread.pl do_sync_read 8192 5
+22:27:28 2346 randread.pl do_sync_read 8192 8
+22:27:28 2346 randread.pl do_sync_read 8192 14
+22:27:28 2346 randread.pl do_sync_read 8192 10
+22:27:28 2346 randread.pl do_sync_read 8192 6
+22:27:28 2346 randread.pl do_sync_read 8192 5
+[...]
+
+This shows that the randread.pl process was suffering latency up to 26 ms, when
+issuing do_sync_read()s of 8 Kbytes.
+
+As this is designed to be a non-debuginfo tool, there are not that many details
+available. Investigate further using other tools and custom SystemTap.
--- /dev/null
+.TH killsnoop-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+killsnoop-nd \- trace process signals. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B killsnoop-nd.stp
+.SH DESCRIPTION
+This traces signals system-wide, including those sent by the kill(1) command,
+and shows various details.
+.SH REQUIREMENTS
+SystemTap.
+.SH EXAMPLES
+.TP
+Trace all signals, showing process details, signal number, and result:
+#
+.B killsnoop-nd.stp
+.SH FIELDS
+.TP
+FROM
+Process ID that issued the signal.
+.TP
+COMMAND
+Process name that issued the signal.
+.TP
+SIG
+Signal number. See "kill -l" in bash(1) for a list.
+.TP
+TO
+Signal destination PID.
+.TP
+RESULT
+Signal result. 0 for success.
+.SH OVERHEAD
+This traces signals and prints a line of output for each. The overhead to
+trace each signal should be low/negligible.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+strace(1)
--- /dev/null
+title: Trace kill() signals showing process and signal details (non-debuginfo)
+name: killsnoop-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This traces signals system-wide, including those sent by the kill(1) command, and shows various details.
+scope: system-wide
+usage: ./killsnoop-nd.stp
+test_support: stap -p4 killsnoop-nd.stp
+test_check: stap -p4 killsnoop-nd.stp
+test_installcheck: stap killsnoop-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * killsnoop-nd.stp Trace process signals.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 24-Jun-2014 Brendan Gregg Created this.
+ */
+
+global target;
+global signal;
+
+probe begin
+{
+ printf("%-6s %-12s %-5s %-6s %6s\n", "FROM", "COMMAND", "SIG", "TO",
+ "RESULT");
+}
+
+probe nd_syscall.kill
+{
+ /*
+ * I could use the @entry shortcut on the return probe instead, but
+ * wanted a full example of using global arrays directly.
+ */
+ target[tid()] = uint_arg(1);
+ signal[tid()] = uint_arg(2);
+}
+
+probe nd_syscall.kill.return
+{
+ if (target[tid()] != 0) {
+ printf("%-6d %-12s %-5d %-6d %6d\n", pid(), execname(),
+ signal[tid()], target[tid()], int_arg(1));
+ delete target[tid()];
+ delete signal[tid()];
+ }
+}
--- /dev/null
+Examples of killsnoop-nd.stp, the Linux SystemTap version.
+
+
+killsnoop-nd.stp traces signals, including those sent by the kill(1) command.
+For example:
+
+# ./killsnoop-nd.stp
+FROM COMMAND SIG TO RESULT
+14163 w 0 11962 0
+14163 w 0 12040 0
+14163 w 0 14152 0
+14152 bash 9 12345 -3
+14152 bash 9 14152 0
+^C13914 killsnoop-nd.st 15 14147 0
+
+The output begins by showing the w(1) command sending three signal 0's to
+different processes: signal 0, and the return value, is used as an aliveness
+check. Then bash sends a signal 9 (SIGKILL) to PID 12345, which doesn't exist,
+and returns an error (-3). This is followed by a successful signal 9. These two
+were issued using "kill -9".
+
+Finally, I hit Ctrl-C to end this example, and this was caught and shown after
+the "^C". Signal 15 (SIGTERM)? I thought Ctrl-C issued signal 2 (SIGINT)...
+
+For a quick lookup of signal numbers to names, you can use the kill(1) bash
+builtin with -l. Eg:
+
+bash$ kill -l
+ 1) SIGHUP 2) SIGINT 3) SIGQUIT 4) SIGILL 5) SIGTRAP
+ 6) SIGABRT 7) SIGBUS 8) SIGFPE 9) SIGKILL 10) SIGUSR1
+11) SIGSEGV 12) SIGUSR2 13) SIGPIPE 14) SIGALRM 15) SIGTERM
+16) SIGSTKFLT 17) SIGCHLD 18) SIGCONT 19) SIGSTOP 20) SIGTSTP
+21) SIGTTIN 22) SIGTTOU 23) SIGURG 24) SIGXCPU 25) SIGXFSZ
+26) SIGVTALRM 27) SIGPROF 28) SIGWINCH 29) SIGIO 30) SIGPWR
+31) SIGSYS 34) SIGRTMIN 35) SIGRTMIN+1 36) SIGRTMIN+2 37) SIGRTMIN+3
+38) SIGRTMIN+4 39) SIGRTMIN+5 40) SIGRTMIN+6 41) SIGRTMIN+7 42) SIGRTMIN+8
+43) SIGRTMIN+9 44) SIGRTMIN+10 45) SIGRTMIN+11 46) SIGRTMIN+12 47) SIGRTMIN+13
+48) SIGRTMIN+14 49) SIGRTMIN+15 50) SIGRTMAX-14 51) SIGRTMAX-13 52) SIGRTMAX-12
+53) SIGRTMAX-11 54) SIGRTMAX-10 55) SIGRTMAX-9 56) SIGRTMAX-8 57) SIGRTMAX-7
+58) SIGRTMAX-6 59) SIGRTMAX-5 60) SIGRTMAX-4 61) SIGRTMAX-3 62) SIGRTMAX-2
+63) SIGRTMAX-1 64) SIGRTMAX
--- /dev/null
+.TH opensnoop-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+opensnoop-nd \- trace process open() with arguments. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B opensnoop-nd.stp
+.SH DESCRIPTION
+This traces the open() syscall system-wide, to show which files are being
+opened, and by who.
+
+This is a basic version of opensnoop.
+.SH REQUIREMENTS
+SystemTap.
+.SH EXAMPLES
+.TP
+Trace all open() syscalls, showing process and file details:
+#
+.B opensnoop-nd.stp
+.SH FIELDS
+.TP
+UID
+User ID.
+.TP
+PID
+Process ID.
+.TP
+PPID
+Parent process ID.
+.TP
+COMM
+Process name.
+.TP
+FD
+Return value of the open() syscall, which is either the resulting file descriptor
+(positive) or error code (negative).
+.TP
+PATH
+File pathname provided to the open() syscall.
+.SH OVERHEAD
+This reads and processes open() events and prints a line of output for each.
+If your applications perform a high rate of open()s, the overhead of this
+tool may become noticeable.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+strace(1)
--- /dev/null
+title: Trace open() syscalls showing filenames (non-debuginfo)
+name: opensnoop-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This traces the open() syscall system-wide, to show which files are being opened, and by who.
+scope: system-wide
+usage: ./opensnoop-nd.stp
+test_support: stap -p4 opensnoop-nd.stp
+test_check: stap -p4 opensnoop-nd.stp
+test_installcheck: stap opensnoop-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * opensnoop-nd.stp Trace file open()s. Basic version of opensnoop.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 21-Mar-2014 Brendan Gregg Created this.
+ */
+
+probe begin
+{
+ printf("%6s %6s %16s %4s %s\n", "UID", "PID", "COMM", "FD", "PATH");
+}
+
+probe nd_syscall.open.return
+{
+ /* trace on return to ensure the pathname has faulted in */
+ printf("%6d %6d %16s %4d %s\n", uid(), pid(), execname(),
+ returnval(), user_string2(@entry(pointer_arg(1)), "-"));
+}
--- /dev/null
+Examples of opensnoop-nd.stp, the Linux SystemTap version.
+
+
+This shows the files opened by an "iostat -x 1" command, using opensnoop-nd.stp:
+
+# ./opensnoop-nd.stp
+ UID PID COMM FD PATH
+ 0 13858 iostat 3 /etc/ld.so.cache
+ 0 13858 iostat 3 /lib64/libc.so.6
+ 0 13858 iostat 3 /usr/lib/locale/locale-archive
+ 0 13858 iostat 3 /sys/devices/system/cpu
+ 0 13858 iostat 3 /proc/diskstats
+ 0 13858 iostat 3 /etc/localtime
+ 0 13858 iostat 3 /proc/uptime
+ 0 13858 iostat 3 /proc/stat
+ 0 13858 iostat 3 /proc/diskstats
+ 0 13858 iostat 4 /etc/sysconfig/sysstat.ioconf
+ 0 13858 iostat 3 /proc/uptime
+ 0 13858 iostat 3 /proc/stat
+ 0 13858 iostat 3 /proc/diskstats
+[...]
+
+This can be a handy way to locate configuration files and source files. The
+output includes /proc pathnames where the iostat statistics are sourced, as
+well as /etc/sysconfig/sysstat.ioconf, a configuration file.
--- /dev/null
+.TH rwtime-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+rwtime-nd \- summarize syscall read() and write() times. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B rwtime-nd.stp
+[execname]
+.SH DESCRIPTION
+This traces read() and write() syscalls, producing a histogram summary of their
+durations (aka latencies).
+.SH REQUIREMENTS
+SystemTap.
+.SH OPTIONS
+.TP
+execname
+An optional argument to specify the process name to trace. All PIDs with this
+process name will be observed.
+.SH EXAMPLES
+.TP
+Trace all syscall read()s and write()s, showing latency summaries:
+#
+.B rwtime-nd.stp
+.TP
+Trace all syscall read()s and write()s for processes named "node":
+.B rwtime-nd.stp
+node
+.SH OVERHEAD
+While these syscalls are frequent, the overhead should be low as this uses
+in-kernel summaries. Test before use to confirm.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+strace(1)
--- /dev/null
+title: Summarize read() and write() syscall latency (non-debuginfo)
+name: rwtime-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This traces read() and write() syscalls, producing a histogram summary of their durations (aka latencies).
+scope: system-wide
+usage: ./rwtime-nd.stp [execname]
+arg_1: Optional process name to match
+test_support: stap -p4 rwtime-nd.stp
+test_check: stap -p4 rwtime-nd.stp
+test_installcheck: stap rwtime-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * rwtime-nd.stp read/write syscalls by latency.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * USAGE: ./rwtime.stp [execname]
+ *
+ * An option argument of the program name, eg, "httpd", can be provided. Without
+ * this, all processes are traced.
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 24-Jun-2014 Brendan Gregg Created this.
+ */
+
+global read, write, tproc = "";
+
+probe begin
+{
+ tproc = argv_1;
+ if (tproc != "") {
+ printf("Tracing read/write syscalls for processes named \"%s\"",
+ tproc);
+ } else {
+ printf("Tracing read/write syscalls");
+ }
+ printf("... Hit Ctrl-C to end.\n");
+}
+
+probe nd_syscall.read.return
+{
+ if (tproc == "" || execname() == tproc) {
+ read <<< gettimeofday_ns() - @entry(gettimeofday_ns());
+ }
+}
+
+probe nd_syscall.write.return
+{
+ if (tproc == "" || execname() == tproc) {
+ write <<< gettimeofday_ns() - @entry(gettimeofday_ns());
+ }
+}
+
+probe end
+{
+ printf("\nsyscall read latency (ns):\n");
+ if (@count(read)) { print(@hist_log(read)); }
+ printf("syscall write latency (ns):\n");
+ if (@count(write)) { print(@hist_log(write)); }
+}
--- /dev/null
+Examples of rwtime-nd.stp, the Linux SystemTap version.
+
+
+rwtime-nd.stp measures syscall read() and write() times, summarizing them
+as histograms. A process name can be specified. For example, to measure
+read() and write() latency from processes named "node":
+
+# ./rwtime-nd.stp node
+Tracing read/write syscalls for processes named "node"... Hit Ctrl-C to end.
+^C
+syscall read latency (ns):
+value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 | 2
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 278
+ 8192 | 1
+16384 | 5
+32768 | 0
+65536 | 0
+
+syscall write latency (ns):
+ value |-------------------------------------------------- count
+ 4096 | 0
+ 8192 | 0
+ 16384 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 135
+ 32768 |@@ 8
+ 65536 | 0
+131072 | 0
+
+Those reads and writes were very fast, between 4 and 32 microseconds.
+
+
+Here's an example with much higher latencies: processes named "bash":
+
+# ./rwtime-nd.stp bash
+Tracing read/write syscalls for processes named "bash"... Hit Ctrl-C to end.
+^C
+syscall read latency (ns):
+ value |-------------------------------------------------- count
+ 256 | 0
+ 512 | 0
+ 1024 |@ 1
+ 2048 |@ 1
+ 4096 | 0
+ 8192 | 0
+ 16384 | 0
+ 32768 |@ 1
+ 65536 |@@@@@@ 6
+ 131072 |@@@@@@@@@@@ 11
+ 262144 | 0
+ 524288 | 0
+ ~
+ 2097152 | 0
+ 4194304 | 0
+ 8388608 |@ 1
+ 16777216 | 0
+ 33554432 |@ 1
+ 67108864 |@@@ 3
+ 134217728 |@@@@@@@@@ 9
+ 268435456 |@@@ 3
+ 536870912 |@@ 2
+ 1073741824 |@@ 2
+ 2147483648 | 0
+ 4294967296 |@ 1
+ 8589934592 | 0
+17179869184 | 0
+
+syscall write latency (ns):
+value |-------------------------------------------------- count
+ 512 | 0
+ 1024 | 0
+ 2048 |@@@@@@@@@@@@@@@@@@@@@ 21
+ 4096 |@@@@@@@@@@@@@@@@@@@@@@ 22
+ 8192 |@@@@@@@@@@@@@@@@@@ 18
+16384 | 0
+32768 | 0
+
+The high read time, with a mode between 134 and 268 milliseconds, corresponds
+to inter-keystroke latency as the bash shell reads keystrokes.
--- /dev/null
+.TH syscallbypid-nd 8 "2015-01-30" "USER COMMANDS"
+.SH NAME
+syscallbypid-nd \- trace process signals. Uses Linux SystemTap (non-debuginfo).
+.SH SYNOPSIS
+.B syscallbypid-nd.stp
+.SH DESCRIPTION
+This traces syscalls system-wide, and produces a summary report showing their
+counts by process ID, process name, and syscall types.
+.SH REQUIREMENTS
+SystemTap.
+.SH EXAMPLES
+.TP
+Count and summarize syscalls:
+#
+.B syscallbypid-nd.stp
+.SH FIELDS
+.TP
+PID
+Process ID.
+.TP
+CMD
+Process name.
+.TP
+SYSCALL
+System call name.
+.TP
+COUNT
+Number of syscalls issued during tracing.
+.SH OVERHEAD
+This uses in-kernel summaries to reduce overheads, so the overhead should be
+low (much lower than the current strace(1) implementation, for example). Test
+before use to confirm.
+.SH SOURCE
+This is from the SystemTap lwtools collection.
+.IP
+https://github.com/brendangregg/systemtap-lwtools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+strace(1)
--- /dev/null
+title: Count syscalls with process details (non-debuginfo)
+name: syscallbypid-nd.stp
+author: Brendan Gregg
+keywords: io
+description: This traces syscalls system-wide, and produces a summary report showing their counts by process ID, process name, and syscall types.
+scope: system-wide
+usage: ./syscallbypid-nd.stp
+test_support: stap -p4 syscallbypid-nd.stp
+test_check: stap -p4 syscallbypid-nd.stp
+test_installcheck: stap syscallbypid-nd.stp -c "sleep 1"
--- /dev/null
+#!/usr/bin/stap
+/*
+ * syscallbypid-nd.d Count syscalls with process details.
+ * For Linux, uses SystemTap (non-debuginfo).
+ *
+ * USAGE: ./syscallbypid-nd.d
+ *
+ * Copyright (C) 2015 Brendan Gregg.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * 23-Jun-2014 Brendan Gregg Created this.
+ */
+
+global num;
+
+probe begin
+{
+ printf("Tracing syscall completions... Hit Ctrl-C to end.\n");
+}
+
+probe nd_syscall.*.return
+{
+ num[pid(), execname(), name] <<< 1;
+}
+
+probe end
+{
+ printf("\n%-8s %-16s %-24s %8s\n", "PID", "CMD", "SYSCALL", "COUNT");
+ foreach ([p, e, n] in num+) {
+ printf("%-8d %-16s %-24s %8d\n", p, e, n, @count(num[p, e, n]));
+ }
+}
--- /dev/null
+Examples of execsnoop-nd.stp, the Linux SystemTap version.
+
+
+This traces syscalls system-wide, and produces a summary report showing their
+counts by process ID, process name, and syscall type. For example:
+
+# ./syscallbypid-nd.stp
+Tracing syscall completions... Hit Ctrl-C to end.
+^C
+PID CMD SYSCALL COUNT
+4282 bash read 1
+4283 bash ioctl 1
+902 crond select 1
+4283 bash setpgid 1
+902 crond stat 1
+4283 bash read 1
+902 crond rt_sigaction 1
+14409 bash stat 1
+4279 stapio rt_sigreturn 1
+4279 stapio pselect6 1
+14564 sshd ioctl 1
+4282 bash ioctl 1
+4282 bash setpgid 1
+4279 stapio write 1
+4283 curl execve 1
+4283 curl connect 1
+4282 curl pipe 1
+4282 curl set_tid_address 1
+4283 curl setsockopt 1
+4282 curl gettid 1
+785 auditd gettimeofday 1
+4282 curl ioctl 1
+4282 curl getsockname 1
+4282 curl rt_sigprocmask 1
+4283 curl write 1
+4046 syscallbypid-nd rt_sigprocmask 1
+4279 stapio madvise 1
+4283 curl statfs 1
+4282 curl getsockopt 1
+4283 curl bind 1
+4283 curl getrlimit 1
+4282 curl arch_prctl 1
+4283 curl recvfrom 1
+4282 curl connect 1
+4282 curl execve 1
+4279 stapio tgkill 1
+4283 curl pipe 1
+4283 curl set_tid_address 1
+4282 curl setsockopt 1
+4283 curl gettid 1
+4283 curl ioctl 1
+4282 curl write 1
+4283 curl getsockname 1
+4283 curl rt_sigprocmask 1
+4283 curl getsockopt 1
+4282 curl statfs 1
+4282 curl getrlimit 1
+4282 curl bind 1
+4283 curl arch_prctl 1
+4282 curl recvfrom 1
+4279 stapio futex 1
+4283 bash close 2
+15216 node close 2
+4282 bash close 2
+15216 node write 2
+14409 bash rt_sigreturn 2
+902 crond rt_sigprocmask 2
+14409 bash sendmsg 2
+4282 curl sendto 2
+4283 curl munmap 2
+4282 curl access 2
+4283 curl fcntl 2
+14409 bash pipe 2
+14409 bash clone 2
+14409 bash setpgid 2
+14409 bash socket 2
+4283 curl sendto 2
+4282 curl fcntl 2
+4283 curl access 2
+4282 curl munmap 2
+785 auditd clock_gettime 2
+14409 bash time 3
+14564 sshd read 3
+14564 sshd write 3
+4283 bash rt_sigprocmask 3
+902 crond read 3
+902 crond fstat 3
+902 crond munmap 3
+902 crond open 3
+4282 bash rt_sigprocmask 3
+902 crond close 3
+902 crond mmap2 3
+902 crond time 3
+4282 curl brk 3
+4282 curl rt_sigaction 3
+4282 curl recvmsg 3
+4283 curl futex 3
+4283 curl socket 3
+4283 curl brk 3
+15216 SignalSender futex 3
+4283 curl rt_sigaction 3
+4283 curl recvmsg 3
+4282 curl futex 3
+4282 curl socket 3
+14409 bash wait4 4
+15216 node accept4 4
+15216 node futex 4
+15216 node read 4
+4279 stapio fcntl 4
+4282 curl time 4
+4283 curl time 4
+14564 sshd select 5
+15216 SignalSender nanosleep 5
+4283 curl poll 5
+4282 curl poll 5
+15216 node epoll_wait 6
+15216 node epoll_ctl 6
+14409 bash close 6
+4283 bash rt_sigaction 7
+4282 bash rt_sigaction 7
+14564 sshd rt_sigprocmask 12
+14409 bash ioctl 20
+14405 sshd ioctl 20
+4282 curl clock_gettime 24
+4283 curl clock_gettime 24
+15216 node clock_gettime 26
+14409 bash read 28
+14409 bash write 29
+4283 curl read 32
+4282 curl read 32
+4282 curl open 34
+4283 curl fstat 34
+4283 curl open 34
+4282 curl fstat 34
+4279 stapio ppoll 38
+4282 curl close 38
+4283 curl close 38
+4279 stapio read 42
+14409 bash rt_sigaction 48
+14405 sshd write 49
+14405 sshd read 49
+4282 curl mprotect 58
+4283 curl mprotect 58
+4282 curl mmap2 91
+4283 curl mmap2 91
+14405 sshd select 97
+14409 bash rt_sigprocmask 111
+14405 sshd rt_sigprocmask 196
+
+This can be used to get a handle on syscall usage and rates before using more
+expensive tools to measure their behavior.
+
+
+The output is limited to 2048 rows. If it exceeds that, for example, because
+of many short-lived processes each with their own PID, then you will see the
+following error:
+
+# ./syscallbypid-nd.stp
+Tracing syscall completions... Hit Ctrl-C to end.
+ERROR: Array overflow, check MAXMAPENTRIES near identifier 'num' at ./syscallbypid-nd.stp:32:2
+WARNING: Number of errors: 1, skipped probes: 0
+WARNING: /usr/local/bin/staprun exited with status: 1
+Pass 5: run failed. [man error::pass5]
+
+See the oneliners.txt for probing syscalls without breaking down by PID to
+start with.
+