This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v2] Scheduler Tapset based on kernel tracepoints


Hi,

I have modified the patch according to the comments by Frank and
William.

Changelog
---------
  - Changed "?" to "!" while falling back to the kprobe based probe.
  - Added a level of indirection for the variables.

Signed-off-by: Kiran Prakash <kiran@linux.vnet.ibm.com>

diff -Naur systemtap-orig/tapset/scheduler.stp systemtap/tapset/scheduler.stp
--- systemtap-orig/tapset/scheduler.stp	2009-09-19 10:27:14.000000000 -0400
+++ systemtap/tapset/scheduler.stp	2009-09-22 07:32:04.000000000 -0400
@@ -118,32 +118,280 @@
  *  Arguments:
  *    prev_pid: The pid of the process to be switched out
  *    next_pid: The pid of the process to be switched in
+ *    prev_tid: The tid of the process to be switched out
+ *    next_tid: The tid of the process to be switched in	 
+ *    prev_task_name: The name of the process to be switched out
+ *    next_task_name: The name of the process to be switched in
  *    prevtsk_state: the state of the process to be switched out
+ *    nexttsk_state: the state of the process to be switched in
  */
-probe scheduler.ctxswitch =
+
+probe scheduler.ctxswitch.tp = kernel.trace("sched_switch")	// tracepoint variant; reads $prev/$next
+{
+        next_pid = $next->pid
+        next_tid = task_tid($next)	// thread id, kept separate from next_pid
+        next_task = $next
+        next_task_name = task_execname($next)
+        nexttsk_state = $next->state
+        prev_pid = $prev->pid
+        prev_tid = task_tid($prev)	// thread id of the task being switched out
+        prev_task = $prev
+        prev_task_name = task_execname($prev)
+        prevtsk_state = $prev->state
+}
+
+probe scheduler.ctxswitch.kp =
 %( arch != "x86_64" && arch != "ia64" %?
-	kernel.trace("sched_switch") !, kernel.function("__switch_to")
+        kernel.function("__switch_to")	// kprobe variant for every arch except x86_64/ia64
 %:
-	kernel.trace("sched_switch") !, kernel.function("context_switch")
+        kernel.function("context_switch")	// kprobe variant for x86_64 and ia64
 %)
 {
 %( arch == "powerpc" %?
-        prev_pid = $prev->pid
-        next_pid =  $new->pid
+        prev_pid = $prev->pid
+        next_pid = $new->pid
+        prev_tid = task_tid($prev)
+        next_tid = task_tid($new)	// thread id, kept separate from next_pid
         prev_task = $prev
         next_task = $new
+        prev_task_name = task_execname($prev)
+        next_task_name = task_execname($new)
         prevtsk_state = $prev->state
+        nexttsk_state = $new->state
+
 %: %( arch == "x86_64" || arch == "ia64" %?
         prev_pid = $prev->pid
-        next_pid =  $next->pid
+        next_pid = $next->pid
+        prev_tid = task_tid($prev)
+        next_tid = task_tid($next)	// thread id, kept separate from next_pid
         prev_task = $prev
         next_task = $next
+        prev_task_name = task_execname($prev)
+        next_task_name = task_execname($next)
         prevtsk_state = $prev->state
+        nexttsk_state = $next->state
 %:
         prev_pid = $prev_p->pid
         next_pid = $next_p->pid
+        prev_tid = task_tid($prev_p)
+        next_tid = task_tid($next_p)	// thread id, kept separate from next_pid
         prev_task = $prev_p
         next_task = $next_p
+        prev_task_name = task_execname($prev_p)
+        next_task_name = task_execname($next_p)
         prevtsk_state = $prev_p->state
+        nexttsk_state = $next_p->state
 %) %)
 }
+
+probe scheduler.ctxswitch
+ = scheduler.ctxswitch.tp !, scheduler.ctxswitch.kp
+{}
+
+
+/**
+ * probe scheduler.kthread_stop - Fires when a thread created by kthread_create is stopped.
+ * @thread_pid: pid of the thread being stopped.
+ * @thread_priority: priority of the thread.
+ */
+probe scheduler.kthread_stop.kp = kernel.function("kthread_stop")	// kprobe variant; $k is the thread being stopped
+{
+	thread_pid = $k->pid
+	thread_priority = $k->prio	// same "prio" field the tracepoint variant reads
+}
+probe scheduler.kthread_stop.tp = kernel.trace("sched_kthread_stop") 
+{
+        thread_pid = $t->pid
+        thread_priority = $t->prio
+}
+probe scheduler.kthread_stop 
+   = scheduler.kthread_stop.tp !,
+     scheduler.kthread_stop.kp
+{}
+
+
+/**
+ * probe scheduler.kthread_stop.return - Fires once the kthread is stopped and gets the return value
+ * @return_value: return value after stopping the thread.
+ */
+
+probe scheduler.kthread_stop.return.kp = kernel.function("kthread_stop").return
+{
+	return_value = $return	// value returned by kthread_stop(), matching $ret in the tracepoint variant
+}
+probe scheduler.kthread_stop.return.tp = kernel.trace("sched_kthread_stop_ret")
+{
+        return_value = $ret
+}
+
+probe scheduler.kthread_stop.return 
+ = scheduler.kthread_stop.return.tp !,
+   scheduler.kthread_stop.return.kp
+{}
+
+/**
+ * probe scheduler.wait_task - Fires when waiting on a task to unschedule. 
+ *                             It waits till the task becomes inactive.
+ * @task_pid: pid of the task the scheduler is waiting on.
+ * @task_priority: priority of the task
+ */
+
+probe scheduler.wait_task
+ = kernel.trace("sched_wait_task") !,
+   kernel.function("wait_task_inactive")
+{
+        task_pid = $p->pid
+        task_priority = $p->prio
+}
+
+/**
+ * probe scheduler.wakeup - Fires when a task is woken up 
+ * @task_pid: pid of the task being woken up
+ * @task_priority: priority of the task being woken up
+ * @success: returns 1 if the wakeup is successful
+ */
+
+probe scheduler.wakeup
+ = kernel.trace("sched_wakeup") !,
+   kernel.function("try_to_wake_up")
+{
+        task_pid = $p->pid
+        task_priority = $p->prio
+        success = $success
+
+}
+
+/**
+ * probe scheduler.wakeup_new - Fires when a newly created task is woken up for the first time
+ * @task_pid: pid of the new task woken up
+ * @task_priority: priority of the new task
+ * @success: returns 1 if the wake-up is successful
+ */
+probe scheduler.wakeup_new
+ = kernel.trace("sched_wakeup_new") !,
+   kernel.function("wake_up_new_task")
+{
+        task_pid = $p->pid
+        task_priority = $p->prio
+}
+
+/**
+ * probe scheduler.migrate_task - Traces the migration of the tasks across cpus by the scheduler.
+ * @pid: pid of the task being migrated.
+ * @priority: priority of the task being migrated.
+ * @original_cpu: the original cpu
+ * @destination_cpu: the destination cpu
+ */
+probe scheduler.migrate_task.kp = kernel.function("set_task_cpu")
+{
+	destination_cpu = $new_cpu
+}
+probe scheduler.migrate_task.tp = kernel.trace("sched_migrate_task")
+{
+	destination_cpu = $dest_cpu
+}
+probe scheduler.migrate_task
+ = scheduler.migrate_task.tp !,
+   scheduler.migrate_task.kp
+{
+	pid = $p->pid
+        priority = $p->prio
+        original_cpu = task_cpu($p)
+}
+/**
+ * probe scheduler.process_free - Traces the process of freeing up of a process
+ * @pid: PID of the process getting freed
+ * @priority: priority of the process getting freed
+ */
+probe scheduler.process_free.kp = kernel.function("delayed_put_task_struct")
+{
+	pid = $tsk->pid
+	priority = $tsk->prio
+}
+probe scheduler.process_free.tp = kernel.trace("sched_process_free")
+{
+        pid = $p->pid
+        priority = $p->prio
+}
+probe scheduler.process_free
+ = scheduler.process_free.tp !,
+   scheduler.process_free.kp
+{}
+
+/**
+ * probe scheduler.process_exit - Fires when a process exits
+ * @pid: pid of the process exiting
+ * @priority: priority of the process exiting
+ */
+probe scheduler.process_exit.kp = kernel.function("do_exit")
+{
+	pid = $tsk->pid		// NOTE(review): confirm $tsk is resolvable at do_exit entry
+	priority = $tsk->prio	// same "prio" field the tracepoint variant reads
+}
+probe scheduler.process_exit.tp = kernel.trace("sched_process_exit")
+{
+        pid = $p->pid
+        priority = $p->prio
+}
+
+probe scheduler.process_exit 
+ = scheduler.process_exit.tp !,
+   scheduler.process_exit.kp
+{}
+
+/**
+ * probe scheduler.process_wait - Fires when scheduler waits on a process
+ * @pid: PID of the process scheduler is waiting on
+ */
+probe scheduler.process_wait.kp = kernel.function("do_wait")
+{
+	pid = $wo->wo_pid
+}
+probe scheduler.process_wait.tp = kernel.trace("sched_process_wait")
+{
+        pid = $pid
+}
+probe scheduler.process_wait
+ = scheduler.process_wait.tp !,
+   scheduler.process_wait.kp
+{}
+
+/**
+ * probe scheduler.process_fork - Probes the tracepoint for forking a process
+ * @parent_pid: PID of the parent process
+ * @child_pid: PID of the child process
+ */
+probe scheduler.process_fork.kp = kernel.function("do_fork")
+{
+	parent_pid = $current->pid
+	child_pid = $p->pid
+}
+probe scheduler.process_fork.tp = kernel.trace("sched_process_fork")
+{
+        parent_pid = $parent->pid
+        child_pid = $child->pid
+}
+
+probe scheduler.process_fork
+ = scheduler.process_fork.tp !,
+   scheduler.process_fork.kp
+{}
+/**
+ * probe scheduler.signal_send - Probes the tracepoint for sending a signal
+ * @pid: pid of the process sending signal
+ * @signal_number: signal number
+ */
+probe scheduler.signal_send.kp = kernel.function("__send_signal")
+{
+	pid = $t->pid
+}
+probe scheduler.signal_send.tp = kernel.trace("sched_signal_send")
+{
+        pid = $p->pid
+}
+probe scheduler.signal_send
+ = scheduler.signal_send.tp !,
+   scheduler.signal_send.kp
+{
+	signal_number = $sig
+}
diff -Naur systemtap-orig/testsuite/buildok/scheduler-test-tracepoints.stp systemtap/testsuite/buildok/scheduler-test-tracepoints.stp
--- systemtap-orig/testsuite/buildok/scheduler-test-tracepoints.stp	1969-12-31 19:00:00.000000000 -0500
+++ systemtap/testsuite/buildok/scheduler-test-tracepoints.stp	2009-09-22 02:10:36.000000000 -0400
@@ -0,0 +1,53 @@
+#! stap -up4
+
+//Tests if all probes in the scheduler tapset are resolvable.
+
+probe scheduler.kthread_stop {
+	printf("pid = %d, priority = %d\n", thread_pid, thread_priority);
+}
+
+probe scheduler.kthread_stop.return {
+        printf("return value = %d\n", return_value);
+}
+
+probe scheduler.wait_task {
+        printf("pid = %d, priority = %d\n", task_pid, task_priority);
+}
+
+probe scheduler.wakeup {	// prints only the variables the scheduler.wakeup alias sets
+        printf("pid = %d, priority = %d, success = %d\n", task_pid, task_priority, success);
+}
+
+probe scheduler.wakeup_new {	// the alias sets task_pid and task_priority only
+        printf("pid = %d, priority = %d\n", task_pid, task_priority);
+}
+
+probe scheduler.ctxswitch {	// state variables are exported as prevtsk_state / nexttsk_state
+        printf("prev_pid = %d, prev_tid = %d, prev_task_name = %s, prev_state = %d, next_pid = %d, next_tid = %d, next_task_name = %s, next_state = %d\n", prev_pid, prev_tid, prev_task_name, prevtsk_state, next_pid, next_tid, next_task_name, nexttsk_state);
+}
+
+probe scheduler.migrate_task {
+        printf("pid = %d, priority = %d, original cpu = %d destination cpu = %d\n", pid, priority, original_cpu, destination_cpu);
+}
+
+probe scheduler.process_free {
+        printf("pid = %d, priority = %d\n", pid, priority);
+}
+
+probe scheduler.process_exit {
+        printf("pid = %d, priority = %d\n", pid, priority);
+}
+
+probe scheduler.process_wait {	// the alias sets pid only
+        printf("pid = %d\n", pid);
+}
+
+probe scheduler.process_fork {
+        printf("parent pid = %d, child pid = %d\n", parent_pid, child_pid);
+}
+
+probe scheduler.signal_send {	// the alias exports the signal as signal_number
+        printf("pid = %d, signal = %d\n", pid, signal_number);
+}
+
+
diff -Naur systemtap-orig/testsuite/systemtap.examples/profiling/sched_switch.meta systemtap/testsuite/systemtap.examples/profiling/sched_switch.meta
--- systemtap-orig/testsuite/systemtap.examples/profiling/sched_switch.meta	1969-12-31 19:00:00.000000000 -0500
+++ systemtap/testsuite/systemtap.examples/profiling/sched_switch.meta	2009-09-22 02:18:03.000000000 -0400
@@ -0,0 +1,14 @@
+title: Display the task switches happening in the scheduler
+name: sched_switch.stp
+version: 1.0
+author: kiran
+keywords: profiling functions
+subsystem: kernel
+status: production
+exit: user-controlled
+output: sorted-list on-exit
+scope: system-wide
+description: The sched_switch.stp script takes two arguments, first argument can be "pid" or "name" to indicate what is being passed as second argument. The script will trace the process based on pid/name and print the scheduler switches happening with the process. If no arguments are passed, it displays all the scheduler switches. This can be used to understand which tasks schedule the process being traced out, and when it gets scheduled in again.
+test_check: stap -p4 sched_switch.stp
+test_installcheck: stap  sched_switch.stp -c "sleep 1"
+
diff -Naur systemtap-orig/testsuite/systemtap.examples/profiling/sched_switch.stp systemtap/testsuite/systemtap.examples/profiling/sched_switch.stp
--- systemtap-orig/testsuite/systemtap.examples/profiling/sched_switch.stp	1969-12-31 19:00:00.000000000 -0500
+++ systemtap/testsuite/systemtap.examples/profiling/sched_switch.stp	2009-09-22 02:29:16.000000000 -0400
@@ -0,0 +1,71 @@
+/* This script works similar to ftrace's sched_switch. It displays a list of
+ * processes which get switched in and out of the scheduler. The format of display
+ * is PROCESS_NAME PROCESS_PID CPU TIMESTAMP PID: PRIORITY: PROCESS STATE ->/+
+ *    NEXT_PID : NEXT_PRIORITY: NEXT_STATE NEXT_PROCESS_NAME 
+ * -> indicates that prev process is scheduled out and the next process is 
+ *    scheduled in.
+ * + indicates that prev process has woken up the next process.
+ * The usage is sched_switch.stp <"pid"/"name"> pid/name
+ */
+
+global task_cpu_old[9999]
+global pids[999]
+global processes
+global prev
+
+function state_calc(state) {
+        // Translate a numeric task state into the short label used in the
+        // script's output. The values 0, 1, 2, 4, 8, 16 and 32 are
+        // recognised; any other value matches none of the tests below and
+        // produces the empty string.
+
+        label = ""
+        if (state == 0)       label = "R"
+        else if (state == 1)  label = "S"
+        else if (state == 2)  label = "D"
+        else if (state == 4)  label = "T"
+        else if (state == 8)  label = "T"
+        else if (state == 16) label = "Z"
+        else if (state == 32) label = "EXIT_DEAD"
+
+        return label
+}
+probe scheduler.wakeup
+{
+	pids[task_pid]++
+	processes[task_pid] = $p;
+	prev[task_pid] = task_current()
+	
+}
+probe scheduler.ctxswitch
+{
+	tid = next_tid
+	tid1 = prev_tid
+	state = prevtsk_state
+	state1 = nexttsk_state
+	// With two script arguments, skip switches that do not involve the given pid/name.
+	%( $# == 2 %?
+	
+	if(@1 == "pid") 
+		if (tid != $2 && tid1 != $2)
+			next
+	if(@1 == "name")
+		if (task_execname(task_current()) != @2 && next_task_name != @2)
+			next
+	// Print the wakeups recorded by the scheduler.wakeup probe that match the filter.
+	foreach (name in pids-) {
+		if ((@1 == "pid" && (name == $2 || task_pid(prev[name]) == $2)) || 
+		   (@1 == "name" && (task_execname(prev[name]) == @2 || task_execname(processes[name]) == @2)))
+			printf("%s\t\t%d\t%d\t%d\t%d:%d:%s + %d:%d:%s %s\n",
+				task_execname(prev[name]), task_pid(prev[name]), task_cpu(processes[name]), gettimeofday_ns(), 
+				task_pid(prev[name]), task_prio(prev[name]), state_calc(task_state(prev[name])), 
+				task_pid(processes[name]), task_prio(processes[name]), state_calc(task_state(processes[name])), 
+				task_execname(processes[name]))
+	} %)
+
+	old_cpu = task_cpu_old[tid]
+	printf("%s\t\t%d\t%d\t%d\t%d:%d:%s ==> %d:%d:%s %s\n",task_execname(task_current()),tid1,
+		old_cpu,gettimeofday_ns(),tid1,task_prio(task_current()),state_calc(state),next_pid,
+		task_prio(next_task),state_calc(state1),next_task_name )
+	task_cpu_old[next_tid] = cpu()
+}



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]