sched: Add wait, sleep and iowait accounting tracepoints
authorPeter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 23 Jul 2009 18:13:26 +0000 (20:13 +0200)
committerIngo Molnar <mingo@elte.hu>
Wed, 2 Sep 2009 07:12:18 +0000 (09:12 +0200)
Add 3 schedstat tracepoints to help account for wait-time,
sleep-time and iowait-time.

They can also be used as a perf-counter source to profile tasks
on these clocks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <new-submission>
[ build fix for the !CONFIG_SCHEDSTATS case ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/trace/events/sched.h
kernel/sched_fair.c

index 8949bb7eb08278bbe30d6785a1e6c1902db8a99a..a4c369ec328fcb760078e0a8774cc9bf97bc0bab 100644 (file)
@@ -340,6 +340,101 @@ TRACE_EVENT(sched_signal_send,
                  __entry->sig, __entry->comm, __entry->pid)
 );
 
+/*
+ * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
+ *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+TRACE_EVENT(sched_stat_wait,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(tsk, delay),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid    = tsk->pid;
+               __entry->delay  = delay;
+       )
+       TP_perf_assign(
+               __perf_count(delay);
+       ),
+
+       TP_printk("task: %s:%d wait: %Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+);
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+TRACE_EVENT(sched_stat_sleep,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(tsk, delay),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid    = tsk->pid;
+               __entry->delay  = delay;
+       )
+       TP_perf_assign(
+               __perf_count(delay);
+       ),
+
+       TP_printk("task: %s:%d sleep: %Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+);
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+TRACE_EVENT(sched_stat_iowait,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(tsk, delay),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid    = tsk->pid;
+               __entry->delay  = delay;
+       )
+       TP_perf_assign(
+               __perf_count(delay);
+       ),
+
+       TP_printk("task: %s:%d iowait: %Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+);
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
index 471fa281f5e0f32d5929334034a7bf38a63c5ba4..2ff850f90d1e77c02f2efd75a45b72b1a4e382e1 100644 (file)
@@ -546,6 +546,13 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
        schedstat_set(se->wait_sum, se->wait_sum +
                        rq_of(cfs_rq)->clock - se->wait_start);
        schedstat_set(se->wait_start, 0);
+
+#ifdef CONFIG_SCHEDSTATS
+       if (entity_is_task(se)) {
+               trace_sched_stat_wait(task_of(se),
+                       rq_of(cfs_rq)->clock - se->wait_start);
+       }
+#endif
 }
 
 static inline void
@@ -636,8 +643,10 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                se->sleep_start = 0;
                se->sum_sleep_runtime += delta;
 
-               if (tsk)
+               if (tsk) {
                        account_scheduler_latency(tsk, delta >> 10, 1);
+                       trace_sched_stat_sleep(tsk, delta);
+               }
        }
        if (se->block_start) {
                u64 delta = rq_of(cfs_rq)->clock - se->block_start;
@@ -655,6 +664,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                        if (tsk->in_iowait) {
                                se->iowait_sum += delta;
                                se->iowait_count++;
+                               trace_sched_stat_iowait(tsk, delta);
                        }
 
                        /*