From 768d0c27226e6587cad2fcf543f9711da3f3774e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2009 20:13:26 +0200 Subject: [PATCH] sched: Add wait, sleep and iowait accounting tracepoints Add 3 schedstat tracepoints to help account for wait-time, sleep-time and iowait-time. They can also be used as a perf-counter source to profile tasks on these clocks. Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: [ build fix for the !CONFIG_SCHEDSTATS case ] Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 95 ++++++++++++++++++++++++++++++++++++ kernel/sched_fair.c | 12 ++++- 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8949bb7eb082..a4c369ec328f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -340,6 +340,101 @@ TRACE_EVENT(sched_signal_send, __entry->sig, __entry->comm, __entry->pid) ); +/* + * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE + * adding sched_stat support to SCHED_FIFO/RR would be welcome. + */ + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +TRACE_EVENT(sched_stat_wait, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d wait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +TRACE_EVENT(sched_stat_sleep, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +TRACE_EVENT(sched_stat_iowait, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 471fa281f5e0..2ff850f90d1e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -546,6 +546,13 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) schedstat_set(se->wait_sum, se->wait_sum + rq_of(cfs_rq)->clock - se->wait_start); schedstat_set(se->wait_start, 0); + +#ifdef CONFIG_SCHEDSTATS + if (entity_is_task(se)) { + trace_sched_stat_wait(task_of(se), + rq_of(cfs_rq)->clock - se->wait_start); + } +#endif } static inline void @@ -636,8 +643,10 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) se->sleep_start = 0; se->sum_sleep_runtime += delta; - if (tsk) + if (tsk) { account_scheduler_latency(tsk, delta >> 10, 1); + trace_sched_stat_sleep(tsk, delta); + } } if (se->block_start) { u64 delta = rq_of(cfs_rq)->clock - se->block_start; @@ -655,6 +664,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) if (tsk->in_iowait) { se->iowait_sum += delta; se->iowait_count++; + trace_sched_stat_iowait(tsk, delta); } /* -- 2.20.1