sched/tracing: Add a new tracepoint for sleeptime
authorArun Sharma <asharma@fb.com>
Thu, 22 Dec 2011 00:15:40 +0000 (16:15 -0800)
committerIngo Molnar <mingo@elte.hu>
Fri, 23 Dec 2011 16:56:17 +0000 (17:56 +0100)
If CONFIG_SCHEDSTATS is defined, the kernel maintains
information about how long the task was sleeping or
in the case of iowait, blocking in the kernel before
getting woken up.

This will be useful for sleep time profiling.

Note: this information is only provided for sched_fair.
Other scheduling classes may choose to provide this in
the future.

Note: the delay includes the time spent on the runqueue
as well.

Signed-off-by: Arun Sharma <asharma@fb.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@fb.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/trace/events/sched.h
kernel/sched/core.c
kernel/sched/fair.c

index e33ed1bfa1138bb6200eda7a213db34094cedf49..6ba596b07a7236ef0c48cd5b910dc6d6844ba508 100644 (file)
@@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime,
                        (unsigned long long)__entry->vruntime)
 );
 
+#ifdef CREATE_TRACE_POINTS
+static inline u64 trace_get_sleeptime(struct task_struct *tsk)
+{
+#ifdef CONFIG_SCHEDSTATS
+       u64 block, sleep;
+
+       block = tsk->se.statistics.block_start;
+       sleep = tsk->se.statistics.sleep_start;
+       tsk->se.statistics.block_start = 0;
+       tsk->se.statistics.sleep_start = 0;
+
+       return block ? block : sleep ? sleep : 0;
+#else
+       return 0;
+#endif
+}
+#endif
+
+/*
+ * Tracepoint for accounting sleeptime (time the task is sleeping
+ * or waiting for I/O).
+ */
+TRACE_EVENT(sched_stat_sleeptime,
+
+       TP_PROTO(struct task_struct *tsk, u64 now),
+
+       TP_ARGS(tsk, now),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   sleeptime               )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->sleeptime = trace_get_sleeptime(tsk);
+               __entry->sleeptime = __entry->sleeptime ?
+                               now - __entry->sleeptime : 0;
+       )
+       TP_perf_assign(
+               __perf_count(__entry->sleeptime);
+       ),
+
+       TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->sleeptime)
+);
+
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
index 8ffe523dfa8ecbd2848231d9fb8e0e9abbec487a..4dbfd04a2148546c8311638bfb61d561b72d2b42 100644 (file)
@@ -1937,6 +1937,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
        local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
        finish_lock_switch(rq, prev);
+       trace_sched_stat_sleeptime(current, rq->clock);
 
        fire_sched_in_preempt_notifiers(current);
        if (mm)
index bdf18836f74ed2462f29538c2a7c79cc48793131..8e42de9105f800d1a7ca7231626b6d683b1cce4a 100644 (file)
@@ -1003,7 +1003,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                if (unlikely(delta > se->statistics.sleep_max))
                        se->statistics.sleep_max = delta;
 
-               se->statistics.sleep_start = 0;
                se->statistics.sum_sleep_runtime += delta;
 
                if (tsk) {
@@ -1020,7 +1019,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                if (unlikely(delta > se->statistics.block_max))
                        se->statistics.block_max = delta;
 
-               se->statistics.block_start = 0;
                se->statistics.sum_sleep_runtime += delta;
 
                if (tsk) {