sched/events: Revert trace_sched_stat_sleeptime()
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Mon, 30 Jan 2012 13:51:37 +0000 (14:51 +0100)
Commit:     Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 22 Feb 2012 11:06:55 +0000 (12:06 +0100)
Commit 1ac9bc69 ("sched/tracing: Add a new tracepoint for sleeptime")
added a new sched:sched_stat_sleeptime tracepoint.

It's broken: the first sample we get on a task might be bad because
of a stale sleep_start value that wasn't reset at the last task switch,
since the tracepoint was not active then.

It also breaks the existing schedstat samples due to the side
effects of:

-               se->statistics.sleep_start = 0;
...
-               se->statistics.block_start = 0;

Nor do I see a way to fix it without adding overhead to the scheduler
fast path, which I'm not willing to do for the sake of redundant
instrumentation.

Most importantly, sleep time information can already be constructed
by tracing context switches and wakeups, and taking the timestamp
difference between the schedule-out, the wakeup and the schedule-in.
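
As an illustration (not part of this change), here is a minimal sketch
of that reconstruction. It assumes a trace recorded with something like
"perf record -e sched:sched_switch -e sched:sched_wakeup -a" and piped
through "perf script"; the textual field layout (prev_pid=, prev_state=,
next_pid=, pid=) is an assumption based on the common perf output format
and varies between kernel versions, so the parsing may need adjusting:

#!/usr/bin/env python3
# Sketch: reconstruct per-task sleep time from sched_switch/sched_wakeup
# events in "perf script" text output. Field names are assumptions based
# on the usual perf text layout and may differ on other kernel versions.

import re
import sys

TS_RE    = re.compile(r'\s(\d+\.\d+):\s+sched:(sched_switch|sched_wakeup):')
FIELD_RE = re.compile(r'(\w+)=(\S+)')

sleep_start = {}   # pid -> timestamp of schedule-out while sleeping/blocked
wakeup_ts   = {}   # pid -> timestamp of the wakeup that ended that sleep

def handle(event, ts, f):
    if event == 'sched_switch':
        # Schedule-in: the gap from wakeup to schedule-in is runqueue wait.
        pid_in = int(f.get('next_pid', -1))
        if pid_in in wakeup_ts:
            woke  = wakeup_ts.pop(pid_in)
            slept = woke - sleep_start.pop(pid_in, woke)
            print(f"pid={pid_in} slept={slept * 1e3:.3f}ms "
                  f"waited={(ts - woke) * 1e3:.3f}ms")
        # Schedule-out: remember when the task left the CPU to sleep/block.
        if f.get('prev_state', 'R') not in ('R', 'R+'):
            sleep_start[int(f['prev_pid'])] = ts
    else:  # sched_wakeup
        pid = int(f.get('pid', -1))
        if pid in sleep_start:
            wakeup_ts[pid] = ts

for line in sys.stdin:
    m = TS_RE.search(line)
    if m:
        handle(m.group(2), float(m.group(1)), dict(FIELD_RE.findall(line)))

Fed with "perf script" output, a script along these lines reports, per
wakeup, the time slept (schedule-out to wakeup) and the time spent
runnable on the runqueue (wakeup to schedule-in), which together cover
what sched_stat_sleeptime was meant to provide.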

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pc4c9qhl8q6vg3bs4j6k0rbd@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/trace/events/sched.h
kernel/sched/core.c
kernel/sched/fair.c

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6ba596b07a7236ef0c48cd5b910dc6d6844ba508..e33ed1bfa1138bb6200eda7a213db34094cedf49 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,56 +370,6 @@ TRACE_EVENT(sched_stat_runtime,
                        (unsigned long long)__entry->vruntime)
 );
 
-#ifdef CREATE_TRACE_POINTS
-static inline u64 trace_get_sleeptime(struct task_struct *tsk)
-{
-#ifdef CONFIG_SCHEDSTATS
-       u64 block, sleep;
-
-       block = tsk->se.statistics.block_start;
-       sleep = tsk->se.statistics.sleep_start;
-       tsk->se.statistics.block_start = 0;
-       tsk->se.statistics.sleep_start = 0;
-
-       return block ? block : sleep ? sleep : 0;
-#else
-       return 0;
-#endif
-}
-#endif
-
-/*
- * Tracepoint for accounting sleeptime (time the task is sleeping
- * or waiting for I/O).
- */
-TRACE_EVENT(sched_stat_sleeptime,
-
-       TP_PROTO(struct task_struct *tsk, u64 now),
-
-       TP_ARGS(tsk, now),
-
-       TP_STRUCT__entry(
-               __array( char,  comm,   TASK_COMM_LEN   )
-               __field( pid_t, pid                     )
-               __field( u64,   sleeptime               )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
-               __entry->pid            = tsk->pid;
-               __entry->sleeptime = trace_get_sleeptime(tsk);
-               __entry->sleeptime = __entry->sleeptime ?
-                               now - __entry->sleeptime : 0;
-       )
-       TP_perf_assign(
-               __perf_count(__entry->sleeptime);
-       ),
-
-       TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
-                       __entry->comm, __entry->pid,
-                       (unsigned long long)__entry->sleeptime)
-);
-
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d2e053225173dfea134e7e243ad0e80891..b342f57879e693f54c12a5b15d1e4e7c5bdde7e1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
        local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
        finish_lock_switch(rq, prev);
-       trace_sched_stat_sleeptime(current, rq->clock);
 
        fire_sched_in_preempt_notifiers(current);
        if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7c6414fc669de4f09dc03fc763aaddd8040f98bf..aca16b843b7ee463e005996508b098ea44e01b32 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                if (unlikely(delta > se->statistics.sleep_max))
                        se->statistics.sleep_max = delta;
 
+               se->statistics.sleep_start = 0;
                se->statistics.sum_sleep_runtime += delta;
 
                if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                if (unlikely(delta > se->statistics.block_max))
                        se->statistics.block_max = delta;
 
+               se->statistics.block_start = 0;
                se->statistics.sum_sleep_runtime += delta;
 
                if (tsk) {