hung_task debugging: Add tracepoint to report the hang
author Oleg Nesterov <oleg@redhat.com>
Sat, 19 Oct 2013 16:18:28 +0000 (18:18 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 31 Oct 2013 10:16:18 +0000 (11:16 +0100)
Currently check_hung_task() prints a warning if it detects the
problem, but it is not convenient to watch the system logs if
user-space wants to be notified about the hang.

Add the new trace_sched_process_hang() tracepoint to check_hung_task(),
so that a user-space monitor can easily wait for the hang and
potentially resolve the problem.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Dave Sullivan <dsulliva@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20131019161828.GA7439@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/trace/events/sched.h
kernel/hung_task.c

index 2e7d9947a10d3f62bfe12e450375625efae913df..2a652d124fbbb119244c48139f4967678390f5be 100644 (file)
@@ -424,6 +424,25 @@ TRACE_EVENT(sched_pi_setprio,
                        __entry->oldprio, __entry->newprio)
 );
 
+#ifdef CONFIG_DETECT_HUNG_TASK
+TRACE_EVENT(sched_process_hang,
+       TP_PROTO(struct task_struct *tsk),
+       TP_ARGS(tsk),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid = tsk->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+);
+#endif /* CONFIG_DETECT_HUNG_TASK */
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
index 042252383fd2bbff549aa2380cec3a9662adaa68..8807061ca004cc0a532460244e01a56391eb0469 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/export.h>
 #include <linux/sysctl.h>
 #include <linux/utsname.h>
+#include <trace/events/sched.h>
 
 /*
  * The number of tasks checked:
@@ -92,6 +93,9 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
                t->last_switch_count = switch_count;
                return;
        }
+
+       trace_sched_process_hang(t);
+
        if (!sysctl_hung_task_warnings)
                return;
        sysctl_hung_task_warnings--;