rcu: Print remote CPU's stacks in stall warnings
author: Paul E. McKenney <paul.mckenney@linaro.org>
Wed, 19 Sep 2012 23:58:38 +0000 (16:58 -0700)
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tue, 23 Oct 2012 21:55:25 +0000 (14:55 -0700)
The RCU CPU stall warnings rely on trigger_all_cpu_backtrace() to
do an NMI-based dump of the stack traces of all CPUs.  Unfortunately, a
number of architectures do not implement trigger_all_cpu_backtrace(), in
which case RCU falls back to just dumping the stack of the running CPU.
This is unhelpful in the case where the running CPU has detected that
some other CPU has stalled.

This commit therefore makes the running CPU dump the stacks of the
tasks running on the stalled CPUs.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
include/linux/sched.h
kernel/rcutree.c
kernel/sched/core.c

index 0dd42a02df2e851e0847df640dd559f3689c36ed..ba69b5adea30f494d39e0f834a6549afa3ce1efd 100644 (file)
@@ -109,6 +109,8 @@ extern void update_cpu_load_nohz(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
+extern void dump_cpu_task(int cpu);
+
 struct seq_file;
 struct cfs_rq;
 struct task_group;
index 74df86bd9204aef5ec9d14ca8b4b0094777bef87..e78538712df02428e696bdca52b91ebfcbfb512f 100644 (file)
@@ -873,6 +873,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
        rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
 }
 
+/*
+ * Dump stacks of all tasks running on stalled CPUs (those whose ->qsmask
+ * bit is set in a leaf rcu_node).  This is a fallback for when
+ * trigger_all_cpu_backtrace() is not implemented; the NMI-triggered stack
+ * traces are more accurate because they are printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
+{
+       int cpu; /* bit index within the current leaf, not a CPU number */
+       unsigned long flags;
+       struct rcu_node *rnp;
+
+       rcu_for_each_leaf_node(rsp, rnp) {
+               raw_spin_lock_irqsave(&rnp->lock, flags); /* held while ->qsmask is read */
+               if (rnp->qsmask != 0) { /* skip leaves with no bits set */
+                       for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+                               if (rnp->qsmask & (1UL << cpu))
+                                       dump_cpu_task(rnp->grplo + cpu); /* bit index -> CPU number */
+               }
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       }
+}
+
 static void print_other_cpu_stall(struct rcu_state *rsp)
 {
        int cpu;
@@ -929,7 +952,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
        if (ndetected == 0)
                printk(KERN_ERR "INFO: Stall ended before state dump start\n");
        else if (!trigger_all_cpu_backtrace())
-               dump_stack();
+               rcu_dump_cpu_stacks(rsp);
 
        /* Complain about tasks blocking the grace period. */
 
index 2d8927fda712f5ee1e19f1fe364fbad557d8a736..59d08fb1a9e3f7fb0095522b396ec7a4b416cd54 100644 (file)
@@ -8076,3 +8076,9 @@ struct cgroup_subsys cpuacct_subsys = {
        .base_cftypes = files,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
+
+void dump_cpu_task(int cpu) /* Log a header, then show the task given by cpu_curr(cpu). */
+{
+       pr_info("Task dump for CPU %d:\n", cpu); /* header identifying the CPU being dumped */
+       sched_show_task(cpu_curr(cpu)); /* NOTE(review): no locking visible here; task may change concurrently -- confirm */
+}