[PATCH] x86: all cpu backtrace
authorAndrew Morton <akpm@osdl.org>
Thu, 7 Dec 2006 01:14:01 +0000 (02:14 +0100)
committerAndi Kleen <andi@basil.nowhere.org>
Thu, 7 Dec 2006 01:14:01 +0000 (02:14 +0100)
When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu
backtrace, so we get to see which CPU is holding the lock, and where.

Cc: Andi Kleen <ak@muc.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
arch/i386/kernel/nmi.c
arch/x86_64/kernel/nmi.c
include/asm-i386/nmi.h
include/asm-x86_64/nmi.h
include/linux/nmi.h
lib/spinlock_debug.c

index eaafe233a5da83f4abbb2c17e670b8a46fa4edaa..171194ccb7bc480d0dc6cfbfbf06c07514d47d31 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/percpu.h>
 #include <linux/dmi.h>
 #include <linux/kprobes.h>
+#include <linux/cpumask.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
@@ -42,6 +43,8 @@ int nmi_watchdog_enabled;
 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
@@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
                touched = 1;
        }
 
+       if (cpu_isset(cpu, backtrace_mask)) {
+               static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
+
+               spin_lock(&lock);
+               printk("NMI backtrace for cpu %d\n", cpu);
+               dump_stack();
+               spin_unlock(&lock);
+               cpu_clear(cpu, backtrace_mask);
+       }
+
        sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
        /* if the apic timer isn't firing, this cpu isn't doing much */
@@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 
 #endif
 
+void __trigger_all_cpu_backtrace(void)
+{
+       int i;
+
+       backtrace_mask = cpu_online_map;
+       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+       for (i = 0; i < 10 * 1000; i++) {
+               if (cpus_empty(backtrace_mask))
+                       break;
+               mdelay(1);
+       }
+}
+
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
index 7af9cb3e2d99a8e71e54d89c170245580b2d470e..27e95e7922c16e6f2d637f9b677927cc049b989e 100644 (file)
  *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
  */
 
+#include <linux/nmi.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
-#include <linux/nmi.h>
 #include <linux/sysctl.h>
 #include <linux/kprobes.h>
+#include <linux/cpumask.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
 static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
 static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
        int sum;
        int touched = 0;
+       int cpu = smp_processor_id();
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 dummy;
        int rc=0;
@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
                touched = 1;
        }
 
+       if (cpu_isset(cpu, backtrace_mask)) {
+               static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
+
+               spin_lock(&lock);
+               printk("NMI backtrace for cpu %d\n", cpu);
+               dump_stack();
+               spin_unlock(&lock);
+               cpu_clear(cpu, backtrace_mask);
+       }
+
 #ifdef CONFIG_X86_MCE
        /* Could check oops_in_progress here too, but it's safer
           not too */
@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 
 #endif
 
+void __trigger_all_cpu_backtrace(void)
+{
+       int i;
+
+       backtrace_mask = cpu_online_map;
+       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+       for (i = 0; i < 10 * 1000; i++) {
+               if (cpus_empty(backtrace_mask))
+                       break;
+               mdelay(1);
+       }
+}
+
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
index 269d315719ca422d478e7d18cd441cf71c44438d..b04333ea6f31e9bd338b9842d1afeec1ea9f7ba4 100644 (file)
@@ -5,6 +5,9 @@
 #define ASM_NMI_H
 
 #include <linux/pm.h>
+#include <asm/irq.h>
+
+#ifdef ARCH_HAS_NMI_WATCHDOG
 
 /**
  * do_nmi_callback
@@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
                        void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
 
+void __trigger_all_cpu_backtrace(void);
+#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
+
+#endif
+
 #endif /* ASM_NMI_H */
index f367d4014b423b5298df04dfc556ff4d5aeaf3fb..72375e7d32a895846200ff921ebfd5e8776296bf 100644 (file)
@@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
 
 extern int unknown_nmi_panic;
 
+void __trigger_all_cpu_backtrace(void);
+#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
+
 #endif /* ASM_NMI_H */
index e16904e28c3a39b57480d5bc5e64f5923cc5434a..acb4ed1302479ac4f1833466905024e013de41e7 100644 (file)
  * disables interrupts for a long time. This call is stateless.
  */
 #ifdef ARCH_HAS_NMI_WATCHDOG
+#include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
 #else
 # define touch_nmi_watchdog() touch_softlockup_watchdog()
 #endif
 
+#ifndef trigger_all_cpu_backtrace
+#define trigger_all_cpu_backtrace() do { } while (0)
+#endif
+
 #endif
index b6c4f898197c52f0b175ad0ea9e26466c338ea30..479fd462eaa9c7630df676bdddd91d4dd1212a2c 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <linux/spinlock.h>
+#include <linux/nmi.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
 #include <linux/delay.h>
@@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock)
                                raw_smp_processor_id(), current->comm,
                                current->pid, lock);
                        dump_stack();
+#ifdef CONFIG_SMP
+                       trigger_all_cpu_backtrace();
+#endif
                }
        }
 }