From a9edc88093287183ac934be44f295f183b2c62dd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 19 Jun 2014 17:33:32 -0400 Subject: [PATCH] x86/nmi: Perform a safe NMI stack trace on all CPUs When trigger_all_cpu_backtrace() is called on x86, it will trigger an NMI on each CPU and call show_regs(). But this can lead to a hard lock up if the NMI comes in on another printk(). In order to avoid this, when the NMI triggers, it switches the printk routine for that CPU to call a NMI safe printk function that records the printk in a per_cpu seq_buf descriptor. After all NMIs have finished recording its data, the seq_bufs are printed in a safe context. Link: http://lkml.kernel.org/p/20140619213952.360076309@goodmis.org Link: http://lkml.kernel.org/r/20141115050605.055232587@goodmis.org Tested-by: Jiri Kosina Acked-by: Jiri Kosina Acked-by: Paul E. McKenney Reviewed-by: Petr Mladek Signed-off-by: Steven Rostedt --- arch/x86/kernel/apic/hw_nmi.c | 91 +++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 6a1e71bde323..c95c3e9ce196 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh) @@ -29,14 +30,35 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) #ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; +static cpumask_var_t printtrace_mask; + +#define NMI_BUF_SIZE 4096 + +struct nmi_seq_buf { + unsigned char buffer[NMI_BUF_SIZE]; + struct seq_buf seq; +}; + +/* Safe printing in NMI context */ +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; +static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + void arch_trigger_all_cpu_backtrace(bool include_self) { + struct nmi_seq_buf *s; + int len; + int cpu; int i; - int cpu = get_cpu(); + int this_cpu = get_cpu(); if (test_and_set_bit(0, &backtrace_flag)) { /* @@ -49,7 +71,17 @@ void arch_trigger_all_cpu_backtrace(bool include_self) cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); if (!include_self) - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); + + cpumask_copy(printtrace_mask, to_cpumask(backtrace_mask)); + /* + * Set up per_cpu seq_buf buffers that the NMIs running on the other + * CPUs will write to. + */ + for_each_cpu(cpu, to_cpumask(backtrace_mask)) { + s = &per_cpu(nmi_print_seq, cpu); + seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); + } if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("sending NMI to %s CPUs:\n", @@ -65,11 +97,58 @@ void arch_trigger_all_cpu_backtrace(bool include_self) touch_softlockup_watchdog(); } + /* + * Now that all the NMIs have triggered, we can dump out their + * back traces safely to the console. + */ + for_each_cpu(cpu, printtrace_mask) { + int last_i = 0; + + s = &per_cpu(nmi_print_seq, cpu); + len = seq_buf_used(&s->seq); + if (!len) + continue; + + /* Print line by line. */ + for (i = 0; i < len; i++) { + if (s->buffer[i] == '\n') { + print_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < len) { + print_seq_line(s, last_i, len - 1); + pr_cont("\n"); + } + } + clear_bit(0, &backtrace_flag); smp_mb__after_atomic(); put_cpu(); } +/* + * It is not safe to call printk() directly from NMI handlers. + * It may be fine if the NMI detected a lock up and we have no choice + * but to do so, but doing a NMI on all other CPUs to get a back trace + * can be done with a sysrq-l. We don't want that to lock up, which + * can happen if the NMI interrupts a printk in progress. + * + * Instead, we redirect the vprintk() to this nmi_vprintk() that writes + * the content into a per cpu seq_buf buffer. Then when the NMIs are + * all done, we can safely dump the contents of the seq_buf to a printk() + * from a non NMI context. + */ +static int nmi_vprintk(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + unsigned int len = seq_buf_used(&s->seq); + + seq_buf_vprintf(&s->seq, fmt, args); + return seq_buf_used(&s->seq) - len; +} + static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { @@ -78,12 +157,14 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + printk_func_t printk_func_save = this_cpu_read(printk_func); - arch_spin_lock(&lock); + /* Replace printk to write into the NMI seq */ + this_cpu_write(printk_func, nmi_vprintk); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); - arch_spin_unlock(&lock); + this_cpu_write(printk_func, printk_func_save); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return NMI_HANDLED; } -- 2.20.1