tile: support delivering NMIs for multicore backtrace
author Chris Metcalf <cmetcalf@ezchip.com>
Mon, 4 May 2015 21:26:35 +0000 (17:26 -0400)
committer Chris Metcalf <cmetcalf@ezchip.com>
Mon, 11 May 2015 15:22:31 +0000 (11:22 -0400)
A new hypervisor service was added some time ago (MDE 4.2.1 or
later, or MDE 4.3 or later) that allows cores to request NMIs
to be delivered to other cores.  Use this facility to deliver
a request that causes a backtrace to be generated on each core,
and hook it into the magic SysRq functionality.

Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com>
arch/tile/include/asm/irq.h
arch/tile/include/asm/traps.h
arch/tile/include/hv/hypervisor.h
arch/tile/kernel/hvglue.S
arch/tile/kernel/hvglue_trace.c
arch/tile/kernel/intvec_64.S
arch/tile/kernel/process.c
arch/tile/kernel/traps.c

index 1fe86911838b272d80fc0c7232a9f56b21f0e949..84a924034bdbf816f5bb41442251ce13f23f30b5 100644 (file)
@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
 
 void setup_irq_regs(void);
 
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
 #endif /* _ASM_TILE_IRQ_H */
index 4b99a1c3aab2533c3bafe930cb6f92b5fc17117c..11c82270c1f5be730fa598314515c66764d66947 100644 (file)
@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
 /* kernel/messaging.c */
 void hv_message_intr(struct pt_regs *, int intnum);
 
+#define        TILE_NMI_DUMP_STACK     1       /* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
 /* kernel/irq.c */
 void tile_dev_intr(struct pt_regs *, int intnum);
 
index e0e6af4e783b077ca01968ae54af1a2a622834fb..f10b332b3b65d88f572ff2109ac45785ebc21eb0 100644 (file)
 /** hv_console_set_ipi */
 #define HV_DISPATCH_CONSOLE_SET_IPI               63
 
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI                      65
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END                          64
+#define _HV_DISPATCH_END                          66
 
 
 #ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
 #define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
 /** Device interrupt downcall interrupt vector */
 #define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL            64
+
+#define HV_NMI_FLAG_FORCE    0x1  /**< Force an NMI downcall regardless of
+               the ICS bit of the client. */
 
 #ifndef __ASSEMBLER__
 
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
 int hv_dev_poll_cancel(int devhdl);
 
 
+/** NMI information returned by hv_send_nmi(). */
+typedef struct
+{
+  /** Result: a negative error code, or one of the HV_NMI_RESULT_xxx values. */
+  int result;
+
+  /** PC of the interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+  HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK        0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS  1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV   2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE    0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ *  This will cause the NMI to be issued on the remote tile regardless
+ *  of the state of the client interrupt mask.  However, if the remote
+ *  tile is in the hypervisor, it will not execute the NMI, and
+ *  HV_NMI_RESULT_FAIL_HV will be returned.  Similarly, if the remote
+ *  tile is in a client interrupt critical section at the time of the
+ *  NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ *  be returned.  In this second case, however, if HV_NMI_FLAG_FORCE
+ *  is set in flags, then the remote tile will enter its NMI interrupt
+ *  vector regardless.  Forcing the NMI vector during an interrupt
+ *  critical section will mean that the client can not safely continue
+ *  execution after handling the interrupt.
+ *
+ *  @param tile Tile to which the NMI request is sent.
+ *  @param info NMI information which is defined by and interpreted by the
+ *         supervisor, is passed to the specified tile, and is
+ *         stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ *         specified tile when entering the NMI handler routine.
+ *         Typically, this parameter stores the NMI type, or an aligned
+ *         VA plus some special bits, etc.
+ *  @param flags Flags (HV_NMI_FLAG_xxx).
+ *  @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
 /** Scatter-gather list for preada/pwritea calls. */
 typedef struct
 #if CHIP_VA_WIDTH() <= 32
index 2ab4566223912399504d6860e1a20032add367e4..d78ee2ad610c640c5ac048e09078c3adb936f4e5 100644 (file)
@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
 gensym hv_get_ipi_pte, 0x700, 32
 gensym hv_set_pte_super_shift, 0x720, 32
 gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x820, 30688
index 85c74ad29312d835aab1dd96e6f7b4e6e8f6d058..add0d71395c63221060c9e34bf154efa203d5745 100644 (file)
@@ -75,6 +75,7 @@
 #define hv_get_ipi_pte _hv_get_ipi_pte
 #define hv_set_pte_super_shift _hv_set_pte_super_shift
 #define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
 #include <hv/hypervisor.h>
 #undef hv_init
 #undef hv_install_context
 #undef hv_get_ipi_pte
 #undef hv_set_pte_super_shift
 #undef hv_console_set_ipi
+#undef hv_send_nmi
 
 /*
  * Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
         HV_VirtAddr, tlb_va, unsigned long, tlb_length,
         unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
         HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+        __hv64, flags)
index 5b67efcecabd17603deb47cd622d94e9b710d470..800b91d3f9dc79f15b22a08e3bba21f89640268a 100644 (file)
@@ -515,6 +515,10 @@ intvec_\vecname:
        .ifc \c_routine, handle_perf_interrupt
        mfspr   r2, AUX_PERF_COUNT_STS
        .endif
+       .ifc \c_routine, do_nmi
+       mfspr   r2, SPR_SYSTEM_SAVE_K_2   /* nmi type */
+       .else
+       .endif
        .endif
        .endif
        .endif
@@ -1571,3 +1575,5 @@ intrpt_start:
 
        /* Synthetic interrupt delivered only by the simulator */
        int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+       /* Synthetic interrupt delivered by hv */
+       int_hand     INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
index b403c2e3e26344b7468a9d775c177bbcfd702118..0dddcf7e5bfa15dc582b3c2bd2cee87fbae5b682 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/delay.h>
 #include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
@@ -574,3 +575,103 @@ void show_regs(struct pt_regs *regs)
 
        dump_stack_regs(regs);
 }
+
+/* Serializes the per-cpu stack dumps so tiles print one at a time. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* Remote cpus still owed a dump; nonzero blocks a new all-cpu backtrace. */
+static atomic_t backtrace_cpus;
+/* Cpus with a dump request pending; each cpu clears its own bit once. */
+static struct cpumask backtrace_mask;
+
+/*
+ * Handle a TILE_NMI_DUMP_STACK request on the local cpu (called from
+ * do_nmi()).  Prints either an "idle" line or the full register/stack
+ * dump for @regs, but only if this cpu's bit is set in backtrace_mask.
+ */
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+       /*
+        * NOTE(review): sampled before nmi_enter(), presumably because
+        * nmi_enter() changes what in_interrupt() reports - confirm.
+        */
+       int idle = is_idle_task(current) && !in_interrupt();
+       int this_cpu;
+       int requested;
+
+       nmi_enter();
+       this_cpu = smp_processor_id();
+
+       /* A cpu whose bit is not set was not asked to dump: warn and skip. */
+       requested = cpumask_test_and_clear_cpu(this_cpu, &backtrace_mask);
+       if (!WARN_ON_ONCE(!requested)) {
+               /* Hold the lock so output from different cpus is not mixed. */
+               spin_lock(&backtrace_lock);
+               if (!idle)
+                       show_regs(regs);
+               else
+                       pr_info("CPU: %d idle\n", this_cpu);
+               spin_unlock(&backtrace_lock);
+
+               /* One fewer dump outstanding for the current backtrace. */
+               atomic_dec(&backtrace_cpus);
+       }
+
+       nmi_exit();
+}
+
+#ifdef __tilegx__
+/*
+ * Log why @cpu never accepted the stack-dump NMI, based on the last
+ * hypervisor reply in @reply.  Returns false when the reply indicates
+ * the hypervisor does not implement the service at all, in which case
+ * the caller stops reporting further cpus.
+ */
+static bool report_skipped_cpu(int cpu, const HV_NMI_Info *reply)
+{
+       switch (reply->result) {
+       case HV_NMI_RESULT_FAIL_ICS:
+               pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+                       cpu, reply->pc);
+               return true;
+       case HV_NMI_RESULT_FAIL_HV:
+               pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+                       cpu);
+               return true;
+       case HV_ENOSYS:
+               pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+               return false;
+       default:  /* should not happen */
+               pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+                       cpu, reply->result, reply->pc);
+               return true;
+       }
+}
+
+/*
+ * Ask every other online cpu to dump its stack via a hypervisor NMI,
+ * optionally dumping the calling cpu's stack first when @self is true.
+ *
+ * Only one all-cpu backtrace may be in flight: backtrace_cpus is claimed
+ * with a cmpxchg from 0, and if that fails the request is refused (the
+ * local stack is still dumped when @self is set).
+ */
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+       HV_NMI_Info reply[NR_CPUS];
+       struct cpumask pending;
+       unsigned int tries;
+       int busy;
+       int cpu;
+
+       busy = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+       if (busy) {
+               pr_err("Trying to do all-cpu backtrace.\n");
+               pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+                      busy);
+               if (self) {
+                       pr_err("Reporting the stack on this cpu only.\n");
+                       dump_stack();
+               }
+               return;
+       }
+
+       /* Target every online cpu except the caller. */
+       cpumask_copy(&pending, cpu_online_mask);
+       cpumask_clear_cpu(smp_processor_id(), &pending);
+       cpumask_copy(&backtrace_mask, &pending);
+
+       /* The local stack goes out before any remote one. */
+       if (self)
+               dump_stack();
+
+       /*
+        * Send the NMI to each pending cpu, retrying the ones that refuse
+        * it: up to 100 rounds with a 10 ms delay after each round.
+        */
+       for (tries = 100; tries && !cpumask_empty(&pending); tries--) {
+               for_each_cpu(cpu, &pending) {
+                       HV_Coord tile;
+
+                       tile.x = cpu_x(cpu);
+                       tile.y = cpu_y(cpu);
+                       reply[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+                       if (reply[cpu].result == HV_NMI_RESULT_OK)
+                               cpumask_clear_cpu(cpu, &pending);
+               }
+
+               mdelay(10);
+       }
+
+       if (cpumask_empty(&pending))
+               return;
+
+       /*
+        * Some cpus never took the NMI.  Say why, then subtract them from
+        * the outstanding count since they will not decrement it themselves.
+        */
+       for_each_cpu(cpu, &pending) {
+               if (!report_skipped_cpu(cpu, &reply[cpu]))
+                       break;
+       }
+       atomic_sub(cpumask_weight(&pending), &backtrace_cpus);
+}
+#endif /* __tilegx__ */
index 312fc134c1cb12dc610b7b8bfea3c856b9752674..855f7316f1ee71f679a5c64c8af1a3002d2bca81 100644 (file)
@@ -395,6 +395,18 @@ done:
        exception_exit(prev_state);
 }
 
+/*
+ * C-level handler for the hypervisor NMI downcall.
+ *
+ * @regs:      register state of the interrupted context
+ * @fault_num: presumably the interrupt number; not used here
+ * @reason:    NMI type selected by the sender (e.g. TILE_NMI_DUMP_STACK)
+ *
+ * An unrecognized @reason is treated as fatal.
+ */
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+       switch (reason) {
+       case TILE_NMI_DUMP_STACK:
+               do_nmi_dump_stack(regs);
+               break;
+       default:
+               /* reason is unsigned long, so it is printed with %lu. */
+               panic("Unexpected do_nmi type %lu", reason);
+       }
+}
+
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
        _dump_stack(dummy, pc, lr, sp, r52);