powerpc/book3s: Print task info if we take a machine check in user mode
author: Michael Ellerman <mpe@ellerman.id.au>
Mon, 3 Apr 2017 05:29:34 +0000 (15:29 +1000)
committer: Michael Ellerman <mpe@ellerman.id.au>
Mon, 3 Apr 2017 06:12:00 +0000 (16:12 +1000)
For an MCE (Machine Check Exception) that hits while in user mode
(MSR[PR]=1), print the task info to the console MCE error log. This may
help to identify the application that triggered the MCE.

After this patch the MCE console looks like:

  Severe Machine check interrupt [Recovered]
    NIP: [0000000010039778] PID: 762 Comm: ebizzy
    Initiator: CPU
    Error type: SLB [Multihit]
      Effective address: 0000000010039778

  Severe Machine check interrupt [Not recovered]
    NIP: [0000000010039778] PID: 763 Comm: ebizzy
    Initiator: CPU
    Error type: UE [Page table walk ifetch]
      Effective address: 0000000010039778
  ebizzy[763]: unhandled signal 7 at 0000000010039778 nip 0000000010039778 lr 0000000010001b44 code 30004

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/mce.h
arch/powerpc/kernel/mce.c
arch/powerpc/platforms/powernv/opal.c

index e3498b446788e8c553b2181e9591acfb866f1160..81eff8631434df85b9c89fdf7f4c75a78b3b98cf 100644 (file)
@@ -207,7 +207,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
 extern int get_mce_event(struct machine_check_event *mce, bool release);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
-extern void machine_check_print_event_info(struct machine_check_event *evt);
+extern void machine_check_print_event_info(struct machine_check_event *evt,
+                                          bool user_mode);
 extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
 
 #endif /* __ASM_PPC64_MCE_H__ */
index b960f00f490878683745202245d82190babae9a8..16eb0b50876151d979460c480edaba1777fc862d 100644 (file)
@@ -228,12 +228,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
        while (__this_cpu_read(mce_queue_count) > 0) {
                index = __this_cpu_read(mce_queue_count) - 1;
                machine_check_print_event_info(
-                               this_cpu_ptr(&mce_event_queue[index]));
+                               this_cpu_ptr(&mce_event_queue[index]), false);
                __this_cpu_dec(mce_queue_count);
        }
 }
 
-void machine_check_print_event_info(struct machine_check_event *evt)
+void machine_check_print_event_info(struct machine_check_event *evt,
+                                   bool user_mode)
 {
        const char *level, *sevstr, *subtype;
        static const char *mc_ue_types[] = {
@@ -311,7 +312,15 @@ void machine_check_print_event_info(struct machine_check_event *evt)
        printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
               evt->disposition == MCE_DISPOSITION_RECOVERED ?
               "Recovered" : "Not recovered");
-       printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0, (void *)evt->srr0);
+
+       if (user_mode) {
+               printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
+                       evt->srr0, current->pid, current->comm);
+       } else {
+               printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
+                      (void *)evt->srr0);
+       }
+
        printk("%s  Initiator: %s\n", level,
               evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
        switch (evt->error_type) {
index e0f856bfbfe8f3c6ecfa70e737b1b2496725d563..296c9426f72b25397126f106f2bbd0d7b1a42176 100644 (file)
@@ -435,7 +435,7 @@ int opal_machine_check(struct pt_regs *regs)
                       evt.version);
                return 0;
        }
-       machine_check_print_event_info(&evt);
+       machine_check_print_event_info(&evt, user_mode(regs));
 
        if (opal_recover_mce(regs, &evt))
                return 1;