r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
- /*
- * Deliver a machine check interrupt to the guest.
- * We have to do this, even if the host has handled the
- * machine check, because machine checks use SRR0/1 and
- * the interrupt might have trashed guest state in them.
- */
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_MACHINE_CHECK);
- r = RESUME_GUEST;
+ /* Exit to guest with KVM_EXIT_NMI as exit reason */
+ run->exit_reason = KVM_EXIT_NMI;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ /* Clear out the old NMI status from run->flags */
+ run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+ /* Now set the NMI status */
+ if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+ run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+ else
+ run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+ r = RESUME_HOST;
+ /* Print the MCE event to host console. */
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break;
case BOOK3S_INTERRUPT_PROGRAM:
{
out:
/*
+ * For guest that supports FWNMI capability, hook the MCE event into
+ * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
+ * exit reason. On our way to exit we will pull this event from vcpu
+ * structure and print it from thread 0 of the core/subcore.
+ *
+ * For guest that does not support FWNMI capability (old QEMU):
* We are now going enter guest either through machine check
* interrupt (for unhandled errors) or will continue from
* current HSRR0 (for handled errors) in guest. Hence
* queue up the event so that we can log it from host console later.
*/
- machine_check_queue_event();
+ if (vcpu->kvm->arch.fwnmi_enabled) {
+ /*
+ * Hook up the mce event on to vcpu structure.
+ * First clear the old event.
+ */
+ memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
+ if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+ vcpu->arch.mce_evt = mce_evt;
+ }
+ } else
+ machine_check_queue_event();
return handled;
}
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
+ * For external interrupts we need to call the Linux
+ * handler to process the interrupt. We do that by jumping
+ * to absolute address 0x500 for external interrupts.
+ * The [h]rfid at the end of the handler will return to
+ * the book3s_hv_interrupts.S code. For other interrupts
+ * we do the rfid to get back to the book3s_hv_interrupts.S
+ * code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 11f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beq cr1, 13f /* machine check */
+ /*
+ * BOOK3S_INTERRUPT_MACHINE_CHECK is handled at the
+ * time of guest exit
+ */
RFI
/* On POWER7, we have external interrupts set to use HSRR0/1 */
mtspr SPRN_HSRR1, r7
ba 0x500
-13: b machine_check_fwnmi
-
14: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
b hmi_exception_after_realmode
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
- * machine check interrupt (set HSRR0 to 0x200). And for handled
- * errors (no-fatal), just go back to guest execution with current
- * HSRR0 instead of exiting guest. This new approach will inject
- * machine check to guest for fatal error causing guest to crash.
- *
- * The old code used to return to host for unhandled errors which
- * was causing guest to hang with soft lockups inside guest and
- * makes it difficult to recover guest instance.
+ * For the guest that is FWNMI capable, deliver all the MCE errors
+ * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
+ * reason. This new approach injects machine check errors in guest
+ * address space to guest with additional information in the form
+ * of RTAS event, thus enabling guest kernel to suitably handle
+ * such errors.
*
+ * For the guest that is not FWNMI capable (old QEMU) fallback
+ * to old behaviour for backward compatibility:
+ * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
+ * through machine check interrupt (set HSRR0 to 0x200).
+ * For handled errors (no-fatal), just go back to guest execution
+ * with current HSRR0.
* if we receive machine check with MSR(RI=0) then deliver it to
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */
+ /* Check if guest is capable of handling NMI exit */
+ ld r10, VCPU_KVM(r9)
+ lbz r10, KVM_FWNMI(r10)
+ cmpdi r10, 1 /* FWNMI capable? */
+ beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
+
+ /* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)