KVM: nVMX: fix guest CR4 loading when emulating L2 to L1 exit
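The change named in the title is the switch from kvm_set_cr4() to vmx_set_cr4() in load_vmcs12_host_state(), shown in the second-to-last hunk below. This page is the cumulative diff of arch/x86/kvm/vmx.c in the android_kernel_alcatel_ttab tree, so several other backported changes appear alongside it: the is_nmi() helper, disabling PEBS before guest entry, #UD handlers for INVEPT/INVVPID, keeping HOST_CR4 in sync with the live CR4 on every VM entry, and deferring virtual-x2APIC mode switches while L2 is active.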
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51139ff34917ed8387086d9cd4018818826a06b9..be138952728409420a0a8dc853bb7a7eb3714cbe 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -366,6 +366,7 @@ struct nested_vmx {
        struct list_head vmcs02_pool;
        int vmcs02_num;
        u64 vmcs01_tsc_offset;
+       bool change_vmcs01_virtual_x2apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
        /*
@@ -438,6 +439,7 @@ struct vcpu_vmx {
 #endif
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
+               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        } host_state;
        struct {
                int vm86_active;
@@ -1045,10 +1047,10 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
        return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
 }
 
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-               == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+               == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
 }
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
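The is_nmi() helper checks the valid bit together with the NMI interruption type in one place; the open-coded versions of that test in handle_exception() and vmx_complete_atomic_exit() below are converted to use it, and is_exception() (valid hardware exception) goes away. Note the semantic change in nested_vmx_exit_handled(): the EXIT_REASON_EXCEPTION_NMI case now keeps the exit in L0 only for NMIs, where it previously kept anything that was not a hardware exception.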
@@ -1486,6 +1488,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                        return;
                }
                break;
+       case MSR_IA32_PEBS_ENABLE:
+               /* PEBS needs a quiescent period after being disabled (to write
+                * a record).  Disabling PEBS through VMX MSR swapping doesn't
+                * provide that period, so a CPU could write host's record into
+                * guest's memory.
+                */
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
        }
 
        for (i = 0; i < m->nr; ++i)
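add_atomic_switch_msr() programs the VM-entry/VM-exit MSR-load areas, which swap guest and host MSR values atomically around the world switch. As the new comment explains, that is not enough for IA32_PEBS_ENABLE: PEBS needs a quiescent period after being cleared so the hardware can finish writing its last record, and the autoload area provides no such window, so a host PEBS record could land in guest memory. The explicit wrmsrl() therefore turns PEBS off on the host right away, before the guest value is installed at VM entry.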
@@ -3065,7 +3074,7 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save)
        }
 
        vmcs_write16(sf->selector, var.selector);
-       vmcs_write32(sf->base, var.base);
+       vmcs_writel(sf->base, var.base);
        vmcs_write32(sf->limit, var.limit);
        vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
 }
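The segment base registers are natural-width VMCS fields, so presumably the natural-width accessor is the right one here; vmcs_write32() takes a u32 and would truncate a wider base (real-mode bases fit in 32 bits, so this looks like a correctness/consistency fix rather than a behavioural one).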
@@ -4076,11 +4085,16 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
        u32 low32, high32;
        unsigned long tmpl;
        struct desc_ptr dt;
+       unsigned long cr4;
 
        vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
-       vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
        vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 
+       /* Save the most likely value for this task's CR4 in the VMCS. */
+       cr4 = read_cr4();
+       vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
+       vmx->host_state.vmcs_host_cr4 = cr4;
+
        vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
        /*
@@ -4702,7 +4716,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        if (is_machine_check(intr_info))
                return handle_machine_check(vcpu);
 
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+       if (is_nmi(intr_info))
                return 1;  /* already handled by vmx_vcpu_run() */
 
        if (is_no_device(intr_info)) {
@@ -6242,6 +6256,18 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -6286,6 +6312,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
+       [EXIT_REASON_INVVPID]                 = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
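Without these table entries, an INVEPT or INVVPID executed by a guest falls through to the unhandled-exit path and is reported to userspace as an unknown exit reason. Since this kernel does not expose EPT or VPID capabilities to its guests, the architecturally sensible response is to inject #UD, which the two new handlers do. The matching EXIT_REASON_INVEPT/EXIT_REASON_INVVPID cases added to nested_vmx_exit_handled() further down ensure that when L2 executes the instructions, the exit is reflected to L1 like the other VMX instructions.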
@@ -6479,7 +6507,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 
        switch (exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
-               if (!is_exception(intr_info))
+               if (is_nmi(intr_info))
                        return 0;
                else if (is_page_fault(intr_info))
                        return enable_ept;
@@ -6512,6 +6540,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -6674,6 +6703,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 {
        u32 sec_exec_control;
 
+       /* Postpone execution until vmcs01 is the current VMCS. */
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               return;
+       }
+
        /*
         * There is not point to enable virtualize x2apic without enable
         * apicv
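While the vCPU is in guest mode, vmcs02, not vmcs01, is the loaded VMCS, so rewriting SECONDARY_VM_EXEC_CONTROL at this point would change the controls L2 runs under and the update would be lost for L1. The new flag records the request instead, and nested_vmx_vmexit() (last hunk) replays it once vmcs01 is current again, re-deriving the mode from the x2APIC-enable bit of the vCPU's APIC base.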
@@ -6768,8 +6803,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
                kvm_machine_check();
 
        /* We need to handle NMIs before interrupts are enabled */
-       if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK)) {
+       if (is_nmi(exit_intr_info)) {
                kvm_before_handle_nmi(&vmx->vcpu);
                asm("int $2");
                kvm_after_handle_nmi(&vmx->vcpu);
@@ -6956,7 +6990,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       unsigned long debugctlmsr;
+       unsigned long debugctlmsr, cr4;
 
        /* Record the guest's net vcpu time for enforced NMI injections. */
        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -6977,6 +7011,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
+       cr4 = read_cr4();
+       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+               vmcs_writel(HOST_CR4, cr4);
+               vmx->host_state.vmcs_host_cr4 = cr4;
+       }
+
        /* When single-stepping over STI and MOV SS, we must clear the
         * corresponding interruptibility bits in the guest state. Otherwise
         * vmentry fails as it then expects bit 14 (BS) in pending debug
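HOST_CR4 used to be written once, when the constant host state was set up; if the host toggled CR4 bits afterwards (CR4 is not constant at runtime, bits such as TSD or PCE can change), every VM exit would silently reload the stale value. With this change vmx_set_constant_host_state() remembers what it wrote in host_state.vmcs_host_cr4, and vmx_vcpu_run() re-reads CR4 before each entry and rewrites the VMCS field only when it differs, so the common path costs one read_cr4() and a compare. This appears to be a backport of the upstream "preserve CR4 across VM entry" hardening.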
@@ -7974,7 +8014,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
         * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
         */
        vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
-       kvm_set_cr4(vcpu, vmcs12->host_cr4);
+       vmx_set_cr4(vcpu, vmcs12->host_cr4);
 
        /* shadow page tables on either EPT or shadow page tables */
        kvm_set_cr3(vcpu, vmcs12->host_cr3);
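This is the fix the title refers to. kvm_set_cr4() is the handler for guest-initiated CR4 writes: it validates the new value against the vCPU's current state and refuses transitions that are illegal from that state. During an emulated L2-to-L1 exit the "current" state is still L2's, so checking L1's host_cr4 against it is meaningless and can spuriously fail, for instance when L1 uses PCID (CR4.PCIDE=1) while L2 does not. vmx_set_cr4() loads the value without those guest-write checks. For illustration only, the kind of check that can trip is roughly the following in kernels of this vintage (abridged from kvm_set_cr4() in arch/x86/kvm/x86.c; it is not part of this diff):

	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
		if (!guest_cpuid_has_pcid(vcpu))
			return 1;
		/* PCID cannot be enabled when CR3[11:0] != 0 or EFER.LMA = 0,
		 * but CR3 and EFER at this point still describe L2, not the
		 * L1 state being restored. */
		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
			return 1;
	}

Since the return value was ignored at this call site, a rejected load meant L1 simply resumed with the wrong CR4.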
@@ -8051,6 +8091,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
        /* Update TSC_OFFSET if TSC was changed while L2 ran */
        vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
 
+       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       }
+
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
        vmx->host_rsp = 0;