KVM: x86: save/load state on SMM switch
author Paolo Bonzini <pbonzini@redhat.com>
Tue, 5 May 2015 09:50:23 +0000 (11:50 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 4 Jun 2015 14:17:46 +0000 (16:17 +0200)
The big ugly one.  This patch adds support for switching in and out of
system management mode: entry upon receiving KVM_REQ_SMI, exit upon
executing an RSM instruction.  Both the 32-bit and the 64-bit format of
the SMM state save area are supported.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/trace.h
arch/x86/kvm/x86.c
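
A usage sketch (editor's illustration, not part of the patch): userspace
exercises the new entry path by queueing an SMI with the KVM_SMI vcpu ioctl
added by the API patch earlier in this series; the vcpu_fd setup is assumed.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Queue an SMI on a vCPU.  KVM then raises KVM_REQ_SMI, and
     * process_smi() saves the current state to SMRAM at smbase + 0xfe00
     * and redirects execution to smbase + 0x8000.  The SMM handler
     * eventually executes RSM, which em_rsm() emulates to restore the
     * saved state.
     */
    static int inject_smi(int vcpu_fd)
    {
            return ioctl(vcpu_fd, KVM_SMI);
    }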

index 496b3695d3d3c96fd2687b2b6bc013d9ee8d96e5..dd05b9cef6ae332d42ebee6c2cb41554e8eef632 100644
@@ -70,6 +70,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
        return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
 }
 
+static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+       return best && (best->edx & bit(X86_FEATURE_LM));
+}
+
 static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
index e763a9b8c26bf185cbf6681804cc7cc68047d874..e7a4fde5d631031908b6f336d9ecc40e7a3a413d 100644
@@ -2259,12 +2259,258 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
        return rc;
 }
 
+static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
+{
+       u32 eax, ebx, ecx, edx;
+
+       eax = 0x80000001;
+       ecx = 0;
+       ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+       return edx & bit(X86_FEATURE_LM);
+}
+
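+/*
+ * Read one field of the SMM state save area.  If read_std fails, the
+ * statement expression returns X86EMUL_UNHANDLEABLE from the *calling*
+ * function, so GET_SMSTATE must only be used inside the rsm_* helpers.
+ */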
+#define GET_SMSTATE(type, smbase, offset)                                \
+       ({                                                                \
+        type __val;                                                      \
+        int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val,       \
+                                    sizeof(__val), NULL);                \
+        if (r != X86EMUL_CONTINUE)                                       \
+                return X86EMUL_UNHANDLEABLE;                             \
+        __val;                                                           \
+       })
+
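+/*
+ * The state save area packs a segment's access bits into bits 8-23 of a
+ * 32-bit flags word; unpack them into the corresponding desc_struct bits.
+ */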
+static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+{
+       desc->g    = (flags >> 23) & 1;
+       desc->d    = (flags >> 22) & 1;
+       desc->l    = (flags >> 21) & 1;
+       desc->avl  = (flags >> 20) & 1;
+       desc->p    = (flags >> 15) & 1;
+       desc->dpl  = (flags >> 13) & 3;
+       desc->s    = (flags >> 12) & 1;
+       desc->type = (flags >>  8) & 15;
+}
+
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+{
+       struct desc_struct desc;
+       int offset;
+       u16 selector;
+
+       selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+
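+       /* ES, CS and SS come first in the save area; DS, FS and GS follow. */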
+       if (n < 3)
+               offset = 0x7f84 + n * 12;
+       else
+               offset = 0x7f2c + (n - 3) * 12;
+
+       set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
+       set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
+       rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+       ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+       return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+{
+       struct desc_struct desc;
+       int offset;
+       u16 selector;
+       u32 base3;
+
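+       /*
+        * The 64-bit format gives each segment register a 16-byte entry:
+        * selector, attributes, limit, base (high half of the base in base3).
+        */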
+       offset = 0x7e00 + n * 16;
+
+       selector =                GET_SMSTATE(u16, smbase, offset);
+       rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
+       set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
+       set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
+       base3 =                   GET_SMSTATE(u32, smbase, offset + 12);
+
+       ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+       return X86EMUL_CONTINUE;
+}
+
+static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+                                    u64 cr0, u64 cr4)
+{
+       int bad;
+
+       /*
+        * First enable PAE; long mode needs it before CR0.PG = 1 is set.
+        * Then enable protected mode.  However, CR4.PCIDE cannot be set
+        * while EFER.LMA = 0, so restore it separately once CR0 is in place.
+        */
+       bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+       if (bad)
+               return X86EMUL_UNHANDLEABLE;
+
+       bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+       if (bad)
+               return X86EMUL_UNHANDLEABLE;
+
+       if (cr4 & X86_CR4_PCIDE) {
+               bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+               if (bad)
+                       return X86EMUL_UNHANDLEABLE;
+       }
+
+       return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+       struct desc_struct desc;
+       struct desc_ptr dt;
+       u16 selector;
+       u32 val, cr0, cr4;
+       int i;
+
+       cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
+       ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+       ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
+       ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
+
+       for (i = 0; i < 8; i++)
+               *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+
+       val = GET_SMSTATE(u32, smbase, 0x7fcc);
+       ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+       val = GET_SMSTATE(u32, smbase, 0x7fc8);
+       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+       selector =                 GET_SMSTATE(u32, smbase, 0x7fc4);
+       set_desc_base(&desc,       GET_SMSTATE(u32, smbase, 0x7f64));
+       set_desc_limit(&desc,      GET_SMSTATE(u32, smbase, 0x7f60));
+       rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smbase, 0x7f5c));
+       ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+
+       selector =                 GET_SMSTATE(u32, smbase, 0x7fc0);
+       set_desc_base(&desc,       GET_SMSTATE(u32, smbase, 0x7f80));
+       set_desc_limit(&desc,      GET_SMSTATE(u32, smbase, 0x7f7c));
+       rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smbase, 0x7f78));
+       ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+
+       dt.address =               GET_SMSTATE(u32, smbase, 0x7f74);
+       dt.size =                  GET_SMSTATE(u32, smbase, 0x7f70);
+       ctxt->ops->set_gdt(ctxt, &dt);
+
+       dt.address =               GET_SMSTATE(u32, smbase, 0x7f58);
+       dt.size =                  GET_SMSTATE(u32, smbase, 0x7f54);
+       ctxt->ops->set_idt(ctxt, &dt);
+
+       for (i = 0; i < 6; i++) {
+               int r = rsm_load_seg_32(ctxt, smbase, i);
+               if (r != X86EMUL_CONTINUE)
+                       return r;
+       }
+
+       cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+
+       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+
+       return rsm_enter_protected_mode(ctxt, cr0, cr4);
+}
+
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+       struct desc_struct desc;
+       struct desc_ptr dt;
+       u64 val, cr0, cr4;
+       u32 base3;
+       u16 selector;
+       int i;
+
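+       /* GPRs are saved top-down: RAX at 0x7ff8 down to R15 at 0x7f80. */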
+       for (i = 0; i < 16; i++)
+               *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+
+       ctxt->_eip   = GET_SMSTATE(u64, smbase, 0x7f78);
+       ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+
+       val = GET_SMSTATE(u32, smbase, 0x7f68);
+       ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+       val = GET_SMSTATE(u32, smbase, 0x7f60);
+       ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+       cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
+       ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+       cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
+       ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
+       val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
+       ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
+
+       selector =                  GET_SMSTATE(u32, smbase, 0x7e90);
+       rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smbase, 0x7e92) << 8);
+       set_desc_limit(&desc,       GET_SMSTATE(u32, smbase, 0x7e94));
+       set_desc_base(&desc,        GET_SMSTATE(u32, smbase, 0x7e98));
+       base3 =                     GET_SMSTATE(u32, smbase, 0x7e9c);
+       ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+
+       dt.size =                   GET_SMSTATE(u32, smbase, 0x7e84);
+       dt.address =                GET_SMSTATE(u64, smbase, 0x7e88);
+       ctxt->ops->set_idt(ctxt, &dt);
+
+       selector =                  GET_SMSTATE(u32, smbase, 0x7e70);
+       rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smbase, 0x7e72) << 8);
+       set_desc_limit(&desc,       GET_SMSTATE(u32, smbase, 0x7e74));
+       set_desc_base(&desc,        GET_SMSTATE(u32, smbase, 0x7e78));
+       base3 =                     GET_SMSTATE(u32, smbase, 0x7e7c);
+       ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+
+       dt.size =                   GET_SMSTATE(u32, smbase, 0x7e64);
+       dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
+       ctxt->ops->set_gdt(ctxt, &dt);
+
+       for (i = 0; i < 6; i++) {
+               int r = rsm_load_seg_64(ctxt, smbase, i);
+               if (r != X86EMUL_CONTINUE)
+                       return r;
+       }
+
+       return rsm_enter_protected_mode(ctxt, cr0, cr4);
+}
+
 static int em_rsm(struct x86_emulate_ctxt *ctxt)
 {
+       unsigned long cr0, cr4, efer;
+       u64 smbase;
+       int ret;
+
        if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
                return emulate_ud(ctxt);
 
-       return X86EMUL_UNHANDLEABLE;
+       /*
+        * Get back to real mode, to prepare a safe state in which to load
+        * CR0/CR3/CR4/EFER.  This also ensures that addresses passed to
+        * read_std/write_std are not virtual.
+        *
+        * CR4.PCIDE must be zero, because it is a 64-bit-mode-only feature.
+        */
+       cr0 = ctxt->ops->get_cr(ctxt, 0);
+       if (cr0 & X86_CR0_PE)
+               ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+       cr4 = ctxt->ops->get_cr(ctxt, 4);
+       if (cr4 & X86_CR4_PAE)
+               ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+       efer = 0;
+       ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+
+       smbase = ctxt->ops->get_smbase(ctxt);
+       if (emulator_has_longmode(ctxt))
+               ret = rsm_load_state_64(ctxt, smbase + 0x8000);
+       else
+               ret = rsm_load_state_32(ctxt, smbase + 0x8000);
+
+       if (ret != X86EMUL_CONTINUE) {
+               /* FIXME: should triple fault */
+               return X86EMUL_UNHANDLEABLE;
+       }
+
+       if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+               ctxt->ops->set_nmi_mask(ctxt, false);
+
+       ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
+       ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
+       return X86EMUL_CONTINUE;
 }
 
 static void
index 7c7bc8bef21fd125e13c6ddc4a1f27f36c6b6b25..4eae7c35ddf56464ca8f2063a7bd36bf83eead53 100644
@@ -952,6 +952,28 @@ TRACE_EVENT(kvm_wait_lapic_expire,
                  __entry->delta < 0 ? "early" : "late")
 );
 
+TRACE_EVENT(kvm_enter_smm,
+       TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering),
+       TP_ARGS(vcpu_id, smbase, entering),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vcpu_id         )
+               __field(        u64,            smbase          )
+               __field(        bool,           entering        )
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id        = vcpu_id;
+               __entry->smbase         = smbase;
+               __entry->entering       = entering;
+       ),
+
+       TP_printk("vcpu %u: %s SMM, smbase 0x%llx",
+                 __entry->vcpu_id,
+                 __entry->entering ? "entering" : "leaving",
+                 __entry->smbase)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index ab2521b588d8757f1451ff16d81d838fe033e176..51d994e1d6af65f348a502ebc5876e5225275c52 100644
@@ -5479,6 +5479,9 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu);
 static void kvm_smm_changed(struct kvm_vcpu *vcpu)
 {
        if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
+               /* This is a good place to trace that we are exiting SMM.  */
+               trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
+
                if (unlikely(vcpu->arch.smi_pending)) {
                        kvm_make_request(KVM_REQ_SMI, vcpu);
                        vcpu->arch.smi_pending = 0;
@@ -6390,14 +6393,231 @@ static void process_nmi(struct kvm_vcpu *vcpu)
        kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
+#define put_smstate(type, buf, offset, val)                      \
+       *(type *)((buf) + (offset) - 0x7e00) = val
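+/*
+ * buf caches the 512-byte state save area that process_smi() copies to
+ * smbase + 0xfe00; offsets are specified relative to smbase + 0x8000,
+ * hence the -0x7e00 above.
+ */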
+
+static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+{
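+       /* Inverse of rsm_set_desc_flags(): pack the access bits into bits 8-23. */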
+       u32 flags = 0;
+       flags |= seg->g       << 23;
+       flags |= seg->db      << 22;
+       flags |= seg->l       << 21;
+       flags |= seg->avl     << 20;
+       flags |= seg->present << 15;
+       flags |= seg->dpl     << 13;
+       flags |= seg->s       << 12;
+       flags |= seg->type    << 8;
+       return flags;
+}
+
+static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+       struct kvm_segment seg;
+       int offset;
+
+       kvm_get_segment(vcpu, &seg, n);
+       put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+
+       if (n < 3)
+               offset = 0x7f84 + n * 12;
+       else
+               offset = 0x7f2c + (n - 3) * 12;
+
+       put_smstate(u32, buf, offset + 8, seg.base);
+       put_smstate(u32, buf, offset + 4, seg.limit);
+       put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+}
+
+static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+       struct kvm_segment seg;
+       int offset;
+       u16 flags;
+
+       kvm_get_segment(vcpu, &seg, n);
+       offset = 0x7e00 + n * 16;
+
+       flags = process_smi_get_segment_flags(&seg) >> 8;
+       put_smstate(u16, buf, offset, seg.selector);
+       put_smstate(u16, buf, offset + 2, flags);
+       put_smstate(u32, buf, offset + 4, seg.limit);
+       put_smstate(u64, buf, offset + 8, seg.base);
+}
+
+static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+{
+       struct desc_ptr dt;
+       struct kvm_segment seg;
+       unsigned long val;
+       int i;
+
+       put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+       put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+       put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+       put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+
+       for (i = 0; i < 8; i++)
+               put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
+
+       kvm_get_dr(vcpu, 6, &val);
+       put_smstate(u32, buf, 0x7fcc, (u32)val);
+       kvm_get_dr(vcpu, 7, &val);
+       put_smstate(u32, buf, 0x7fc8, (u32)val);
+
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+       put_smstate(u32, buf, 0x7fc4, seg.selector);
+       put_smstate(u32, buf, 0x7f64, seg.base);
+       put_smstate(u32, buf, 0x7f60, seg.limit);
+       put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+       put_smstate(u32, buf, 0x7fc0, seg.selector);
+       put_smstate(u32, buf, 0x7f80, seg.base);
+       put_smstate(u32, buf, 0x7f7c, seg.limit);
+       put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7f74, dt.address);
+       put_smstate(u32, buf, 0x7f70, dt.size);
+
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7f58, dt.address);
+       put_smstate(u32, buf, 0x7f54, dt.size);
+
+       for (i = 0; i < 6; i++)
+               process_smi_save_seg_32(vcpu, buf, i);
+
+       put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+
+       /* revision id */
+       put_smstate(u32, buf, 0x7efc, 0x00020000);
+       put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+}
+
+static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+{
+#ifdef CONFIG_X86_64
+       struct desc_ptr dt;
+       struct kvm_segment seg;
+       unsigned long val;
+       int i;
+
+       for (i = 0; i < 16; i++)
+               put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
+
+       put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+       put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+
+       kvm_get_dr(vcpu, 6, &val);
+       put_smstate(u64, buf, 0x7f68, val);
+       kvm_get_dr(vcpu, 7, &val);
+       put_smstate(u64, buf, 0x7f60, val);
+
+       put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+       put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+       put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+
+       put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+
+       /* revision id */
+       put_smstate(u32, buf, 0x7efc, 0x00020064);
+
+       put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+       put_smstate(u16, buf, 0x7e90, seg.selector);
+       put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u32, buf, 0x7e94, seg.limit);
+       put_smstate(u64, buf, 0x7e98, seg.base);
+
+       kvm_x86_ops->get_idt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7e84, dt.size);
+       put_smstate(u64, buf, 0x7e88, dt.address);
+
+       kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+       put_smstate(u16, buf, 0x7e70, seg.selector);
+       put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u32, buf, 0x7e74, seg.limit);
+       put_smstate(u64, buf, 0x7e78, seg.base);
+
+       kvm_x86_ops->get_gdt(vcpu, &dt);
+       put_smstate(u32, buf, 0x7e64, dt.size);
+       put_smstate(u64, buf, 0x7e68, dt.address);
+
+       for (i = 0; i < 6; i++)
+               process_smi_save_seg_64(vcpu, buf, i);
+#else
+       WARN_ON_ONCE(1);
+#endif
+}
+
 static void process_smi(struct kvm_vcpu *vcpu)
 {
+       struct kvm_segment cs, ds;
+       char buf[512];
+       u32 cr0;
+
        if (is_smm(vcpu)) {
                vcpu->arch.smi_pending = true;
                return;
        }
 
-       printk_once(KERN_DEBUG "Ignoring guest SMI\n");
+       trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
+       vcpu->arch.hflags |= HF_SMM_MASK;
+       memset(buf, 0, 512);
+       if (guest_cpuid_has_longmode(vcpu))
+               process_smi_save_state_64(vcpu, buf);
+       else
+               process_smi_save_state_32(vcpu, buf);
+
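+       /* The 512-byte state save area sits at the top of SMRAM. */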
+       kvm_write_guest(vcpu->kvm, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+       if (kvm_x86_ops->get_nmi_mask(vcpu))
+               vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+       else
+               kvm_x86_ops->set_nmi_mask(vcpu, true);
+
+       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+       kvm_rip_write(vcpu, 0x8000);
+
+       cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+       kvm_x86_ops->set_cr0(vcpu, cr0);
+       vcpu->arch.cr0 = cr0;
+
+       kvm_x86_ops->set_cr4(vcpu, 0);
+
+       __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+
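+       /*
+        * Execution resumes at SMBASE:8000 with real-mode-style segments;
+        * the CS selector mirrors the base (base = selector << 4).
+        */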
+       cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+       cs.base = vcpu->arch.smbase;
+
+       ds.selector = 0;
+       ds.base = 0;
+
+       cs.limit    = ds.limit = 0xffffffff;
+       cs.type     = ds.type = 0x3;
+       cs.dpl      = ds.dpl = 0;
+       cs.db       = ds.db = 0;
+       cs.s        = ds.s = 1;
+       cs.l        = ds.l = 0;
+       cs.g        = ds.g = 1;
+       cs.avl      = ds.avl = 0;
+       cs.present  = ds.present = 1;
+       cs.unusable = ds.unusable = 0;
+       cs.padding  = ds.padding = 0;
+
+       kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+       kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+
+       if (guest_cpuid_has_longmode(vcpu))
+               kvm_x86_ops->set_efer(vcpu, 0);
+
+       kvm_update_cpuid(vcpu);
+       kvm_mmu_reset_context(vcpu);
 }
 
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)