s390/kvm: Kick guests out of sie if prefix page host pte is touched
author: Christian Borntraeger <borntraeger@de.ibm.com>
Fri, 17 May 2013 12:41:36 +0000 (14:41 +0200)
committer: Gleb Natapov <gleb@redhat.com>
Tue, 21 May 2013 08:55:24 +0000 (11:55 +0300)
The guest prefix pages must be mapped writeable (i.e. pinned at the
pte level) the whole time SIE is running; otherwise the guest might
see random behaviour. It turns out that mlocking is not enough: the
page table entry (not the page) might change or become r/o. This
patch uses the gmap notifiers to kick guest cpus out of SIE when
that happens.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
arch/s390/include/asm/pgtable.h
arch/s390/kvm/intercept.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h

index 1fc68d97be9d0540eb1c2eac0d8b6572464fbf5c..1d0ad7d2d29a433e6a1de4981ca5a2e6279b57a8 100644 (file)
@@ -739,6 +739,7 @@ struct gmap {
        struct mm_struct *mm;
        unsigned long *table;
        unsigned long asce;
+       void *private;
        struct list_head crst_list;
 };
 
index b7d1b2edeeb35720402d9bf79055252f2d38ecd1..f0b8be0cc08d8132803600b28b39f976baf9e846 100644 (file)
@@ -174,47 +174,12 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
-       unsigned long vmaddr;
        int viwhy = vcpu->arch.sie_block->ipb >> 16;
-       int rc;
 
        vcpu->stat.exit_validity++;
        trace_kvm_s390_intercept_validity(vcpu, viwhy);
-       if (viwhy == 0x37) {
-               vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
-                                   vcpu->arch.gmap);
-               if (IS_ERR_VALUE(vmaddr)) {
-                       rc = -EOPNOTSUPP;
-                       goto out;
-               }
-               rc = fault_in_pages_writeable((char __user *) vmaddr,
-                        PAGE_SIZE);
-               if (rc) {
-                       /* user will receive sigsegv, exit to user */
-                       rc = -EOPNOTSUPP;
-                       goto out;
-               }
-               vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE,
-                                   vcpu->arch.gmap);
-               if (IS_ERR_VALUE(vmaddr)) {
-                       rc = -EOPNOTSUPP;
-                       goto out;
-               }
-               rc = fault_in_pages_writeable((char __user *) vmaddr,
-                        PAGE_SIZE);
-               if (rc) {
-                       /* user will receive sigsegv, exit to user */
-                       rc = -EOPNOTSUPP;
-                       goto out;
-               }
-       } else
-               rc = -EOPNOTSUPP;
-
-out:
-       if (rc)
-               VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
-                          viwhy);
-       return rc;
+       WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+       return -EOPNOTSUPP;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
index ef4ef21f2c732a1444752024089309ee3e8629bc..08227c1e816fc8c0b9fa8caaeeec35c8ff5d30ff 100644 (file)
@@ -84,6 +84,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 };
 
 static unsigned long long *facilities;
+static struct gmap_notifier gmap_notifier;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void *garbage)
@@ -96,13 +97,18 @@ void kvm_arch_hardware_disable(void *garbage)
 {
 }
 
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+
 int kvm_arch_hardware_setup(void)
 {
+       gmap_notifier.notifier_call = kvm_gmap_notifier;
+       gmap_register_ipte_notifier(&gmap_notifier);
        return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
+       gmap_unregister_ipte_notifier(&gmap_notifier);
 }
 
 void kvm_arch_check_processor_compat(void *rtn)
@@ -239,6 +245,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
                kvm->arch.gmap = gmap_alloc(current->mm);
                if (!kvm->arch.gmap)
                        goto out_nogmap;
+               kvm->arch.gmap->private = kvm;
        }
 
        kvm->arch.css_support = 0;
@@ -309,6 +316,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                vcpu->arch.gmap = gmap_alloc(current->mm);
                if (!vcpu->arch.gmap)
                        return -ENOMEM;
+               vcpu->arch.gmap->private = vcpu->kvm;
                return 0;
        }
 
@@ -482,6 +490,22 @@ void exit_sie_sync(struct kvm_vcpu *vcpu)
        exit_sie(vcpu);
 }
 
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+       int i;
+       struct kvm *kvm = gmap->private; /* VM stored in ->private when the gmap was allocated */
+       struct kvm_vcpu *vcpu;
+       /* gmap ipte notifier callback: kick every vcpu whose prefix area covers @address */
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               /* match against both prefix pages: clearing bit 0x1000 folds the second page onto the 8k-aligned prefix */
+               if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
+                       VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+                       kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); /* notifier is re-armed by the request handler before the next SIE entry */
+                       exit_sie_sync(vcpu); /* kick this vcpu out of SIE */
+               }
+       }
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
        /* kvm common code refers to this, but never calls it */
@@ -634,6 +658,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
        return -EINVAL; /* not implemented yet */
 }
 
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Handle pending requests; returns 0 or a gmap_ipte_notify error.
+        * MMU_RELOAD is used just to re-arm the ipte notifier for the guest
+        * prefix pages. gmap_ipte_notify waits on the ptl lock, so the ipte
+        * for this request has already finished. A second unmapper may race
+        * with us to set the blocking bit; just retry the request loop.
+        */
+       while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+               int rc;
+               rc = gmap_ipte_notify(vcpu->arch.gmap,
+                                     vcpu->arch.sie_block->prefix,
+                                     PAGE_SIZE * 2); /* covers both prefix pages */
+               if (rc)
+                       return rc; /* __vcpu_run() returns this without entering SIE */
+               s390_vcpu_unblock(vcpu); /* NOTE(review): presumably lifts the block set when the vcpu was kicked - confirm */
+       }
+       return 0;
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
        int rc;
@@ -649,6 +694,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        if (!kvm_is_ucontrol(vcpu->kvm))
                kvm_s390_deliver_pending_interrupts(vcpu);
 
+       rc = kvm_s390_handle_requests(vcpu);
+       if (rc)
+               return rc;
+
        vcpu->arch.sie_block->icptcode = 0;
        preempt_disable();
        kvm_guest_enter();
index 7a8abfd26a0f20b96a70f2d677ecfc421856cfad..269b523d0f6e87486a6eb82360631bffa1969cc0 100644 (file)
@@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
        vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
        vcpu->arch.sie_block->ihcpu  = 0xffff;
+       kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
 static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)