KVM: Support assigning userspace memory to the guest
authorIzik Eidus <izike@qumranet.com>
Tue, 9 Oct 2007 17:20:39 +0000 (19:20 +0200)
committerAvi Kivity <avi@qumranet.com>
Wed, 30 Jan 2008 15:52:51 +0000 (17:52 +0200)
Instead of having the kernel allocate memory to the guest, let userspace
allocate it and pass the address to the kernel.

This is required for s390 support, but also enables features like memory
sharing and using hugetlbfs-backed memory.

Signed-off-by: Izik Eidus <izike@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
drivers/kvm/kvm.h
drivers/kvm/kvm_main.c
include/linux/kvm.h

index ec5b498945ae71effb79e97d1d96b3e1fceb8324..3eaed4dafec708ead232921963f29bda3b7bb15a 100644 (file)
@@ -408,6 +408,7 @@ struct kvm_memory_slot {
        struct page **phys_mem;
        unsigned long *rmap;
        unsigned long *dirty_bitmap;
+       int user_alloc; /* user allocated memory */
 };
 
 struct kvm {
index a1983d2d5b8f451e3e335313fab3cfa79e52f42d..22b143feb66d79f702fbf642c5309e4507f2642d 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/profile.h>
 #include <linux/kvm_para.h>
+#include <linux/pagemap.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -300,19 +301,40 @@ static struct kvm *kvm_create_vm(void)
        return kvm;
 }
 
+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+       int i;
+
+       for (i = 0; i < free->npages; ++i) {
+               if (free->phys_mem[i]) {
+                       if (!PageReserved(free->phys_mem[i]))
+                               SetPageDirty(free->phys_mem[i]);
+                       page_cache_release(free->phys_mem[i]);
+               }
+       }
+}
+
+static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
+{
+       int i;
+
+       for (i = 0; i < free->npages; ++i)
+               if (free->phys_mem[i])
+                       __free_page(free->phys_mem[i]);
+}
+
 /*
  * Free any memory in @free but not in @dont.
  */
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
                                  struct kvm_memory_slot *dont)
 {
-       int i;
-
        if (!dont || free->phys_mem != dont->phys_mem)
                if (free->phys_mem) {
-                       for (i = 0; i < free->npages; ++i)
-                               if (free->phys_mem[i])
-                                       __free_page(free->phys_mem[i]);
+                       if (free->user_alloc)
+                               kvm_free_userspace_physmem(free);
+                       else
+                               kvm_free_kernel_physmem(free);
                        vfree(free->phys_mem);
                }
        if (!dont || free->rmap != dont->rmap)
@@ -652,7 +674,9 @@ EXPORT_SYMBOL_GPL(fx_init);
  * Discontiguous memory is allowed, mostly for framebuffers.
  */
 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-                                         struct kvm_memory_region *mem)
+                                         struct
+                                         kvm_userspace_memory_region *mem,
+                                         int user_alloc)
 {
        int r;
        gfn_t base_gfn;
@@ -728,11 +752,27 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 
                memset(new.phys_mem, 0, npages * sizeof(struct page *));
                memset(new.rmap, 0, npages * sizeof(*new.rmap));
-               for (i = 0; i < npages; ++i) {
-                       new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-                                                    | __GFP_ZERO);
-                       if (!new.phys_mem[i])
+               if (user_alloc) {
+                       unsigned long pages_num;
+
+                       new.user_alloc = 1;
+                       down_read(&current->mm->mmap_sem);
+
+                       pages_num = get_user_pages(current, current->mm,
+                                                  mem->userspace_addr,
+                                                  npages, 1, 1, new.phys_mem,
+                                                  NULL);
+
+                       up_read(&current->mm->mmap_sem);
+                       if (pages_num != npages)
                                goto out_unlock;
+               } else {
+                       for (i = 0; i < npages; ++i) {
+                               new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+                                                            | __GFP_ZERO);
+                               if (!new.phys_mem[i])
+                                       goto out_unlock;
+                       }
                }
        }
 
@@ -3108,11 +3148,29 @@ static long kvm_vm_ioctl(struct file *filp,
                break;
        case KVM_SET_MEMORY_REGION: {
                struct kvm_memory_region kvm_mem;
+               struct kvm_userspace_memory_region kvm_userspace_mem;
 
                r = -EFAULT;
                if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
                        goto out;
-               r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
+               kvm_userspace_mem.slot = kvm_mem.slot;
+               kvm_userspace_mem.flags = kvm_mem.flags;
+               kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
+               kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+               r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_SET_USER_MEMORY_REGION: {
+               struct kvm_userspace_memory_region kvm_userspace_mem;
+
+               r = -EFAULT;
+               if (copy_from_user(&kvm_userspace_mem, argp,
+                                               sizeof kvm_userspace_mem))
+                       goto out;
+
+               r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
                if (r)
                        goto out;
                break;
@@ -3332,6 +3390,7 @@ static long kvm_dev_ioctl(struct file *filp,
                case KVM_CAP_IRQCHIP:
                case KVM_CAP_HLT:
                case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
+               case KVM_CAP_USER_MEMORY:
                        r = 1;
                        break;
                default:
index d2fd973d81feded004c9508cef7573e37944caa8..971f465e26fa688616dafac4b2a5f15911b59eee 100644 (file)
@@ -23,6 +23,15 @@ struct kvm_memory_region {
        __u64 memory_size; /* bytes */
 };
 
+/* for KVM_SET_USER_MEMORY_REGION */
+struct kvm_userspace_memory_region {
+       __u32 slot;
+       __u32 flags;
+       __u64 guest_phys_addr;
+       __u64 memory_size; /* bytes */
+       __u64 userspace_addr; /* start of the userspace allocated memory */
+};
+
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
 
@@ -348,6 +357,7 @@ struct kvm_signal_mask {
 #define KVM_CAP_IRQCHIP          0
 #define KVM_CAP_HLT      1
 #define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
+#define KVM_CAP_USER_MEMORY 3
 
 /*
  * ioctls for VM fds
@@ -355,6 +365,8 @@ struct kvm_signal_mask {
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
 #define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+                                       struct kvm_userspace_memory_region)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.