x86/xen: safely map and unmap grant frames when in atomic context
author David Vrabel <david.vrabel@citrix.com>
Fri, 11 Jul 2014 15:42:34 +0000 (16:42 +0100)
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Mon, 14 Jul 2014 19:28:02 +0000 (15:28 -0400)
arch_gnttab_map_frames() and arch_gnttab_unmap_frames() are called in
atomic context but were calling alloc_vm_area() which might sleep.

Also, if a driver attempts to allocate a grant ref from an interrupt
and the table needs expanding, then the CPU may already be in lazy MMU
mode and apply_to_page_range() will BUG when it tries to re-enable
lazy MMU mode.

These two functions are only used in PV guests.

Introduce arch_gnttab_init() to allocate the virtual address space in
advance.

Avoid the use of apply_to_page_range() by saving and using the
array of PTE addresses from the alloc_vm_area() call.

N.B. 'alloc_vm_area' pre-allocates the pagetable so there is no need
to worry about having to do a PGD/PUD/PMD walk (like apply_to_page_range
does) and we can instead do set_pte.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
----
[v2: Add comment about alloc_vm_area]
[v3: Fix compile error found by 0-day bot]

arch/arm/xen/grant-table.c
arch/x86/xen/grant-table.c
drivers/xen/grant-table.c
include/xen/grant_table.h

index 859a9bb002d54875e5089a940442d5015a9fe1a4..91cf08ba1e957d251fcb5b0028705a4cd52a7fd8 100644 (file)
@@ -51,3 +51,8 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
 {
        return -ENOSYS;
 }
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
+{
+       return 0;
+}
index c985835885802a69ef0b255e36c81484026ab227..ebfa9b2c871db56a2a6a631e5502acdf96e2032e 100644 (file)
 
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 
 #include <xen/interface/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
+#include <xen/xen.h>
 
 #include <asm/pgtable.h>
 
-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
-                     unsigned long addr, void *data)
+static struct gnttab_vm_area {
+       struct vm_struct *area;
+       pte_t **ptes;
+} gnttab_shared_vm_area, gnttab_status_vm_area;
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+                          unsigned long max_nr_gframes,
+                          void **__shared)
 {
-       unsigned long **frames = (unsigned long **)data;
+       void *shared = *__shared;
+       unsigned long addr;
+       unsigned long i;
 
-       set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
-       (*frames)++;
-       return 0;
-}
+       if (shared == NULL)
+               *__shared = shared = gnttab_shared_vm_area.area->addr;
 
-/*
- * This function is used to map shared frames to store grant status. It is
- * different from map_pte_fn above, the frames type here is uint64_t.
- */
-static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
-                            unsigned long addr, void *data)
-{
-       uint64_t **frames = (uint64_t **)data;
+       addr = (unsigned long)shared;
+
+       for (i = 0; i < nr_gframes; i++) {
+               set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
+                          mfn_pte(frames[i], PAGE_KERNEL));
+               addr += PAGE_SIZE;
+       }
 
-       set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
-       (*frames)++;
        return 0;
 }
 
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
-                       unsigned long addr, void *data)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+                          unsigned long max_nr_gframes,
+                          grant_status_t **__shared)
 {
+       grant_status_t *shared = *__shared;
+       unsigned long addr;
+       unsigned long i;
+
+       if (shared == NULL)
+               *__shared = shared = gnttab_status_vm_area.area->addr;
+
+       addr = (unsigned long)shared;
+
+       for (i = 0; i < nr_gframes; i++) {
+               set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
+                          mfn_pte(frames[i], PAGE_KERNEL));
+               addr += PAGE_SIZE;
+       }
 
-       set_pte_at(&init_mm, addr, pte, __pte(0));
        return 0;
 }
 
-int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
-                          unsigned long max_nr_gframes,
-                          void **__shared)
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 {
-       int rc;
-       void *shared = *__shared;
+       pte_t **ptes;
+       unsigned long addr;
+       unsigned long i;
 
-       if (shared == NULL) {
-               struct vm_struct *area =
-                       alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
-               BUG_ON(area == NULL);
-               shared = area->addr;
-               *__shared = shared;
-       }
+       if (shared == gnttab_status_vm_area.area->addr)
+               ptes = gnttab_status_vm_area.ptes;
+       else
+               ptes = gnttab_shared_vm_area.ptes;
 
-       rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-                                PAGE_SIZE * nr_gframes,
-                                map_pte_fn, &frames);
-       return rc;
+       addr = (unsigned long)shared;
+
+       for (i = 0; i < nr_gframes; i++) {
+               set_pte_at(&init_mm, addr, ptes[i], __pte(0));
+               addr += PAGE_SIZE;
+       }
 }
 
-int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
-                          unsigned long max_nr_gframes,
-                          grant_status_t **__shared)
+static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
 {
-       int rc;
-       grant_status_t *shared = *__shared;
+       area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL);
+       if (area->ptes == NULL)
+               return -ENOMEM;
 
-       if (shared == NULL) {
-               /* No need to pass in PTE as we are going to do it
-                * in apply_to_page_range anyhow. */
-               struct vm_struct *area =
-                       alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
-               BUG_ON(area == NULL);
-               shared = area->addr;
-               *__shared = shared;
+       area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
+       if (area->area == NULL) {
+               kfree(area->ptes);
+               return -ENOMEM;
        }
 
-       rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-                                PAGE_SIZE * nr_gframes,
-                                map_pte_fn_status, &frames);
-       return rc;
+       return 0;
 }
 
-void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
+static void arch_gnttab_vfree(struct gnttab_vm_area *area)
+{
+       free_vm_area(area->area);
+       kfree(area->ptes);
+}
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
 {
-       apply_to_page_range(&init_mm, (unsigned long)shared,
-                           PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+       int ret;
+
+       if (!xen_pv_domain())
+               return 0;
+
+       ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Always allocate the space for the status frames in case
+        * we're migrated to a host with V2 support.
+        */
+       ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
+       if (ret < 0)
+               goto err;
+
+       return 0;
+  err:
+       arch_gnttab_vfree(&gnttab_shared_vm_area);
+       return -ENOMEM;
 }
+
 #ifdef CONFIG_XEN_PVH
 #include <xen/balloon.h>
 #include <xen/events.h>
-#include <xen/xen.h>
 #include <linux/slab.h>
 static int __init xlated_setup_gnttab_pages(void)
 {
index 5d4de88fe5b8aaf9792c37188931f8edeed1fb0f..eeba7544f0cd4a7dc54119b9a2df2f7c13a975db 100644 (file)
@@ -1195,18 +1195,20 @@ static int gnttab_expand(unsigned int req_entries)
 int gnttab_init(void)
 {
        int i;
+       unsigned long max_nr_grant_frames;
        unsigned int max_nr_glist_frames, nr_glist_frames;
        unsigned int nr_init_grefs;
        int ret;
 
        gnttab_request_version();
+       max_nr_grant_frames = gnttab_max_grant_frames();
        nr_grant_frames = 1;
 
        /* Determine the maximum number of frames required for the
         * grant reference free list on the current hypervisor.
         */
        BUG_ON(grefs_per_grant_frame == 0);
-       max_nr_glist_frames = (gnttab_max_grant_frames() *
+       max_nr_glist_frames = (max_nr_grant_frames *
                               grefs_per_grant_frame / RPP);
 
        gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1223,6 +1225,11 @@ int gnttab_init(void)
                }
        }
 
+       ret = arch_gnttab_init(max_nr_grant_frames,
+                              nr_status_frames(max_nr_grant_frames));
+       if (ret < 0)
+               goto ini_nomem;
+
        if (gnttab_setup() < 0) {
                ret = -ENODEV;
                goto ini_nomem;
index a5af2a26d94f3f698072b3bf70a444bdb5da7978..5c1aba154b64c833c9acb865bcc600057234b59a 100644 (file)
@@ -170,6 +170,7 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr,
        unmap->dev_bus_addr = 0;
 }
 
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status);
 int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes,
                           unsigned long max_nr_gframes,
                           void **__shared);