xen/blkback: make pool of persistent grants and free pages per-queue
author     Bob Liu <bob.liu@oracle.com>
           Sat, 14 Nov 2015 03:12:19 +0000 (11:12 +0800)
committer  Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
           Mon, 4 Jan 2016 17:21:06 +0000 (12:21 -0500)

Make the pools of persistent grants and free pages per-queue/ring instead
of per-device, to get better scalability.
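
As a simplified illustration (not the real definitions; the field names
are taken from the common.h hunk below and everything else is omitted),
the persistent-grant tree and the free-page pool now live in the per-ring
struct xen_blkif_ring rather than in the per-device struct xen_blkif, so
each queue gets its own lock, LRU timer and purge work item:

    #include <linux/atomic.h>
    #include <linux/list.h>
    #include <linux/rbtree.h>
    #include <linux/spinlock.h>
    #include <linux/workqueue.h>

    struct xen_blkif;                       /* per-device state, see common.h */

    /* Trimmed sketch: each ring now carries its own grant tree and page pool. */
    struct xen_blkif_ring {
            /* ... ring state, pending requests, xenblkd thread, ... */

            /* Tree of persistent grants, previously per-device. */
            spinlock_t              pers_gnts_lock;
            struct rb_root          persistent_gnts;
            unsigned int            persistent_gnt_c;
            atomic_t                persistent_gnt_in_use;
            unsigned long           next_lru;

            /* Per-ring purge work for stale persistent grants. */
            struct list_head        persistent_purge_list;
            struct work_struct      persistent_purge_work;

            /* Per-ring buffer of free pages used to map grant refs. */
            spinlock_t              free_pages_lock;
            int                     free_pages_num;
            struct list_head        free_pages;

            struct xen_blkif        *blkif; /* back pointer to the device */
    };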

Test was done based on null_blk driver:
dom0: v4.2-rc8 16vcpus 10GB "modprobe null_blk"
domu: v4.2-rc8 16vcpus 10GB

[test]
rw=read
direct=1
ioengine=libaio
bs=4k
time_based
runtime=30
filename=/dev/xvdb
numjobs=16
iodepth=64
iodepth_batch=64
iodepth_batch_complete=64
group_reporting

Results:
iops1: After patch "xen/blkfront: make persistent grants per-queue".
iops2: After this patch.

Queues:        1     4           8           16
Iops orig(k):  810   1064        780         700
Iops1(k):      810   1230(~20%)  1024(~20%)  850(~20%)
Iops2(k):      810   1410(~35%)  1354(~75%)  1440(~100%)

With 4 queues, this patch delivers ~75% more IOPS than the single-queue
baseline (1410k vs 810k), and performance no longer drops as the number
of queues increases.

A chart of these results is available at:
https://www.dropbox.com/s/agrcy2pbzbsvmwv/iops.png?dl=0

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 18b27770d80be1150426b6d7893a9ab2ba8d8102..a00d6c6c288057d67590e0f30bb9b22ba6031341 100644
@@ -123,60 +123,60 @@ module_param(log_stats, int, 0644);
 /* Number of free pages to remove on each call to gnttab_free_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
-static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
+static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&blkif->free_pages_lock, flags);
-       if (list_empty(&blkif->free_pages)) {
-               BUG_ON(blkif->free_pages_num != 0);
-               spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       spin_lock_irqsave(&ring->free_pages_lock, flags);
+       if (list_empty(&ring->free_pages)) {
+               BUG_ON(ring->free_pages_num != 0);
+               spin_unlock_irqrestore(&ring->free_pages_lock, flags);
                return gnttab_alloc_pages(1, page);
        }
-       BUG_ON(blkif->free_pages_num == 0);
-       page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+       BUG_ON(ring->free_pages_num == 0);
+       page[0] = list_first_entry(&ring->free_pages, struct page, lru);
        list_del(&page[0]->lru);
-       blkif->free_pages_num--;
-       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       ring->free_pages_num--;
+       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
 
        return 0;
 }
 
-static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
                                   int num)
 {
        unsigned long flags;
        int i;
 
-       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       spin_lock_irqsave(&ring->free_pages_lock, flags);
        for (i = 0; i < num; i++)
-               list_add(&page[i]->lru, &blkif->free_pages);
-       blkif->free_pages_num += num;
-       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+               list_add(&page[i]->lru, &ring->free_pages);
+       ring->free_pages_num += num;
+       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
 }
 
-static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
 {
        /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
        struct page *page[NUM_BATCH_FREE_PAGES];
        unsigned int num_pages = 0;
        unsigned long flags;
 
-       spin_lock_irqsave(&blkif->free_pages_lock, flags);
-       while (blkif->free_pages_num > num) {
-               BUG_ON(list_empty(&blkif->free_pages));
-               page[num_pages] = list_first_entry(&blkif->free_pages,
+       spin_lock_irqsave(&ring->free_pages_lock, flags);
+       while (ring->free_pages_num > num) {
+               BUG_ON(list_empty(&ring->free_pages));
+               page[num_pages] = list_first_entry(&ring->free_pages,
                                                   struct page, lru);
                list_del(&page[num_pages]->lru);
-               blkif->free_pages_num--;
+               ring->free_pages_num--;
                if (++num_pages == NUM_BATCH_FREE_PAGES) {
-                       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+                       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
                        gnttab_free_pages(num_pages, page);
-                       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+                       spin_lock_irqsave(&ring->free_pages_lock, flags);
                        num_pages = 0;
                }
        }
-       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
        if (num_pages != 0)
                gnttab_free_pages(num_pages, page);
 }
@@ -199,23 +199,29 @@ static void make_response(struct xen_blkif_ring *ring, u64 id,
 
 
 /*
- * pers_gnts_lock must be used around all the persistent grant helpers
- * because blkback may use multi-thread/queue for each backend.
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single kernel thread for each ring, so we
+ * can be sure that these functions will never be called concurrently.
+ *
+ * The only exception is put_persistent_gnt, which can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
  */
-static int add_persistent_gnt(struct xen_blkif *blkif,
+static int add_persistent_gnt(struct xen_blkif_ring *ring,
                               struct persistent_gnt *persistent_gnt)
 {
        struct rb_node **new = NULL, *parent = NULL;
        struct persistent_gnt *this;
+       struct xen_blkif *blkif = ring->blkif;
 
-       BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
-       if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+       if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
                if (!blkif->vbd.overflow_max_grants)
                        blkif->vbd.overflow_max_grants = 1;
                return -EBUSY;
        }
        /* Figure out where to put new node */
-       new = &blkif->persistent_gnts.rb_node;
+       new = &ring->persistent_gnts.rb_node;
        while (*new) {
                this = container_of(*new, struct persistent_gnt, node);
 
@@ -234,20 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
        set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
        /* Add new node and rebalance tree. */
        rb_link_node(&(persistent_gnt->node), parent, new);
-       rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
-       blkif->persistent_gnt_c++;
-       atomic_inc(&blkif->persistent_gnt_in_use);
+       rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
+       ring->persistent_gnt_c++;
+       atomic_inc(&ring->persistent_gnt_in_use);
        return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
                                                 grant_ref_t gref)
 {
        struct persistent_gnt *data;
        struct rb_node *node = NULL;
 
-       BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
-       node = blkif->persistent_gnts.rb_node;
+       node = ring->persistent_gnts.rb_node;
        while (node) {
                data = container_of(node, struct persistent_gnt, node);
 
@@ -261,25 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
                                return NULL;
                        }
                        set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
-                       atomic_inc(&blkif->persistent_gnt_in_use);
+                       atomic_inc(&ring->persistent_gnt_in_use);
                        return data;
                }
        }
        return NULL;
 }
 
-static void put_persistent_gnt(struct xen_blkif *blkif,
+static void put_persistent_gnt(struct xen_blkif_ring *ring,
                                struct persistent_gnt *persistent_gnt)
 {
-       BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
        if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
                pr_alert_ratelimited("freeing a grant already unused\n");
        set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
        clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
-       atomic_dec(&blkif->persistent_gnt_in_use);
+       atomic_dec(&ring->persistent_gnt_in_use);
 }
 
-static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
                                  unsigned int num)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -293,7 +297,6 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
        unmap_data.unmap_ops = unmap;
        unmap_data.kunmap_ops = NULL;
 
-       BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
        foreach_grant_safe(persistent_gnt, n, root, node) {
                BUG_ON(persistent_gnt->handle ==
                        BLKBACK_INVALID_HANDLE);
@@ -311,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                        unmap_data.count = segs_to_unmap;
                        BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
-                       put_free_pages(blkif, pages, segs_to_unmap);
+                       put_free_pages(ring, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
 
@@ -328,17 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct persistent_gnt *persistent_gnt;
        int segs_to_unmap = 0;
-       struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+       struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
        struct gntab_unmap_queue_data unmap_data;
-       unsigned long flags;
 
        unmap_data.pages = pages;
        unmap_data.unmap_ops = unmap;
        unmap_data.kunmap_ops = NULL;
 
-       spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
-       while(!list_empty(&blkif->persistent_purge_list)) {
-               persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+       while(!list_empty(&ring->persistent_purge_list)) {
+               persistent_gnt = list_first_entry(&ring->persistent_purge_list,
                                                  struct persistent_gnt,
                                                  remove_node);
                list_del(&persistent_gnt->remove_node);
@@ -353,45 +354,42 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
                if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
                        unmap_data.count = segs_to_unmap;
                        BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-                       put_free_pages(blkif, pages, segs_to_unmap);
+                       put_free_pages(ring, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
                kfree(persistent_gnt);
        }
-       spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
        if (segs_to_unmap > 0) {
                unmap_data.count = segs_to_unmap;
                BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-               put_free_pages(blkif, pages, segs_to_unmap);
+               put_free_pages(ring, pages, segs_to_unmap);
        }
 }
 
-static void purge_persistent_gnt(struct xen_blkif *blkif)
+static void purge_persistent_gnt(struct xen_blkif_ring *ring)
 {
        struct persistent_gnt *persistent_gnt;
        struct rb_node *n;
        unsigned int num_clean, total;
        bool scan_used = false, clean_used = false;
        struct rb_root *root;
-       unsigned long flags;
 
-       spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
-       if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
-           (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
-           !blkif->vbd.overflow_max_grants)) {
+       if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+           (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+           !ring->blkif->vbd.overflow_max_grants)) {
                goto out;
        }
 
-       if (work_busy(&blkif->persistent_purge_work)) {
+       if (work_busy(&ring->persistent_purge_work)) {
                pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
                goto out;
        }
 
        num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
-       num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
-       num_clean = min(blkif->persistent_gnt_c, num_clean);
+       num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+       num_clean = min(ring->persistent_gnt_c, num_clean);
        if ((num_clean == 0) ||
-           (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+           (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
                goto out;
 
        /*
@@ -407,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
        pr_debug("Going to purge %u persistent grants\n", num_clean);
 
-       BUG_ON(!list_empty(&blkif->persistent_purge_list));
-       root = &blkif->persistent_gnts;
+       BUG_ON(!list_empty(&ring->persistent_purge_list));
+       root = &ring->persistent_gnts;
 purge_list:
        foreach_grant_safe(persistent_gnt, n, root, node) {
                BUG_ON(persistent_gnt->handle ==
@@ -427,7 +425,7 @@ purge_list:
 
                rb_erase(&persistent_gnt->node, root);
                list_add(&persistent_gnt->remove_node,
-                        &blkif->persistent_purge_list);
+                        &ring->persistent_purge_list);
                if (--num_clean == 0)
                        goto finished;
        }
@@ -448,18 +446,14 @@ finished:
                goto purge_list;
        }
 
-       blkif->persistent_gnt_c -= (total - num_clean);
-       spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
-       blkif->vbd.overflow_max_grants = 0;
+       ring->persistent_gnt_c -= (total - num_clean);
+       ring->blkif->vbd.overflow_max_grants = 0;
 
        /* We can defer this work */
-       schedule_work(&blkif->persistent_purge_work);
+       schedule_work(&ring->persistent_purge_work);
        pr_debug("Purged %u/%u\n", (total - num_clean), total);
-       return;
 
 out:
-       spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
-
        return;
 }
 
@@ -591,14 +585,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
  * SCHEDULER FUNCTIONS
  */
 
-static void print_stats(struct xen_blkif *blkif)
+static void print_stats(struct xen_blkif_ring *ring)
 {
+       struct xen_blkif *blkif = ring->blkif;
+
        pr_info("(%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
                 "  |  ds %4llu | pg: %4u/%4d\n",
                 current->comm, blkif->st_oo_req,
                 blkif->st_rd_req, blkif->st_wr_req,
                 blkif->st_f_req, blkif->st_ds_req,
-                blkif->persistent_gnt_c,
+                ring->persistent_gnt_c,
                 xen_blkif_max_pgrants);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
@@ -651,23 +647,23 @@ int xen_blkif_schedule(void *arg)
 
 purge_gnt_list:
                if (blkif->vbd.feature_gnt_persistent &&
-                   time_after(jiffies, blkif->next_lru)) {
-                       purge_persistent_gnt(blkif);
-                       blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+                   time_after(jiffies, ring->next_lru)) {
+                       purge_persistent_gnt(ring);
+                       ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
                }
 
                /* Shrink if we have more than xen_blkif_max_buffer_pages */
-               shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+               shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
 
-               if (log_stats && time_after(jiffies, blkif->st_print))
-                       print_stats(blkif);
+               if (log_stats && time_after(jiffies, ring->blkif->st_print))
+                       print_stats(ring);
        }
 
        /* Drain pending purge work */
-       flush_work(&blkif->persistent_purge_work);
+       flush_work(&ring->persistent_purge_work);
 
        if (log_stats)
-               print_stats(blkif);
+               print_stats(ring);
 
        ring->xenblkd = NULL;
        xen_blkif_put(blkif);
@@ -680,21 +676,16 @@ purge_gnt_list:
  */
 void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
 {
-       struct xen_blkif *blkif = ring->blkif;
-       unsigned long flags;
-
        /* Free all persistent grant pages */
-       spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
-       if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-               free_persistent_gnts(blkif, &blkif->persistent_gnts,
-                       blkif->persistent_gnt_c);
+       if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
+               free_persistent_gnts(ring, &ring->persistent_gnts,
+                       ring->persistent_gnt_c);
 
-       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-       blkif->persistent_gnt_c = 0;
-       spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
+       BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+       ring->persistent_gnt_c = 0;
 
        /* Since we are shutting down remove all pages from the buffer */
-       shrink_free_pagepool(blkif, 0 /* All */);
+       shrink_free_pagepool(ring, 0 /* All */);
 }
 
 static unsigned int xen_blkbk_unmap_prepare(
@@ -705,13 +696,10 @@ static unsigned int xen_blkbk_unmap_prepare(
        struct page **unmap_pages)
 {
        unsigned int i, invcount = 0;
-       unsigned long flags;
 
        for (i = 0; i < num; i++) {
                if (pages[i]->persistent_gnt != NULL) {
-                       spin_lock_irqsave(&ring->blkif->pers_gnts_lock, flags);
-                       put_persistent_gnt(ring->blkif, pages[i]->persistent_gnt);
-                       spin_unlock_irqrestore(&ring->blkif->pers_gnts_lock, flags);
+                       put_persistent_gnt(ring, pages[i]->persistent_gnt);
                        continue;
                }
                if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
@@ -736,7 +724,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
           but is this the best way to deal with this? */
        BUG_ON(result);
 
-       put_free_pages(blkif, data->pages, data->count);
+       put_free_pages(ring, data->pages, data->count);
        make_response(ring, pending_req->id,
                      pending_req->operation, pending_req->status);
        free_req(ring, pending_req);
@@ -803,7 +791,7 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
                if (invcount) {
                        ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
                        BUG_ON(ret);
-                       put_free_pages(ring->blkif, unmap_pages, invcount);
+                       put_free_pages(ring, unmap_pages, invcount);
                }
                pages += batch;
                num -= batch;
@@ -824,7 +812,6 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
        int last_map = 0, map_until = 0;
        int use_persistent_gnts;
        struct xen_blkif *blkif = ring->blkif;
-       unsigned long irq_flags;
 
        use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
@@ -838,11 +825,9 @@ again:
                uint32_t flags;
 
                if (use_persistent_gnts) {
-                       spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
                        persistent_gnt = get_persistent_gnt(
-                               blkif,
+                               ring,
                                pages[i]->gref);
-                       spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
                }
 
                if (persistent_gnt) {
@@ -853,7 +838,7 @@ again:
                        pages[i]->page = persistent_gnt->page;
                        pages[i]->persistent_gnt = persistent_gnt;
                } else {
-                       if (get_free_page(blkif, &pages[i]->page))
+                       if (get_free_page(ring, &pages[i]->page))
                                goto out_of_memory;
                        addr = vaddr(pages[i]->page);
                        pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -886,7 +871,7 @@ again:
                        BUG_ON(new_map_idx >= segs_to_map);
                        if (unlikely(map[new_map_idx].status != 0)) {
                                pr_debug("invalid buffer -- could not remap it\n");
-                               put_free_pages(blkif, &pages[seg_idx]->page, 1);
+                               put_free_pages(ring, &pages[seg_idx]->page, 1);
                                pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
                                ret |= 1;
                                goto next;
@@ -896,7 +881,7 @@ again:
                        continue;
                }
                if (use_persistent_gnts &&
-                   blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+                   ring->persistent_gnt_c < xen_blkif_max_pgrants) {
                        /*
                         * We are using persistent grants, the grant is
                         * not mapped but we might have room for it.
@@ -914,19 +899,16 @@ again:
                        persistent_gnt->gnt = map[new_map_idx].ref;
                        persistent_gnt->handle = map[new_map_idx].handle;
                        persistent_gnt->page = pages[seg_idx]->page;
-                       spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
-                       if (add_persistent_gnt(blkif,
+                       if (add_persistent_gnt(ring,
                                               persistent_gnt)) {
-                               spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
                                kfree(persistent_gnt);
                                persistent_gnt = NULL;
                                goto next;
                        }
                        pages[seg_idx]->persistent_gnt = persistent_gnt;
                        pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
-                                persistent_gnt->gnt, blkif->persistent_gnt_c,
+                                persistent_gnt->gnt, ring->persistent_gnt_c,
                                 xen_blkif_max_pgrants);
-                       spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
                        goto next;
                }
                if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -950,7 +932,7 @@ next:
 
 out_of_memory:
        pr_alert("%s: out of memory\n", __func__);
-       put_free_pages(blkif, pages_to_gnt, segs_to_map);
+       put_free_pages(ring, pages_to_gnt, segs_to_map);
        return -ENOMEM;
 }
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 847444dc1df4ea89bf7226dfdba3c0bbb632efa8..3c244ecf22a4877910035c79a4d21685be6d9dd8 100644
@@ -291,6 +291,22 @@ struct xen_blkif_ring {
        spinlock_t              pending_free_lock;
        wait_queue_head_t       pending_free_wq;
 
+       /* Tree to store persistent grants. */
+       spinlock_t              pers_gnts_lock;
+       struct rb_root          persistent_gnts;
+       unsigned int            persistent_gnt_c;
+       atomic_t                persistent_gnt_in_use;
+       unsigned long           next_lru;
+
+       /* Used by the kworker that offloads work from the persistent purge. */
+       struct list_head        persistent_purge_list;
+       struct work_struct      persistent_purge_work;
+
+       /* Buffer of free pages to map grant refs. */
+       spinlock_t              free_pages_lock;
+       int                     free_pages_num;
+       struct list_head        free_pages;
+
        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
@@ -312,22 +328,6 @@ struct xen_blkif {
        struct completion       drain_complete;
        atomic_t                drain;
 
-       /* tree to store persistent grants */
-       spinlock_t              pers_gnts_lock;
-       struct rb_root          persistent_gnts;
-       unsigned int            persistent_gnt_c;
-       atomic_t                persistent_gnt_in_use;
-       unsigned long           next_lru;
-
-       /* used by the kworker that offload work from the persistent purge */
-       struct list_head        persistent_purge_list;
-       struct work_struct      persistent_purge_work;
-
-       /* buffer of free pages to map grant refs */
-       spinlock_t              free_pages_lock;
-       int                     free_pages_num;
-       struct list_head        free_pages;
-
        /* statistics */
        unsigned long           st_print;
        unsigned long long                      st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 0d6bb9383a6810bfae65f3c5fa2d65387a2edab9..2b8650a9a6a9087f6436d19553a2c24712f5f532 100644
@@ -150,6 +150,10 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
                spin_lock_init(&ring->blk_ring_lock);
                init_waitqueue_head(&ring->wq);
                INIT_LIST_HEAD(&ring->pending_free);
+               INIT_LIST_HEAD(&ring->persistent_purge_list);
+               INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+               spin_lock_init(&ring->free_pages_lock);
+               INIT_LIST_HEAD(&ring->free_pages);
 
                spin_lock_init(&ring->pending_free_lock);
                init_waitqueue_head(&ring->pending_free_wq);
@@ -175,11 +179,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        atomic_set(&blkif->refcnt, 1);
        init_completion(&blkif->drain_complete);
        INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-       spin_lock_init(&blkif->free_pages_lock);
-       INIT_LIST_HEAD(&blkif->free_pages);
-       INIT_LIST_HEAD(&blkif->persistent_purge_list);
        blkif->st_print = jiffies;
-       INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
        return blkif;
 }
@@ -290,6 +290,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                        i++;
                }
 
+               BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+               BUG_ON(!list_empty(&ring->persistent_purge_list));
+               BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+               BUG_ON(!list_empty(&ring->free_pages));
+               BUG_ON(ring->free_pages_num != 0);
+               BUG_ON(ring->persistent_gnt_c != 0);
                WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
        }
        blkif->nr_ring_pages = 0;
@@ -304,13 +310,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
        xen_vbd_free(&blkif->vbd);
 
        /* Make sure everything is drained before shutting down */
-       BUG_ON(blkif->persistent_gnt_c != 0);
-       BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
-       BUG_ON(blkif->free_pages_num != 0);
-       BUG_ON(!list_empty(&blkif->persistent_purge_list));
-       BUG_ON(!list_empty(&blkif->free_pages));
-       BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
        kfree(blkif->rings);
        kmem_cache_free(xen_blkif_cachep, blkif);
 }