virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
author Asias He <asias@redhat.com>
Wed, 8 Aug 2012 08:07:05 +0000 (16:07 +0800)
committer Rusty Russell <rusty@rustcorp.com.au>
Fri, 28 Sep 2012 05:35:14 +0000 (15:05 +0930)
We need to support both REQ_FLUSH and REQ_FUA in the bio-based path,
since it does not get the sequencing of REQ_FUA into REQ_FLUSH that
the block layer provides for request-based drivers.

REQ_FLUSH is emulated by:
A) If the bio has no data to write:
1. Send VIRTIO_BLK_T_FLUSH to the device.
2. In the flush I/O completion handler, finish the bio.

B) If the bio has data to write:
1. Send VIRTIO_BLK_T_FLUSH to the device.
2. In the flush I/O completion handler, send the actual write data to the device.
3. In the write I/O completion handler, finish the bio.

REQ_FUA is emulated by:
1. Send the actual write data to the device.
2. In the write I/O completion handler, send VIRTIO_BLK_T_FLUSH to the device.
3. In the flush I/O completion handler, finish the bio.
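
Taken together, the two cases form a small state machine driven from the
I/O completion handlers. The following is a minimal userspace C model of
that ordering, not driver code: the VBLK_* flag values mirror the patch,
but struct vbr, send_flush(), send_data() and complete() are hypothetical
stand-ins, the device is stubbed out with printf(), and each complete()
call models one device completion interrupt.

#include <stdio.h>
#include <stdbool.h>

enum {
	VBLK_IS_FLUSH		= 1,
	VBLK_REQ_FLUSH		= 2,
	VBLK_REQ_DATA		= 4,
	VBLK_REQ_FUA		= 8,
};

struct vbr {
	int flags;
};

static void send_flush(struct vbr *vbr)
{
	vbr->flags |= VBLK_IS_FLUSH;
	printf("-> VIRTIO_BLK_T_FLUSH\n");
}

static void send_data(struct vbr *vbr)
{
	vbr->flags &= ~VBLK_IS_FLUSH;
	printf("-> write data\n");
}

/* One device completion; returns true once the bio is finished. */
static bool complete(struct vbr *vbr)
{
	if (vbr->flags & VBLK_IS_FLUSH) {
		/* A flush finished; send the write it was ordering, if any. */
		if (vbr->flags & VBLK_REQ_DATA) {
			send_data(vbr);
			return false;
		}
	} else {
		/* A write finished; REQ_FUA still owes a flush. */
		if (vbr->flags & VBLK_REQ_FUA) {
			vbr->flags &= ~VBLK_REQ_DATA;
			send_flush(vbr);
			return false;
		}
	}
	printf("bio finished\n");
	return true;
}

int main(void)
{
	/* A REQ_FLUSH|REQ_FUA write that carries data. */
	struct vbr vbr = { .flags = VBLK_REQ_FLUSH | VBLK_REQ_FUA | VBLK_REQ_DATA };

	if (vbr.flags & VBLK_REQ_FLUSH)
		send_flush(&vbr);
	else
		send_data(&vbr);

	while (!complete(&vbr))
		; /* each iteration models the next completion */

	return 0;
}

For a REQ_FLUSH|REQ_FUA write with data this prints a flush, the write,
a second flush, and then finishes the bio, matching the order described
above.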

Changes in v7:
- Use vbr->flags to track the request type
- Dropped unnecessary struct virtio_blk *vblk parameter
- Reuse struct virtblk_req in bio done function

Changes in v6:
- Reworked REQ_FLUSH and REQ_FUA emulation order

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
drivers/block/virtio_blk.c

index 95cfeeda4f3a3af201c829d19372a19a64da2d19..2edfb5cef4f2126631ee42f1dcd5f0d55e08313b 100644
@@ -58,10 +58,20 @@ struct virtblk_req
        struct bio *bio;
        struct virtio_blk_outhdr out_hdr;
        struct virtio_scsi_inhdr in_hdr;
+       struct work_struct work;
+       struct virtio_blk *vblk;
+       int flags;
        u8 status;
        struct scatterlist sg[];
 };
 
+enum {
+       VBLK_IS_FLUSH           = 1,
+       VBLK_REQ_FLUSH          = 2,
+       VBLK_REQ_DATA           = 4,
+       VBLK_REQ_FUA            = 8,
+};
+
 static inline int virtblk_result(struct virtblk_req *vbr)
 {
        switch (vbr->status) {
@@ -74,9 +84,133 @@ static inline int virtblk_result(struct virtblk_req *vbr)
        }
 }
 
-static inline void virtblk_request_done(struct virtio_blk *vblk,
-                                       struct virtblk_req *vbr)
+static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
+                                                   gfp_t gfp_mask)
+{
+       struct virtblk_req *vbr;
+
+       vbr = mempool_alloc(vblk->pool, gfp_mask);
+       if (!vbr)
+               return NULL;
+
+       vbr->vblk = vblk;
+       if (use_bio)
+               sg_init_table(vbr->sg, vblk->sg_elems);
+
+       return vbr;
+}
+
+static void virtblk_add_buf_wait(struct virtio_blk *vblk,
+                                struct virtblk_req *vbr,
+                                unsigned long out,
+                                unsigned long in)
+{
+       DEFINE_WAIT(wait);
+
+       for (;;) {
+               prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
+                                         TASK_UNINTERRUPTIBLE);
+
+               spin_lock_irq(vblk->disk->queue->queue_lock);
+               if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+                                     GFP_ATOMIC) < 0) {
+                       spin_unlock_irq(vblk->disk->queue->queue_lock);
+                       io_schedule();
+               } else {
+                       virtqueue_kick(vblk->vq);
+                       spin_unlock_irq(vblk->disk->queue->queue_lock);
+                       break;
+               }
+
+       }
+
+       finish_wait(&vblk->queue_wait, &wait);
+}
+
+static inline void virtblk_add_req(struct virtblk_req *vbr,
+                                  unsigned int out, unsigned int in)
+{
+       struct virtio_blk *vblk = vbr->vblk;
+
+       spin_lock_irq(vblk->disk->queue->queue_lock);
+       if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+                                       GFP_ATOMIC) < 0)) {
+               spin_unlock_irq(vblk->disk->queue->queue_lock);
+               virtblk_add_buf_wait(vblk, vbr, out, in);
+               return;
+       }
+       virtqueue_kick(vblk->vq);
+       spin_unlock_irq(vblk->disk->queue->queue_lock);
+}
+
+static int virtblk_bio_send_flush(struct virtblk_req *vbr)
+{
+       unsigned int out = 0, in = 0;
+
+       vbr->flags |= VBLK_IS_FLUSH;
+       vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+       vbr->out_hdr.sector = 0;
+       vbr->out_hdr.ioprio = 0;
+       sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+       sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
+
+       virtblk_add_req(vbr, out, in);
+
+       return 0;
+}
+
+static int virtblk_bio_send_data(struct virtblk_req *vbr)
 {
+       struct virtio_blk *vblk = vbr->vblk;
+       unsigned int num, out = 0, in = 0;
+       struct bio *bio = vbr->bio;
+
+       vbr->flags &= ~VBLK_IS_FLUSH;
+       vbr->out_hdr.type = 0;
+       vbr->out_hdr.sector = bio->bi_sector;
+       vbr->out_hdr.ioprio = bio_prio(bio);
+
+       sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+
+       num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
+
+       sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
+                  sizeof(vbr->status));
+
+       if (num) {
+               if (bio->bi_rw & REQ_WRITE) {
+                       vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+                       out += num;
+               } else {
+                       vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+                       in += num;
+               }
+       }
+
+       virtblk_add_req(vbr, out, in);
+
+       return 0;
+}
+
+static void virtblk_bio_send_data_work(struct work_struct *work)
+{
+       struct virtblk_req *vbr;
+
+       vbr = container_of(work, struct virtblk_req, work);
+
+       virtblk_bio_send_data(vbr);
+}
+
+static void virtblk_bio_send_flush_work(struct work_struct *work)
+{
+       struct virtblk_req *vbr;
+
+       vbr = container_of(work, struct virtblk_req, work);
+
+       virtblk_bio_send_flush(vbr);
+}
+
+static inline void virtblk_request_done(struct virtblk_req *vbr)
+{
+       struct virtio_blk *vblk = vbr->vblk;
        struct request *req = vbr->req;
        int error = virtblk_result(vbr);
 
@@ -92,17 +226,47 @@ static inline void virtblk_request_done(struct virtio_blk *vblk,
        mempool_free(vbr, vblk->pool);
 }
 
-static inline void virtblk_bio_done(struct virtio_blk *vblk,
-                                   struct virtblk_req *vbr)
+static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
 {
-       bio_endio(vbr->bio, virtblk_result(vbr));
-       mempool_free(vbr, vblk->pool);
+       struct virtio_blk *vblk = vbr->vblk;
+
+       if (vbr->flags & VBLK_REQ_DATA) {
+               /* Send out the actual write data */
+               INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
+               queue_work(virtblk_wq, &vbr->work);
+       } else {
+               bio_endio(vbr->bio, virtblk_result(vbr));
+               mempool_free(vbr, vblk->pool);
+       }
+}
+
+static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
+{
+       struct virtio_blk *vblk = vbr->vblk;
+
+       if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
+       /* Send out a flush before ending the bio */
+               vbr->flags &= ~VBLK_REQ_DATA;
+               INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
+               queue_work(virtblk_wq, &vbr->work);
+       } else {
+               bio_endio(vbr->bio, virtblk_result(vbr));
+               mempool_free(vbr, vblk->pool);
+       }
+}
+
+static inline void virtblk_bio_done(struct virtblk_req *vbr)
+{
+       if (unlikely(vbr->flags & VBLK_IS_FLUSH))
+               virtblk_bio_flush_done(vbr);
+       else
+               virtblk_bio_data_done(vbr);
 }
 
 static void virtblk_done(struct virtqueue *vq)
 {
        struct virtio_blk *vblk = vq->vdev->priv;
-       unsigned long bio_done = 0, req_done = 0;
+       bool bio_done = false, req_done = false;
        struct virtblk_req *vbr;
        unsigned long flags;
        unsigned int len;
@@ -110,11 +274,11 @@ static void virtblk_done(struct virtqueue *vq)
        spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
        while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
                if (vbr->bio) {
-                       virtblk_bio_done(vblk, vbr);
-                       bio_done++;
+                       virtblk_bio_done(vbr);
+                       bio_done = true;
                } else {
-                       virtblk_request_done(vblk, vbr);
-                       req_done++;
+                       virtblk_request_done(vbr);
+                       req_done = true;
                }
        }
        /* In case queue is stopped waiting for more buffers. */
@@ -126,18 +290,6 @@ static void virtblk_done(struct virtqueue *vq)
                wake_up(&vblk->queue_wait);
 }
 
-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
-                                                   gfp_t gfp_mask)
-{
-       struct virtblk_req *vbr;
-
-       vbr = mempool_alloc(vblk->pool, gfp_mask);
-       if (vbr && use_bio)
-               sg_init_table(vbr->sg, vblk->sg_elems);
-
-       return vbr;
-}
-
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
                   struct request *req)
 {
@@ -242,41 +394,12 @@ static void virtblk_request(struct request_queue *q)
                virtqueue_kick(vblk->vq);
 }
 
-static void virtblk_add_buf_wait(struct virtio_blk *vblk,
-                                struct virtblk_req *vbr,
-                                unsigned long out,
-                                unsigned long in)
-{
-       DEFINE_WAIT(wait);
-
-       for (;;) {
-               prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
-                                         TASK_UNINTERRUPTIBLE);
-
-               spin_lock_irq(vblk->disk->queue->queue_lock);
-               if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
-                                     GFP_ATOMIC) < 0) {
-                       spin_unlock_irq(vblk->disk->queue->queue_lock);
-                       io_schedule();
-               } else {
-                       virtqueue_kick(vblk->vq);
-                       spin_unlock_irq(vblk->disk->queue->queue_lock);
-                       break;
-               }
-
-       }
-
-       finish_wait(&vblk->queue_wait, &wait);
-}
-
 static void virtblk_make_request(struct request_queue *q, struct bio *bio)
 {
        struct virtio_blk *vblk = q->queuedata;
-       unsigned int num, out = 0, in = 0;
        struct virtblk_req *vbr;
 
        BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-       BUG_ON(bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 
        vbr = virtblk_alloc_req(vblk, GFP_NOIO);
        if (!vbr) {
@@ -285,37 +408,18 @@ static void virtblk_make_request(struct request_queue *q, struct bio *bio)
        }
 
        vbr->bio = bio;
-       vbr->req = NULL;
-       vbr->out_hdr.type = 0;
-       vbr->out_hdr.sector = bio->bi_sector;
-       vbr->out_hdr.ioprio = bio_prio(bio);
-
-       sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
-
-       num = blk_bio_map_sg(q, bio, vbr->sg + out);
-
-       sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
-                  sizeof(vbr->status));
-
-       if (num) {
-               if (bio->bi_rw & REQ_WRITE) {
-                       vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-                       out += num;
-               } else {
-                       vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-                       in += num;
-               }
-       }
-
-       spin_lock_irq(vblk->disk->queue->queue_lock);
-       if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
-                                      GFP_ATOMIC) < 0)) {
-               spin_unlock_irq(vblk->disk->queue->queue_lock);
-               virtblk_add_buf_wait(vblk, vbr, out, in);
-               return;
-       }
-       virtqueue_kick(vblk->vq);
-       spin_unlock_irq(vblk->disk->queue->queue_lock);
+       vbr->flags = 0;
+       if (bio->bi_rw & REQ_FLUSH)
+               vbr->flags |= VBLK_REQ_FLUSH;
+       if (bio->bi_rw & REQ_FUA)
+               vbr->flags |= VBLK_REQ_FUA;
+       if (bio->bi_size)
+               vbr->flags |= VBLK_REQ_DATA;
+
+       if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
+               virtblk_bio_send_flush(vbr);
+       else
+               virtblk_bio_send_data(vbr);
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -529,7 +633,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev)
        u8 writeback = virtblk_get_cache_mode(vdev);
        struct virtio_blk *vblk = vdev->priv;
 
-       if (writeback && !use_bio)
+       if (writeback)
                blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
        else
                blk_queue_flush(vblk->disk->queue, 0);