[BLOCK] ll_rw_blk: Enable out-of-order request completions through softirq
authorJens Axboe <axboe@suse.de>
Mon, 9 Jan 2006 15:02:34 +0000 (16:02 +0100)
committerJens Axboe <axboe@suse.de>
Mon, 9 Jan 2006 15:02:34 +0000 (16:02 +0100)
Request completion can be a quite heavy process, since it needs to
iterate through the entire request and complete the bio's it holds.
This patch adds blk_complete_request() which moves this processing
into a dedicated block softirq.

Signed-off-by: Jens Axboe <axboe@suse.de>
block/ll_rw_blk.c
include/linux/blkdev.h
include/linux/interrupt.h

index 91d3b4828c4916290ded65ab1f016cf2cdfc50ae..8e136450abc21bffc6c5844319f3d1eb6ce9c656 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 /*
  * for max sense size
@@ -62,13 +64,15 @@ static wait_queue_head_t congestion_wqh[2] = {
 /*
  * Controlling structure to kblockd
  */
-static struct workqueue_struct *kblockd_workqueue; 
+static struct workqueue_struct *kblockd_workqueue;
 
 unsigned long blk_max_low_pfn, blk_max_pfn;
 
 EXPORT_SYMBOL(blk_max_low_pfn);
 EXPORT_SYMBOL(blk_max_pfn);
 
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME (HZ/50UL)
 
@@ -207,6 +211,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
 
 EXPORT_SYMBOL(blk_queue_merge_bvec);
 
+void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
+{
+       q->softirq_done_fn = fn;
+}
+
+EXPORT_SYMBOL(blk_queue_softirq_done);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -270,6 +281,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
 static inline void rq_init(request_queue_t *q, struct request *rq)
 {
        INIT_LIST_HEAD(&rq->queuelist);
+       INIT_LIST_HEAD(&rq->donelist);
 
        rq->errors = 0;
        rq->rq_status = RQ_ACTIVE;
@@ -286,6 +298,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
        rq->sense = NULL;
        rq->end_io = NULL;
        rq->end_io_data = NULL;
+       rq->completion_data = NULL;
 }
 
 /**
@@ -3286,6 +3299,87 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
 
 EXPORT_SYMBOL(end_that_request_chunk);
 
+/*
+ * splice the completion data to a local structure and hand off to
+ * process_completion_queue() to complete the requests
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+       struct list_head *cpu_list;
+       LIST_HEAD(local_list);
+
+       local_irq_disable();
+       cpu_list = &__get_cpu_var(blk_cpu_done);
+       list_splice_init(cpu_list, &local_list);
+       local_irq_enable();
+
+       while (!list_empty(&local_list)) {
+               struct request *rq = list_entry(local_list.next, struct request, donelist);
+
+               list_del_init(&rq->donelist);
+               rq->q->softirq_done_fn(rq);
+       }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
+                         void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD) {
+               int cpu = (unsigned long) hcpu;
+
+               local_irq_disable();
+               list_splice_init(&per_cpu(blk_cpu_done, cpu),
+                                &__get_cpu_var(blk_cpu_done));
+               raise_softirq_irqoff(BLOCK_SOFTIRQ);
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+
+static struct notifier_block __devinitdata blk_cpu_notifier = {
+       .notifier_call  = blk_cpu_notify,
+};
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req:      the request being processed
+ *
+ * Description:
+ *     Ends all I/O on a request. It does not handle partial completions,
+ *     unless the driver actually implements this in its completionc callback
+ *     through requeueing. Theh actual completion happens out-of-order,
+ *     through a softirq handler. The user must have registered a completion
+ *     callback through blk_queue_softirq_done().
+ **/
+
+void blk_complete_request(struct request *req)
+{
+       struct list_head *cpu_list;
+       unsigned long flags;
+
+       BUG_ON(!req->q->softirq_done_fn);
+               
+       local_irq_save(flags);
+
+       cpu_list = &__get_cpu_var(blk_cpu_done);
+       list_add_tail(&req->donelist, cpu_list);
+       raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(blk_complete_request);
+       
 /*
  * queue lock must be held
  */
@@ -3364,6 +3458,8 @@ EXPORT_SYMBOL(kblockd_flush);
 
 int __init blk_dev_init(void)
 {
+       int i;
+
        kblockd_workqueue = create_workqueue("kblockd");
        if (!kblockd_workqueue)
                panic("Failed to create kblockd\n");
@@ -3377,6 +3473,14 @@ int __init blk_dev_init(void)
        iocontext_cachep = kmem_cache_create("blkdev_ioc",
                        sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
 
+       for (i = 0; i < NR_CPUS; i++)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+
+       open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
+#ifdef CONFIG_HOTPLUG_CPU
+       register_cpu_notifier(&blk_cpu_notifier);
+#endif
+
        blk_max_low_pfn = max_low_pfn;
        blk_max_pfn = max_pfn;
 
index fb098537742116a71067fa6af70abac17dc055e8..804cc4ec953328107018ab0eb70384b7f8d32b0f 100644 (file)
@@ -118,9 +118,9 @@ struct request_list {
  * try to put the fields that are referenced together in the same cacheline
  */
 struct request {
-       struct list_head queuelist; /* looking for ->queue? you must _not_
-                                    * access it directly, use
-                                    * blkdev_dequeue_request! */
+       struct list_head queuelist;
+       struct list_head donelist;
+
        unsigned long flags;            /* see REQ_ bits below */
 
        /* Maintain bio traversal state for part by part I/O submission.
@@ -141,6 +141,7 @@ struct request {
        struct bio *biotail;
 
        void *elevator_private;
+       void *completion_data;
 
        unsigned short ioprio;
 
@@ -291,6 +292,7 @@ typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
 typedef void (activity_fn) (void *data, int rw);
 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
 typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
+typedef void (softirq_done_fn)(struct request *);
 
 enum blk_queue_state {
        Queue_down,
@@ -332,6 +334,7 @@ struct request_queue
        activity_fn             *activity_fn;
        issue_flush_fn          *issue_flush_fn;
        prepare_flush_fn        *prepare_flush_fn;
+       softirq_done_fn         *softirq_done_fn;
 
        /*
         * Dispatch queue sorting
@@ -646,6 +649,17 @@ extern int end_that_request_first(struct request *, int, int);
 extern int end_that_request_chunk(struct request *, int, int);
 extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
+extern void blk_complete_request(struct request *);
+
+static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
+{
+       if (blk_fs_request(rq))
+               return (nr_bytes >= (rq->hard_nr_sectors << 9));
+       else if (blk_pc_request(rq))
+               return nr_bytes >= rq->data_len;
+
+       return 0;
+}
 
 /*
  * end_that_request_first/chunk() takes an uptodate argument. we account
@@ -694,6 +708,7 @@ extern void blk_queue_segment_boundary(request_queue_t *, unsigned long);
 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(request_queue_t *, int);
+extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
index e50a95fbeb110d69bc9c3e1008fea9981f9d5bcf..f02204706984b9083437cb5a71e237f59f02d547 100644 (file)
@@ -112,6 +112,7 @@ enum
        TIMER_SOFTIRQ,
        NET_TX_SOFTIRQ,
        NET_RX_SOFTIRQ,
+       BLOCK_SOFTIRQ,
        SCSI_SOFTIRQ,
        TASKLET_SOFTIRQ
 };