blk-mq: implement hybrid poll mode for sync O_DIRECT
author Jens Axboe <axboe@fb.com>
Mon, 14 Nov 2016 20:01:59 +0000 (13:01 -0700)
committer Jens Axboe <axboe@fb.com>
Thu, 17 Nov 2016 20:34:51 +0000 (13:34 -0700)
This patch enables a hybrid polling mode. Instead of polling after IO
submission, we can induce an artificial delay, and then poll after that.
For example, if the IO is presumed to complete in 8 usecs from now, we
can sleep for 4 usecs, wake up, and then do our polling. This still puts
a sleep/wakeup cycle in the IO path, but instead of the wakeup happening
after the IO has completed, it'll happen before. With this hybrid
scheme, we can achieve big latency reductions while still using the same
(or less) amount of CPU.

Signed-off-by: Jens Axboe <axboe@fb.com>
Tested-by: Stephen Bates <sbates@raithlin.com>
Reviewed-by: Stephen Bates <sbates@raithlin.com>
block/blk-mq.c
block/blk-sysfs.c
block/blk.h
include/linux/blkdev.h

index f39e69c732cc628c7fa54802160a2c495b28e87d..8cb248fb6a68308d948d2162b95b3a4fe9b5cd8f 100644 (file)
@@ -332,6 +332,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
        rq->rq_flags = 0;
 
        clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+       clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
        blk_mq_put_tag(hctx, ctx, tag);
        blk_queue_exit(q);
 }
@@ -2468,11 +2469,60 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+                                    struct request *rq)
+{
+       struct hrtimer_sleeper hs;
+       enum hrtimer_mode mode;
+       ktime_t kt;
+
+       if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+               return false;
+
+       set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
+
+       /*
+        * This will be replaced with the stats tracking code, using
+        * 'avg_completion_time / 2' as the pre-sleep target.
+        */
+       kt = ktime_set(0, q->poll_nsec);
+
+       mode = HRTIMER_MODE_REL;
+       hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+       hrtimer_set_expires(&hs.timer, kt);
+
+       hrtimer_init_sleeper(&hs, current);
+       do {
+               if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
+                       break;
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               hrtimer_start_expires(&hs.timer, mode);
+               if (hs.task)
+                       io_schedule();
+               hrtimer_cancel(&hs.timer);
+               mode = HRTIMER_MODE_ABS;
+       } while (hs.task && !signal_pending(current));
+
+       __set_current_state(TASK_RUNNING);
+       destroy_hrtimer_on_stack(&hs.timer);
+       return true;
+}
+
 static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
        struct request_queue *q = hctx->queue;
        long state;
 
+       /*
+        * If we sleep, have the caller restart the poll loop to reset
+        * the state. Like for the other success return cases, the
+        * caller is responsible for checking if the IO completed. If
+        * the IO isn't complete, we'll get called again and will go
+        * straight to the busy poll loop.
+        */
+       if (blk_mq_poll_hybrid_sleep(q, rq))
+               return true;
+
        hctx->poll_considered++;
 
        state = current->state;
index 415e764807d03de33c55cafa582c0ebf957295ee..dcdfcaa126539c3e79dbf7f55834397631a82bac 100644 (file)
@@ -350,6 +350,28 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
        return ret;
 }
 
+static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(q->poll_nsec / 1000, page);
+}
+
+static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
+                               size_t count)
+{
+       unsigned long poll_usec;
+       ssize_t ret;
+
+       if (!q->mq_ops || !q->mq_ops->poll)
+               return -EINVAL;
+
+       ret = queue_var_store(&poll_usec, page, count);
+       if (ret < 0)
+               return ret;
+
+       q->poll_nsec = poll_usec * 1000;
+       return ret;
+}
+
 static ssize_t queue_poll_show(struct request_queue *q, char *page)
 {
        return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
@@ -602,6 +624,12 @@ static struct queue_sysfs_entry queue_poll_entry = {
        .store = queue_poll_store,
 };
 
+static struct queue_sysfs_entry queue_poll_delay_entry = {
+       .attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+       .show = queue_poll_delay_show,
+       .store = queue_poll_delay_store,
+};
+
 static struct queue_sysfs_entry queue_wc_entry = {
        .attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
        .show = queue_wc_show,
@@ -655,6 +683,7 @@ static struct attribute *default_attrs[] = {
        &queue_dax_entry.attr,
        &queue_stats_entry.attr,
        &queue_wb_lat_entry.attr,
+       &queue_poll_delay_entry.attr,
        NULL,
 };
 
index aa132dea598c5e14a253fc91c5cae063e0981da0..041185e5f12994dc146528db2627707e42a700ee 100644 (file)
@@ -111,6 +111,7 @@ void blk_account_io_done(struct request *req);
 enum rq_atomic_flags {
        REQ_ATOM_COMPLETE = 0,
        REQ_ATOM_STARTED,
+       REQ_ATOM_POLL_SLEPT,
 };
 
 /*
index bab18ee5810d3ac40c2d51ace9b80e947164769d..37ed4ea705c8401b411c7bcb47bd586635357362 100644 (file)
@@ -509,6 +509,7 @@ struct request_queue {
        unsigned int            request_fn_active;
 
        unsigned int            rq_timeout;
+       unsigned int            poll_nsec;
        struct timer_list       timeout;
        struct work_struct      timeout_work;
        struct list_head        timeout_list;