blk-mq: decouble blk-mq freezing from generic bypassing
authorTejun Heo <tj@kernel.org>
Tue, 1 Jul 2014 16:31:13 +0000 (10:31 -0600)
committerJens Axboe <axboe@fb.com>
Tue, 1 Jul 2014 16:31:13 +0000 (10:31 -0600)
blk_mq freezing is entangled with generic bypassing which bypasses
blkcg and io scheduler and lets IO requests fall through the block
layer to the drivers in FIFO order.  This allows forward progress on
IOs with the advanced features disabled so that those features can be
configured or altered without worrying about stalling IO which may
lead to deadlock through memory allocation.

However, generic bypassing doesn't quite fit blk-mq.  blk-mq currently
doesn't make use of blkcg or ioscheds and it maps bypssing to
freezing, which blocks request processing and drains all the in-flight
ones.  This causes problems as bypassing assumes that request
processing is online.  blk-mq works around this by conditionally
allowing request processing for the problem case - during queue
initialization.

Another weirdity is that except for during queue cleanup, bypassing
started on the generic side prevents blk-mq from processing new
requests but doesn't drain the in-flight ones.  This shouldn't break
anything but again highlights that something isn't quite right here.

The root cause is conflating blk-mq freezing and generic bypassing
which are two different mechanisms.  The only intersecting purpose
that they serve is during queue cleanup.  Let's properly separate
blk-mq freezing from generic bypassing and simply use it where
necessary.

* request_queue->mq_freeze_depth is added and
  blk_mq_[un]freeze_queue() now operate on this counter instead of
  ->bypass_depth.  The replacement for QUEUE_FLAG_BYPASS isn't added
  but the counter is tested directly.  This will be further updated by
  later changes.

* blk_mq_drain_queue() is dropped and "__" prefix is dropped from
  blk_mq_freeze_queue().  Queue cleanup path now calls
  blk_mq_freeze_queue() directly.

* blk_queue_enter()'s fast path condition is simplified to simply
  check @q->mq_freeze_depth.  Previously, the condition was

!blk_queue_dying(q) &&
    (!blk_queue_bypass(q) || !blk_queue_init_done(q))

  mq_freeze_depth is incremented right after dying is set and
  blk_queue_init_done() exception isn't necessary as blk-mq doesn't
  start frozen, which only leaves the blk_queue_bypass() test which
  can be replaced by @q->mq_freeze_depth test.

This change simplifies the code and reduces confusion in the area.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
block/blk-core.c
block/blk-mq.c
block/blk-mq.h
include/linux/blkdev.h

index 0d0bdd65b2d7d3fcd96f6c6d15289ef50ad6cdc7..c359d72e9d76f24a44b7c4b6b8c36c6677436191 100644 (file)
@@ -514,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q)
         * prevent that q->request_fn() gets invoked after draining finished.
         */
        if (q->mq_ops) {
-               blk_mq_drain_queue(q);
+               blk_mq_freeze_queue(q);
                spin_lock_irq(lock);
        } else {
                spin_lock_irq(lock);
index f4bdddd7ed996dc572e353581792502ca63b0b6b..1e324a123d40261ed5e4e0d1702f4e4ea0c562c9 100644 (file)
@@ -84,15 +84,14 @@ static int blk_mq_queue_enter(struct request_queue *q)
        smp_mb();
 
        /* we have problems freezing the queue if it's initializing */
-       if (!blk_queue_dying(q) &&
-           (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
+       if (!q->mq_freeze_depth)
                return 0;
 
        __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
 
        spin_lock_irq(q->queue_lock);
        ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-               !blk_queue_bypass(q) || blk_queue_dying(q),
+               !q->mq_freeze_depth || blk_queue_dying(q),
                *q->queue_lock);
        /* inc usage with lock hold to avoid freeze_queue runs here */
        if (!ret && !blk_queue_dying(q))
@@ -129,11 +128,10 @@ void blk_mq_drain_queue(struct request_queue *q)
  * Guarantee no request is in use, so we can change any data structure of
  * the queue afterward.
  */
-static void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue(struct request_queue *q)
 {
        spin_lock_irq(q->queue_lock);
-       q->bypass_depth++;
-       queue_flag_set(QUEUE_FLAG_BYPASS, q);
+       q->mq_freeze_depth++;
        spin_unlock_irq(q->queue_lock);
 
        blk_mq_drain_queue(q);
@@ -144,11 +142,8 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
        bool wake = false;
 
        spin_lock_irq(q->queue_lock);
-       if (!--q->bypass_depth) {
-               queue_flag_clear(QUEUE_FLAG_BYPASS, q);
-               wake = true;
-       }
-       WARN_ON_ONCE(q->bypass_depth < 0);
+       wake = !--q->mq_freeze_depth;
+       WARN_ON_ONCE(q->mq_freeze_depth < 0);
        spin_unlock_irq(q->queue_lock);
        if (wake)
                wake_up_all(&q->mq_freeze_wq);
index 26460884c6cd835202e90b2d5985c827495ca626..ca4964a6295d48490f22e25f513c39165af190a4 100644 (file)
@@ -28,7 +28,7 @@ struct blk_mq_ctx {
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_init_flush(struct request_queue *q);
-void blk_mq_drain_queue(struct request_queue *q);
+void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
                struct request *orig_rq);
index 8699bcf5f0999db98a8f2a2917c284d950a12d75..c8f344ff74fed8053f190536b8c538cfe0721801 100644 (file)
@@ -470,6 +470,7 @@ struct request_queue {
        struct mutex            sysfs_lock;
 
        int                     bypass_depth;
+       int                     mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
        bsg_job_fn              *bsg_job_fn;