block: return on congested block device
author Goldwyn Rodrigues <rgoldwyn@suse.com>
Tue, 20 Jun 2017 12:05:46 +0000 (07:05 -0500)
committer Jens Axboe <axboe@kernel.dk>
Tue, 20 Jun 2017 13:12:03 +0000 (07:12 -0600)
A new bio operation flag, REQ_NOWAIT, is introduced to identify bios
originating from an iocb with IOCB_NOWAIT. This flag tells the block
layer to return immediately if a request cannot be made, instead
of retrying.

Stacked devices such as md (the ones with make_request_fn hooks)
are currently not supported because they may block for housekeeping.
For example, an md can have a part of the device suspended.
For this reason, only request-based devices are supported.
In the future, this feature will be expanded to stacked devices
by teaching them how to handle the REQ_NOWAIT flag.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-core.c
block/blk-mq.c
fs/direct-io.c
include/linux/bio.h
include/linux/blk_types.h

index 62cf92550512241249c4bc4f05cae285e75587db..279e3c432d7be15793b54073d1cb65b6f98662ab 100644 (file)
@@ -143,6 +143,7 @@ static const struct {
        [BLK_STS_MEDIUM]        = { -ENODATA,   "critical medium" },
        [BLK_STS_PROTECTION]    = { -EILSEQ,    "protection" },
        [BLK_STS_RESOURCE]      = { -ENOMEM,    "kernel resource" },
+       [BLK_STS_AGAIN]         = { -EAGAIN,    "nonblocking retry" },
 
        /* device mapper special case, should not leak out: */
        [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
@@ -1314,6 +1315,11 @@ retry:
        if (!IS_ERR(rq))
                return rq;
 
+       if (op & REQ_NOWAIT) {
+               blk_put_rl(rl);
+               return ERR_PTR(-EAGAIN);
+       }
+
        if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
                blk_put_rl(rl);
                return rq;
@@ -1961,6 +1967,14 @@ generic_make_request_checks(struct bio *bio)
                goto end_io;
        }
 
+       /*
+        * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+        * if queue is not a request based queue.
+        */
+
+       if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+               goto not_supported;
+
        part = bio->bi_bdev->bd_part;
        if (should_fail_request(part, bio->bi_iter.bi_size) ||
            should_fail_request(&part_to_disk(part)->part0,
@@ -2118,7 +2132,7 @@ blk_qc_t generic_make_request(struct bio *bio)
        do {
                struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-               if (likely(blk_queue_enter(q, false) == 0)) {
+               if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
                        struct bio_list lower, same;
 
                        /* Create a fresh bio_list for all subordinate requests */
@@ -2143,7 +2157,11 @@ blk_qc_t generic_make_request(struct bio *bio)
                        bio_list_merge(&bio_list_on_stack[0], &same);
                        bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
                } else {
-                       bio_io_error(bio);
+                       if (unlikely(!blk_queue_dying(q) &&
+                                       (bio->bi_opf & REQ_NOWAIT)))
+                               bio_wouldblock_error(bio);
+                       else
+                               bio_io_error(bio);
                }
                bio = bio_list_pop(&bio_list_on_stack[0]);
        } while (bio);
index dd276a9e138edd2dea233d5ec6880a8ed998655c..ca03cd4b263f340e7de29718bc526368c1c3168a 100644 (file)
@@ -293,6 +293,8 @@ static struct request *blk_mq_get_request(struct request_queue *q,
                data->ctx = blk_mq_get_ctx(q);
        if (likely(!data->hctx))
                data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+       if (op & REQ_NOWAIT)
+               data->flags |= BLK_MQ_REQ_NOWAIT;
 
        if (e) {
                data->flags |= BLK_MQ_REQ_INTERNAL;
@@ -1544,6 +1546,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
        rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
        if (unlikely(!rq)) {
                __wbt_done(q->rq_wb, wb_acct);
+               if (bio->bi_opf & REQ_NOWAIT)
+                       bio_wouldblock_error(bio);
                return BLK_QC_T_NONE;
        }
 
index e8baaabebf13b3368a13e706c3209f304339606e..c87077d1dc33ee85bf4aee7bfda17d0b68f20d3b 100644 (file)
@@ -479,8 +479,12 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
        unsigned i;
        blk_status_t err = bio->bi_status;
 
-       if (err)
-               dio->io_error = -EIO;
+       if (err) {
+               if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+                       dio->io_error = -EAGAIN;
+               else
+                       dio->io_error = -EIO;
+       }
 
        if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
                bio_check_pages_dirty(bio);     /* transfers ownership */
@@ -1194,6 +1198,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        if (iov_iter_rw(iter) == WRITE) {
                dio->op = REQ_OP_WRITE;
                dio->op_flags = REQ_SYNC | REQ_IDLE;
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       dio->op_flags |= REQ_NOWAIT;
        } else {
                dio->op = REQ_OP_READ;
        }
index 40d0541852771eec1c686702bc1159e91813494e..36aa641cde283d2b14f7b1bdd9698050be539c5c 100644 (file)
@@ -416,6 +416,12 @@ static inline void bio_io_error(struct bio *bio)
        bio_endio(bio);
 }
 
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+       bio->bi_status = BLK_STS_AGAIN;
+       bio_endio(bio);
+}
+
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
index dcd45b15a3a5d670fcc7253eca615bef0860f154..e210da6d14b8fb55716e261724c9f623c8240e8f 100644 (file)
@@ -36,6 +36,8 @@ typedef u8 __bitwise blk_status_t;
 /* hack for device mapper, don't use elsewhere: */
 #define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
 
+#define BLK_STS_AGAIN          ((__force blk_status_t)12)
+
 struct blk_issue_stat {
        u64 stat;
 };
@@ -224,6 +226,7 @@ enum req_flag_bits {
        /* command specific flags for REQ_OP_WRITE_ZEROES: */
        __REQ_NOUNMAP,          /* do not free blocks when zeroing */
 
+       __REQ_NOWAIT,           /* Don't wait if request will block */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -242,6 +245,7 @@ enum req_flag_bits {
 #define REQ_BACKGROUND         (1ULL << __REQ_BACKGROUND)
 
 #define REQ_NOUNMAP            (1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT             (1ULL << __REQ_NOWAIT)
 
 #define REQ_FAILFAST_MASK \
        (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)