dm mpath: avoid that path removal can trigger an infinite loop
author Bart Van Assche <bart.vanassche@sandisk.com>
Thu, 27 Apr 2017 17:11:15 +0000 (10:11 -0700)
committer Mike Snitzer <snitzer@redhat.com>
Thu, 27 Apr 2017 21:04:27 +0000 (17:04 -0400)
If blk_get_request() fails, check whether the failure is due to a path
being removed.  If that is the case, fail the path by triggering a call
to fail_path().  This prevents the following scenario from occurring
while removing paths:
* CPU usage of a kworker thread jumps to 100%.
* Removing the DM device becomes impossible.

Delay requeuing if blk_get_request() returns -EBUSY or -EWOULDBLOCK,
and the queue is not dying, because in these cases immediate requeuing
is inappropriate.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
drivers/md/dm-mpath.c

index 4b891d9ff349331a0054db1556ca8670c868f6a9..f3c79f18874796d0d7fc8310f6ec49deaf8d2660 100644 (file)
@@ -489,6 +489,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
        struct pgpath *pgpath;
        struct block_device *bdev;
        struct dm_mpath_io *mpio = get_mpio(map_context);
+       struct request_queue *q;
        struct request *clone;
 
        /* Do we need to select a new pgpath? */
@@ -511,12 +512,18 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
        mpio->nr_bytes = nr_bytes;
 
        bdev = pgpath->path.dev->bdev;
-
-       clone = blk_get_request(bdev_get_queue(bdev),
-                       rq->cmd_flags | REQ_NOMERGE,
-                       GFP_ATOMIC);
+       q = bdev_get_queue(bdev);
+       clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
        if (IS_ERR(clone)) {
                /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
+               bool queue_dying = blk_queue_dying(q);
+               DMERR_LIMIT("blk_get_request() returned %ld%s - requeuing",
+                           PTR_ERR(clone), queue_dying ? " (path offline)" : "");
+               if (queue_dying) {
+                       atomic_inc(&m->pg_init_in_progress);
+                       activate_or_offline_path(pgpath);
+                       return DM_MAPIO_REQUEUE;
+               }
                return DM_MAPIO_DELAY_REQUEUE;
        }
        clone->bio = clone->biotail = NULL;