dm mpath: delay the requeue of blk-mq requests while all paths down
author: Mike Snitzer <snitzer@redhat.com>
Fri, 9 Sep 2016 23:26:19 +0000 (19:26 -0400)
committer: Mike Snitzer <snitzer@redhat.com>
Thu, 15 Sep 2016 15:16:17 +0000 (11:16 -0400)
Return DM_MAPIO_DELAY_REQUEUE from .clone_and_map_rq when no path can be
chosen and the request must be pushed back.  Also, for blk-mq, return
false from .busy when all paths are down and queue_if_no_path is set, so
that blk-mq requests get mapped via .clone_and_map_rq -- which results
in DM_MAPIO_DELAY_REQUEUE being returned to dm-rq.

This change allows for a noticeable reduction in cpu utilization
(reduced kworker load) while all paths are down, e.g.:

system CPU idleness (as measured by fio's --idle-prof=system):
before: system: 86.58%
after:  system: 98.60%

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
drivers/md/dm-mpath.c

index f69715bf05758d5ac1d3e4095251ff546225030d..f31fa1364abcc5304e54f552425be91d76a2a0dc 100644 (file)
@@ -550,9 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
                pgpath = choose_pgpath(m, nr_bytes);
 
        if (!pgpath) {
-               if (!must_push_back_rq(m))
-                       r = -EIO;       /* Failed */
-               return r;
+               if (must_push_back_rq(m))
+                       return DM_MAPIO_DELAY_REQUEUE;
+               return -EIO;    /* Failed */
        } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
                   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
                pg_init_all_paths(m);
@@ -1992,11 +1992,14 @@ static int multipath_busy(struct dm_target *ti)
        struct priority_group *pg, *next_pg;
        struct pgpath *pgpath;
 
-       /* pg_init in progress or no paths available */
-       if (atomic_read(&m->pg_init_in_progress) ||
-           (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
+       /* pg_init in progress */
+       if (atomic_read(&m->pg_init_in_progress))
                return true;
 
+       /* no paths available, for blk-mq: rely on IO mapping to delay requeue */
+       if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+               return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+
        /* Guess which priority_group will be used at next mapping time */
        pg = lockless_dereference(m->current_pg);
        next_pg = lockless_dereference(m->next_pg);