scsi: core: Ensure that the SCSI error handler gets woken up
author Bart Van Assche <bart.vanassche@wdc.com>
Mon, 4 Dec 2017 18:06:23 +0000 (10:06 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 16 Feb 2018 19:23:11 +0000 (20:23 +0100)
commit 3bd6f43f5cb3714f70c591514f344389df593501 upstream.

If scsi_eh_scmd_add() is called concurrently with
scsi_host_queue_ready() while shost->host_blocked > 0 then it can
happen that neither function wakes up the SCSI error handler. Fix
this by making every function that decreases the host_busy counter
wake up the error handler if necessary and by protecting the
host_failed checks with the SCSI host lock.

Reported-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
References: https://marc.info/?l=linux-kernel&m=150461610630736
Fixes: commit 746650160866 ("scsi: convert host_busy to atomic_t")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Tested-by: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Konstantin Khorenko <khorenko@virtuozzo.com>
Cc: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/scsi/hosts.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_lib.c
include/scsi/scsi_host.h

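diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index fe3a0da3ec9783ce47ae6f8747c0c9669539ff75..57bf43e34863ec09c9c15217f7f02fa97711a75f 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c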
@@ -318,6 +318,9 @@ static void scsi_host_dev_release(struct device *dev)
 
        scsi_proc_hostdir_rm(shost->hostt);
 
+       /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */
+       rcu_barrier();
+
        if (shost->tmf_work_q)
                destroy_workqueue(shost->tmf_work_q);
        if (shost->ehandler)
@@ -325,6 +328,8 @@ static void scsi_host_dev_release(struct device *dev)
        if (shost->work_q)
                destroy_workqueue(shost->work_q);
 
+       destroy_rcu_head(&shost->rcu);
+
        if (shost->shost_state == SHOST_CREATED) {
                /*
                 * Free the shost_dev device name here if scsi_host_alloc()
@@ -399,6 +404,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
        INIT_LIST_HEAD(&shost->starved_list);
        init_waitqueue_head(&shost->host_wait);
        mutex_init(&shost->scan_mutex);
+       init_rcu_head(&shost->rcu);
 
        index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
        if (index < 0)
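diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index dab876c6547392c0ccb75047c4cb767640407834..fa504ba83ade6fc11fed44bcb3480a7cccbed36c 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c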
@@ -220,6 +220,17 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd)
        }
 }
 
+static void scsi_eh_inc_host_failed(struct rcu_head *head)
+{
+       struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu);
+       unsigned long flags;
+
+       spin_lock_irqsave(shost->host_lock, flags);
+       shost->host_failed++;
+       scsi_eh_wakeup(shost);
+       spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
 /**
  * scsi_eh_scmd_add - add scsi cmd to error handling.
  * @scmd:      scmd to run eh on.
@@ -242,9 +253,12 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
 
        scsi_eh_reset(scmd);
        list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-       shost->host_failed++;
-       scsi_eh_wakeup(shost);
        spin_unlock_irqrestore(shost->host_lock, flags);
+       /*
+        * Ensure that all tasks observe the host state change before the
+        * host_failed change.
+        */
+       call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
 }
 
 /**
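diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 635cfa1f2aced8debc24c0b75997ec3c5242998d..0d3696e9ddddbd4daccbb9a194a6d8abae6a508e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c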
@@ -318,22 +318,39 @@ static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
                cmd->cmd_len = scsi_command_size(cmd->cmnd);
 }
 
-void scsi_device_unbusy(struct scsi_device *sdev)
+/*
+ * Decrement the host_busy counter and wake up the error handler if necessary.
+ * Avoid as follows that the error handler is not woken up if shost->host_busy
+ * == shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination
+ * with an RCU read lock in this function to ensure that this function in its
+ * entirety either finishes before scsi_eh_scmd_add() increases the
+ * host_failed counter or that it notices the shost state change made by
+ * scsi_eh_scmd_add().
+ */
+static void scsi_dec_host_busy(struct Scsi_Host *shost)
 {
-       struct Scsi_Host *shost = sdev->host;
-       struct scsi_target *starget = scsi_target(sdev);
        unsigned long flags;
 
+       rcu_read_lock();
        atomic_dec(&shost->host_busy);
-       if (starget->can_queue > 0)
-               atomic_dec(&starget->target_busy);
-
-       if (unlikely(scsi_host_in_recovery(shost) &&
-                    (shost->host_failed || shost->host_eh_scheduled))) {
+       if (unlikely(scsi_host_in_recovery(shost))) {
                spin_lock_irqsave(shost->host_lock, flags);
-               scsi_eh_wakeup(shost);
+               if (shost->host_failed || shost->host_eh_scheduled)
+                       scsi_eh_wakeup(shost);
                spin_unlock_irqrestore(shost->host_lock, flags);
        }
+       rcu_read_unlock();
+}
+
+void scsi_device_unbusy(struct scsi_device *sdev)
+{
+       struct Scsi_Host *shost = sdev->host;
+       struct scsi_target *starget = scsi_target(sdev);
+
+       scsi_dec_host_busy(shost);
+
+       if (starget->can_queue > 0)
+               atomic_dec(&starget->target_busy);
 
        atomic_dec(&sdev->device_busy);
 }
@@ -1532,7 +1549,7 @@ starved:
                list_add_tail(&sdev->starved_entry, &shost->starved_list);
        spin_unlock_irq(shost->host_lock);
 out_dec:
-       atomic_dec(&shost->host_busy);
+       scsi_dec_host_busy(shost);
        return 0;
 }
 
@@ -1993,7 +2010,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
        return BLK_STS_OK;
 
 out_dec_host_busy:
-       atomic_dec(&shost->host_busy);
+       scsi_dec_host_busy(shost);
 out_dec_target_busy:
        if (scsi_target(sdev)->can_queue > 0)
                atomic_dec(&scsi_target(sdev)->target_busy);
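diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index a8b7bf879cede4240d921a42f915a230163e5e4e..1a1df0d21ee3f9648cc02a6bc067783ae5e5ef03 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h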
@@ -571,6 +571,8 @@ struct Scsi_Host {
                struct blk_mq_tag_set   tag_set;
        };
 
+       struct rcu_head rcu;
+
        atomic_t host_busy;                /* commands actually active on low-level */
        atomic_t host_blocked;