scsi: fix race between simultaneous decrements of ->host_failed

author Wei Fang <fangwei1@huawei.com>

Tue, 7 Jun 2016 06:53:56 +0000 (14:53 +0800)

committer Willy Tarreau <w@1wt.eu>

Sat, 27 Aug 2016 09:40:28 +0000 (11:40 +0200)
author Wei Fang <fangwei1@huawei.com>
Tue, 7 Jun 2016 06:53:56 +0000 (14:53 +0800)
committer Willy Tarreau <w@1wt.eu>
Sat, 27 Aug 2016 09:40:28 +0000 (11:40 +0200)
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt

index 6ff16b620d84cf63d3e6e669a0cb7dab7d350c54..c08b62d63afa1bc2c81f9607a8921e774c856ad5 100644 (file)
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -255,19 +255,23 @@ scmd->allowed.
  
   3. scmd recovered
      ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
-       - shost->host_failed--
         - clear scmd->eh_eflags
         - scsi_setup_cmd_retry()
         - move from local eh_work_q to local eh_done_q
      LOCKING: none
+    CONCURRENCY: at most one thread per separate eh_work_q to
+                keep queue manipulation lockless
  
   4. EH completes
      ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper
-           layer of failure.
+           layer of failure. May be called concurrently but must have
+           no more than one thread per separate eh_work_q to
+           manipulate the queue locklessly
         - scmd is removed from eh_done_q and scmd->eh_entry is cleared
         - if retry is necessary, scmd is requeued using
            scsi_queue_insert()
         - otherwise, scsi_finish_command() is invoked for scmd
+       - zero shost->host_failed
      LOCKING: queue or finish function performs appropriate locking
  
  
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c

index 063036d876b0a896dc4438f0cd4b6562ce7b29bb..126eb86f239fd0faca27eea628a9305096d1a93c 100644 (file)
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -604,7 +604,7 @@ void ata_scsi_error(struct Scsi_Host *host)
         ata_scsi_port_error_handler(host, ap);
  
         /* finish or retry handled scmd's and clean up */
-       WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+       WARN_ON(!list_empty(&eh_work_q));
  
         DPRINTK("EXIT\n");
  }
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 9acbc885239b3e4a659535b8e9bc2dcbb6afe355..5ba69ea8eb92e9c80e19980f0b88e31b124b87d5 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -898,7 +898,6 @@ static int scsi_request_sense(struct scsi_cmnd *scmd)
   */
  void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
  {
-       scmd->device->host->host_failed--;
         scmd->eh_eflags = 0;
         list_move_tail(&scmd->eh_entry, done_q);
  }
@@ -1892,6 +1891,9 @@ int scsi_error_handler(void *data)
                 else
                         scsi_unjam_host(shost);
  
+               /* All scmds have been handled */
+               shost->host_failed = 0;
+
                 /*
                  * Note - if the above fails completely, the action is to take
                  * individual devices offline and flush the queue of any
author	Wei Fang <fangwei1@huawei.com>
	Tue, 7 Jun 2016 06:53:56 +0000 (14:53 +0800)
committer	Willy Tarreau <w@1wt.eu>
	Sat, 27 Aug 2016 09:40:28 +0000 (11:40 +0200)
Documentation/scsi/scsi_eh.txt		patch | blob | blame | history
drivers/ata/libata-eh.c		patch | blob | blame | history
drivers/scsi/scsi_error.c		patch | blob | blame | history