hpsa: correct handling of HBA device removal
author Don Brace <don.brace@microsemi.com>
Wed, 27 Apr 2016 22:14:17 +0000 (17:14 -0500)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 29 Apr 2016 23:08:24 +0000 (19:08 -0400)
Need to report HBA device removal faster than the
event handler polling interval.

Stop I/O to the removed disk and wait for all
I/O operations to flush before removing the device.

Reviewed-by: Scott Teel <scott.teel@microsemi.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/hpsa.c
drivers/scsi/hpsa.h

index 4f8e3285baa9dff8a94ce9c921737cc2bebcca5e..034624f1ca645f9a2bb8bb9ce4a7713f37fab3b3 100644 (file)
@@ -294,6 +294,9 @@ static void hpsa_disable_rld_caching(struct ctlr_info *h);
 static inline int hpsa_scsi_do_report_phys_luns(struct ctlr_info *h,
        struct ReportExtendedLUNdata *buf, int bufsize);
 static int hpsa_luns_changed(struct ctlr_info *h);
+static bool hpsa_cmd_dev_match(struct ctlr_info *h, struct CommandList *c,
+                              struct hpsa_scsi_dev_t *dev,
+                              unsigned char *scsi3addr);
 
 static inline struct ctlr_info *sdev_to_hba(struct scsi_device *sdev)
 {
@@ -1745,6 +1748,51 @@ static int hpsa_add_device(struct ctlr_info *h, struct hpsa_scsi_dev_t *device)
        return rc;
 }
 
+static int hpsa_find_outstanding_commands_for_dev(struct ctlr_info *h,
+                                               struct hpsa_scsi_dev_t *dev)
+{
+       int i;
+       int count = 0;
+
+       for (i = 0; i < h->nr_cmds; i++) {
+               struct CommandList *c = h->cmd_pool + i;
+               int refcount = atomic_inc_return(&c->refcount);
+
+               if (refcount > 1 && hpsa_cmd_dev_match(h, c, dev,
+                               dev->scsi3addr)) {
+                       unsigned long flags;
+
+                       spin_lock_irqsave(&h->lock, flags);     /* Implied MB */
+                       if (!hpsa_is_cmd_idle(c))
+                               ++count;
+                       spin_unlock_irqrestore(&h->lock, flags);
+               }
+
+               cmd_free(h, c);
+       }
+
+       return count;
+}
+
+static void hpsa_wait_for_outstanding_commands_for_dev(struct ctlr_info *h,
+                                               struct hpsa_scsi_dev_t *device)
+{
+       int cmds = 0;
+       int waits = 0;
+
+       while (1) {
+               cmds = hpsa_find_outstanding_commands_for_dev(h, device);
+               if (cmds == 0)
+                       break;
+               if (++waits > 20)
+                       break;
+               dev_warn(&h->pdev->dev,
+                       "%s: removing device with %d outstanding commands!\n",
+                       __func__, cmds);
+               msleep(1000);
+       }
+}
+
 static void hpsa_remove_device(struct ctlr_info *h,
                        struct hpsa_scsi_dev_t *device)
 {
@@ -1768,8 +1816,13 @@ static void hpsa_remove_device(struct ctlr_info *h,
                        hpsa_show_dev_msg(KERN_WARNING, h, device,
                                        "didn't find device for removal.");
                }
-       } else /* HBA */
+       } else { /* HBA */
+
+               device->removed = 1;
+               hpsa_wait_for_outstanding_commands_for_dev(h, device);
+
                hpsa_remove_sas_device(device);
+       }
 }
 
 static void adjust_hpsa_scsi_table(struct ctlr_info *h,
@@ -2171,7 +2224,8 @@ static void hpsa_unmap_sg_chain_block(struct ctlr_info *h,
 static int handle_ioaccel_mode2_error(struct ctlr_info *h,
                                        struct CommandList *c,
                                        struct scsi_cmnd *cmd,
-                                       struct io_accel2_cmd *c2)
+                                       struct io_accel2_cmd *c2,
+                                       struct hpsa_scsi_dev_t *dev)
 {
        int data_len;
        int retry = 0;
@@ -2235,8 +2289,27 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h,
                case IOACCEL2_STATUS_SR_NO_PATH_TO_DEVICE:
                case IOACCEL2_STATUS_SR_INVALID_DEVICE:
                case IOACCEL2_STATUS_SR_IOACCEL_DISABLED:
-                       /* We will get an event from ctlr to trigger rescan */
-                       retry = 1;
+                       /*
+                        * Did an HBA disk disappear? We will eventually
+                        * get a state change event from the controller but
+                        * in the meantime, we need to tell the OS that the
+                        * HBA disk is no longer there and stop I/O
+                        * from going down. This allows the potential re-insert
+                        * of the disk to get the same device node.
+                        */
+                       if (dev->physical_device && dev->expose_device) {
+                               cmd->result = DID_NO_CONNECT << 16;
+                               dev->removed = 1;
+                               h->drv_req_rescan = 1;
+                               dev_warn(&h->pdev->dev,
+                                       "%s: device is gone!\n", __func__);
+                       } else
+                               /*
+                                * Retry by sending down the RAID path.
+                                * We will get an event from ctlr to
+                                * trigger rescan regardless.
+                                */
+                               retry = 1;
                        break;
                default:
                        retry = 1;
@@ -2368,7 +2441,7 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
                return hpsa_retry_cmd(h, c);
        }
 
-       if (handle_ioaccel_mode2_error(h, c, cmd, c2))
+       if (handle_ioaccel_mode2_error(h, c, cmd, c2, dev))
                return hpsa_retry_cmd(h, c);
 
        return hpsa_cmd_free_and_done(h, c, cmd);
@@ -5263,6 +5336,12 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
 
        dev = cmd->device->hostdata;
        if (!dev) {
+               cmd->result = NOT_READY << 16; /* host byte */
+               cmd->scsi_done(cmd);
+               return 0;
+       }
+
+       if (dev->removed) {
                cmd->result = DID_NO_CONNECT << 16;
                cmd->scsi_done(cmd);
                return 0;
index d06bb7417e365e9e5dd1741bc67bf3d50ed6258e..a1487e67f7a1580a0c5636bc69e2c410c7b7453e 100644 (file)
@@ -63,6 +63,7 @@ struct hpsa_scsi_dev_t {
        unsigned char scsi3addr[8];     /* as presented to the HW */
        u8 physical_device : 1;
        u8 expose_device;
+       u8 removed : 1;                 /* device is marked for death */
 #define RAID_CTLR_LUNID "\0\0\0\0\0\0\0\0"
        unsigned char device_id[16];    /* from inquiry pg. 0x83 */
        u64 sas_address;