[SCSI] lpfc 8.1.12 : Rework offline path to solve HBA reset issues
author: James Smart <James.Smart@Emulex.Com>
Wed, 25 Apr 2007 13:51:45 +0000 (09:51 -0400)
committer: James Bottomley <jejb@mulgrave.il.steeleye.com>
Sun, 6 May 2007 14:33:13 +0000 (09:33 -0500)
Rework offline path to solve HBA reset issues

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_disc.h
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_init.c

index e57a9f5b9d84bceb358bf2b6145af4a9bbb1fa92..6aa91530fcdef2f43ecf4e4618bf97e8ca774427 100644 (file)
@@ -244,6 +244,7 @@ struct lpfc_hba {
 #define FC_FABRIC               0x100  /* We are fabric attached */
 #define FC_ESTABLISH_LINK       0x200  /* Reestablish Link */
 #define FC_RSCN_DISCOVERY       0x400  /* Authenticate all devices after RSCN*/
+#define FC_BLOCK_MGMT_IO        0x800   /* Don't allow mgmt mbx or iocb cmds */
 #define FC_LOADING             0x1000  /* HBA in process of loading drvr */
 #define FC_UNLOADING           0x2000  /* HBA in process of unloading drvr */
 #define FC_SCSI_SCAN_TMO        0x4000 /* scsi scan timer running */
index 6cc88b198fa3251738ddb42ce99ce23d4d223525..c0b02b11d88f5b7cc390ee61d6fcda12e2f3c4ae 100644 (file)
@@ -20,6 +20,7 @@
  *******************************************************************/
 
 #include <linux/ctype.h>
+#include <linux/delay.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 
@@ -213,6 +214,7 @@ lpfc_issue_lip(struct Scsi_Host *host)
        int mbxstatus = MBXERR_ERROR;
 
        if ((phba->fc_flag & FC_OFFLINE_MODE) ||
+           (phba->fc_flag & FC_BLOCK_MGMT_IO) ||
            (phba->hba_state != LPFC_HBA_READY))
                return -EPERM;
 
@@ -247,19 +249,62 @@ lpfc_issue_lip(struct Scsi_Host *host)
 }
 
 static int
-lpfc_selective_reset(struct lpfc_hba *phba)
+lpfc_do_offline(struct lpfc_hba *phba, uint32_t type)
 {
        struct completion online_compl;
+       struct lpfc_sli_ring *pring;
+       struct lpfc_sli *psli;
        int status = 0;
+       int cnt = 0;
+       int i;
 
        init_completion(&online_compl);
        lpfc_workq_post_event(phba, &status, &online_compl,
-                             LPFC_EVT_OFFLINE);
+                             LPFC_EVT_OFFLINE_PREP);
+       wait_for_completion(&online_compl);
+
+       if (status != 0)
+               return -EIO;
+
+       psli = &phba->sli;
+
+       for (i = 0; i < psli->num_rings; i++) {
+               pring = &psli->ring[i];
+               /* The linkdown event takes 30 seconds to timeout. */
+               while (pring->txcmplq_cnt) {
+                       msleep(10);
+                       if (cnt++ > 3000) {
+                               lpfc_printf_log(phba,
+                                       KERN_WARNING, LOG_INIT,
+                                       "%d:0466 Outstanding IO when "
+                                       "bringing Adapter offline\n",
+                                       phba->brd_no);
+                               break;
+                       }
+               }
+       }
+
+       init_completion(&online_compl);
+       lpfc_workq_post_event(phba, &status, &online_compl, type);
        wait_for_completion(&online_compl);
 
        if (status != 0)
                return -EIO;
 
+       return 0;
+}
+
+static int
+lpfc_selective_reset(struct lpfc_hba *phba)
+{
+       struct completion online_compl;
+       int status = 0;
+
+       status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
+
+       if (status != 0)
+               return status;
+
        init_completion(&online_compl);
        lpfc_workq_post_event(phba, &status, &online_compl,
                              LPFC_EVT_ONLINE);
@@ -324,23 +369,19 @@ lpfc_board_mode_store(struct class_device *cdev, const char *buf, size_t count)
 
        init_completion(&online_compl);
 
-       if(strncmp(buf, "online", sizeof("online") - 1) == 0)
+       if(strncmp(buf, "online", sizeof("online") - 1) == 0) {
                lpfc_workq_post_event(phba, &status, &online_compl,
                                      LPFC_EVT_ONLINE);
-       else if (strncmp(buf, "offline", sizeof("offline") - 1) == 0)
-               lpfc_workq_post_event(phba, &status, &online_compl,
-                                     LPFC_EVT_OFFLINE);
+               wait_for_completion(&online_compl);
+       } else if (strncmp(buf, "offline", sizeof("offline") - 1) == 0)
+               status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
        else if (strncmp(buf, "warm", sizeof("warm") - 1) == 0)
-               lpfc_workq_post_event(phba, &status, &online_compl,
-                                     LPFC_EVT_WARM_START);
-       else if (strncmp(buf, "error", sizeof("error") - 1) == 0)
-               lpfc_workq_post_event(phba, &status, &online_compl,
-                                     LPFC_EVT_KILL);
+               status = lpfc_do_offline(phba, LPFC_EVT_WARM_START);
+       else if (strncmp(buf, "error", sizeof("error") - 1) == 0)
+               status = lpfc_do_offline(phba, LPFC_EVT_KILL);
        else
                return -EINVAL;
 
-       wait_for_completion(&online_compl);
-
        if (!status)
                return strlen(buf);
        else
@@ -645,9 +686,7 @@ lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count)
        dev_printk(KERN_NOTICE, &phba->pcidev->dev,
                   "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no);
 
-       init_completion(&online_compl);
-       lpfc_workq_post_event(phba, &stat1, &online_compl, LPFC_EVT_OFFLINE);
-       wait_for_completion(&online_compl);
+       stat1 = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
        if (stat1)
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                        "%d:0463 lpfc_soft_wwpn attribute set failed to reinit "
@@ -1307,6 +1346,12 @@ sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
                        return -EPERM;
                }
 
+               if (phba->fc_flag & FC_BLOCK_MGMT_IO) {
+                       sysfs_mbox_idle(phba);
+                       spin_unlock_irq(host->host_lock);
+                       return  -EAGAIN;
+               }
+
                if ((phba->fc_flag & FC_OFFLINE_MODE) ||
                    (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){
 
@@ -1551,6 +1596,9 @@ lpfc_get_stats(struct Scsi_Host *shost)
        unsigned long seconds;
        int rc = 0;
 
+       if (phba->fc_flag & FC_BLOCK_MGMT_IO)
+               return NULL;
+
        pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
        if (!pmboxq)
                return NULL;
@@ -1651,6 +1699,9 @@ lpfc_reset_stats(struct Scsi_Host *shost)
        MAILBOX_t *pmb;
        int rc = 0;
 
+       if (phba->fc_flag & FC_BLOCK_MGMT_IO)
+               return;
+
        pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
        if (!pmboxq)
                return;
index 4132a2dfac5484e351968db8feeca0bdeff8a34e..bf555d37e8d9e34a99e636c1edbeefa82adbe340 100644 (file)
@@ -112,7 +112,10 @@ void lpfc_hba_init(struct lpfc_hba *, uint32_t *);
 int lpfc_post_buffer(struct lpfc_hba *, struct lpfc_sli_ring *, int, int);
 void lpfc_decode_firmware_rev(struct lpfc_hba *, char *, int);
 int lpfc_online(struct lpfc_hba *);
-int lpfc_offline(struct lpfc_hba *);
+void lpfc_block_mgmt_io(struct lpfc_hba *);
+void lpfc_unblock_mgmt_io(struct lpfc_hba *);
+void lpfc_offline_prep(struct lpfc_hba *);
+void lpfc_offline(struct lpfc_hba *);
 
 int lpfc_sli_setup(struct lpfc_hba *);
 int lpfc_sli_queue_setup(struct lpfc_hba *);
index 9766f909c9c69e02a912eb90686e62d3fdbd4bbf..cfafff09dcfe06c91a2c342bd019fda3c2d9ba69 100644 (file)
@@ -31,6 +31,7 @@
 /* worker thread events */
 enum lpfc_work_type {
        LPFC_EVT_ONLINE,
+       LPFC_EVT_OFFLINE_PREP,
        LPFC_EVT_OFFLINE,
        LPFC_EVT_WARM_START,
        LPFC_EVT_KILL,
index 485a13fa527d8bcf36975fac2bab50ef9d1b4b9e..92c0c4b8895321f588b9d9ff495b0cc8abcddbad 100644 (file)
@@ -185,29 +185,35 @@ lpfc_work_list_done(struct lpfc_hba * phba)
                                *(int *)(evtp->evt_arg1)  = 0;
                        complete((struct completion *)(evtp->evt_arg2));
                        break;
-               case LPFC_EVT_OFFLINE:
+               case LPFC_EVT_OFFLINE_PREP:
                        if (phba->hba_state >= LPFC_LINK_DOWN)
-                               lpfc_offline(phba);
+                               lpfc_offline_prep(phba);
+                       *(int *)(evtp->evt_arg1) = 0;
+                       complete((struct completion *)(evtp->evt_arg2));
+                       break;
+               case LPFC_EVT_OFFLINE:
+                       lpfc_offline(phba);
                        lpfc_sli_brdrestart(phba);
                        *(int *)(evtp->evt_arg1) =
-                               lpfc_sli_brdready(phba,HS_FFRDY | HS_MBRDY);
+                               lpfc_sli_brdready(phba, HS_FFRDY | HS_MBRDY);
+                       lpfc_unblock_mgmt_io(phba);
                        complete((struct completion *)(evtp->evt_arg2));
                        break;
                case LPFC_EVT_WARM_START:
-                       if (phba->hba_state >= LPFC_LINK_DOWN)
-                               lpfc_offline(phba);
+                       lpfc_offline(phba);
                        lpfc_reset_barrier(phba);
                        lpfc_sli_brdreset(phba);
                        lpfc_hba_down_post(phba);
                        *(int *)(evtp->evt_arg1) =
                                lpfc_sli_brdready(phba, HS_MBRDY);
+                       lpfc_unblock_mgmt_io(phba);
                        complete((struct completion *)(evtp->evt_arg2));
                        break;
                case LPFC_EVT_KILL:
-                       if (phba->hba_state >= LPFC_LINK_DOWN)
-                               lpfc_offline(phba);
+                       lpfc_offline(phba);
                        *(int *)(evtp->evt_arg1)
                                = (phba->stopped) ? 0 : lpfc_sli_brdkill(phba);
+                       lpfc_unblock_mgmt_io(phba);
                        complete((struct completion *)(evtp->evt_arg2));
                        break;
                }
index 62677da28c9d77f33b801afa06f775346013b120..dc0fd2e36de1abfced7aaac521f6b1871088865e 100644 (file)
@@ -549,12 +549,15 @@ lpfc_handle_eratt(struct lpfc_hba * phba)
                 * There was a firmware error.  Take the hba offline and then
                 * attempt to restart it.
                 */
+               lpfc_offline_prep(phba);
                lpfc_offline(phba);
                lpfc_sli_brdrestart(phba);
                if (lpfc_online(phba) == 0) {   /* Initialize the HBA */
                        mod_timer(&phba->fc_estabtmo, jiffies + HZ * 60);
+                       lpfc_unblock_mgmt_io(phba);
                        return;
                }
+               lpfc_unblock_mgmt_io(phba);
        } else {
                /* The if clause above forces this code path when the status
                 * failure is a value other than FFER6.  Do not call the offline
@@ -572,7 +575,9 @@ lpfc_handle_eratt(struct lpfc_hba * phba)
                                SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
 
                psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+               lpfc_offline_prep(phba);
                lpfc_offline(phba);
+               lpfc_unblock_mgmt_io(phba);
                phba->hba_state = LPFC_HBA_ERROR;
                lpfc_hba_down_post(phba);
        }
@@ -1286,55 +1291,87 @@ lpfc_online(struct lpfc_hba * phba)
                       "%d:0458 Bring Adapter online\n",
                       phba->brd_no);
 
-       if (!lpfc_sli_queue_setup(phba))
+       lpfc_block_mgmt_io(phba);
+
+       if (!lpfc_sli_queue_setup(phba)) {
+               lpfc_unblock_mgmt_io(phba);
                return 1;
+       }
 
-       if (lpfc_sli_hba_setup(phba))   /* Initialize the HBA */
+       if (lpfc_sli_hba_setup(phba)) { /* Initialize the HBA */
+               lpfc_unblock_mgmt_io(phba);
                return 1;
+       }
 
        spin_lock_irq(phba->host->host_lock);
        phba->fc_flag &= ~FC_OFFLINE_MODE;
        spin_unlock_irq(phba->host->host_lock);
 
+       lpfc_unblock_mgmt_io(phba);
        return 0;
 }
 
-int
-lpfc_offline(struct lpfc_hba * phba)
+void
+lpfc_block_mgmt_io(struct lpfc_hba * phba)
 {
-       struct lpfc_sli_ring *pring;
-       struct lpfc_sli *psli;
        unsigned long iflag;
-       int i;
-       int cnt = 0;
 
-       if (!phba)
-               return 0;
+       spin_lock_irqsave(phba->host->host_lock, iflag);
+       phba->fc_flag |= FC_BLOCK_MGMT_IO;
+       spin_unlock_irqrestore(phba->host->host_lock, iflag);
+}
+
+void
+lpfc_unblock_mgmt_io(struct lpfc_hba * phba)
+{
+       unsigned long iflag;
+
+       spin_lock_irqsave(phba->host->host_lock, iflag);
+       phba->fc_flag &= ~FC_BLOCK_MGMT_IO;
+       spin_unlock_irqrestore(phba->host->host_lock, iflag);
+}
+
+void
+lpfc_offline_prep(struct lpfc_hba * phba)
+{
+       struct lpfc_nodelist  *ndlp, *next_ndlp;
+       struct list_head *listp, *node_list[7];
+       int i;
 
        if (phba->fc_flag & FC_OFFLINE_MODE)
-               return 0;
+               return;
 
-       psli = &phba->sli;
+       lpfc_block_mgmt_io(phba);
 
        lpfc_linkdown(phba);
-       lpfc_sli_flush_mbox_queue(phba);
 
-       for (i = 0; i < psli->num_rings; i++) {
-               pring = &psli->ring[i];
-               /* The linkdown event takes 30 seconds to timeout. */
-               while (pring->txcmplq_cnt) {
-                       msleep(10);
-                       if (cnt++ > 3000) {
-                               lpfc_printf_log(phba,
-                                       KERN_WARNING, LOG_INIT,
-                                       "%d:0466 Outstanding IO when "
-                                       "bringing Adapter offline\n",
-                                       phba->brd_no);
-                               break;
-                       }
-               }
+       /* Issue an unreg_login to all nodes */
+       node_list[0] = &phba->fc_npr_list;  /* MUST do this list first */
+       node_list[1] = &phba->fc_nlpmap_list;
+       node_list[2] = &phba->fc_nlpunmap_list;
+       node_list[3] = &phba->fc_prli_list;
+       node_list[4] = &phba->fc_reglogin_list;
+       node_list[5] = &phba->fc_adisc_list;
+       node_list[6] = &phba->fc_plogi_list;
+       for (i = 0; i < 7; i++) {
+               listp = node_list[i];
+               if (list_empty(listp))
+                       continue;
+
+               list_for_each_entry_safe(ndlp, next_ndlp, listp, nlp_listp)
+                       lpfc_unreg_rpi(phba, ndlp);
        }
 
+       lpfc_sli_flush_mbox_queue(phba);
+}
+
+void
+lpfc_offline(struct lpfc_hba * phba)
+{
+       unsigned long iflag;
+
+       if (phba->fc_flag & FC_OFFLINE_MODE)
+               return;
 
        /* stop all timers associated with this hba */
        lpfc_stop_timer(phba);
@@ -1354,7 +1391,6 @@ lpfc_offline(struct lpfc_hba * phba)
        spin_lock_irqsave(phba->host->host_lock, iflag);
        phba->fc_flag |= FC_OFFLINE_MODE;
        spin_unlock_irqrestore(phba->host->host_lock, iflag);
-       return 0;
 }
 
 /******************************************************************************