[SCSI] lpfc 8.3.31: Fixed system panic due to midlayer abort and driver complete...
author: James Smart <james.smart@emulex.com>
Thu, 10 May 2012 01:17:07 +0000 (21:17 -0400)
committer: James Bottomley <JBottomley@Parallels.com>
Thu, 17 May 2012 09:57:29 +0000 (10:57 +0100)
Signed-off-by: Alex Iannicelli <alex.iannicelli@emulex.com>
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli.h

index 3a1ffdd6d831ebfdd80fdb87b5b3dc15b0444cff..3099047d3faf7815929934af141feb6368c8bbb0 100644 (file)
@@ -620,6 +620,7 @@ struct lpfc_hba {
 #define HBA_AER_ENABLED                0x1000 /* AER enabled with HBA */
 #define HBA_DEVLOSS_TMO         0x2000 /* HBA in devloss timeout */
 #define HBA_RRQ_ACTIVE         0x4000 /* process the rrq active list */
+#define HBA_FCP_IOQ_FLUSH      0x8000 /* FCP I/O queues being flushed */
        uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
        struct lpfc_dmabuf slim2p;
 
index 620fa45866dc1edb0c0038c1640718040fb95170..9b2a16f3bc795092e9fb0a7526ae31e657e16438 100644 (file)
@@ -254,6 +254,7 @@ int
 lpfc_sli_handle_fast_ring_event(struct lpfc_hba *,
                        struct lpfc_sli_ring *, uint32_t);
 
+struct lpfc_iocbq *__lpfc_sli_get_iocbq(struct lpfc_hba *);
 struct lpfc_iocbq * lpfc_sli_get_iocbq(struct lpfc_hba *);
 void lpfc_sli_release_iocbq(struct lpfc_hba *, struct lpfc_iocbq *);
 uint16_t lpfc_sli_next_iotag(struct lpfc_hba *, struct lpfc_iocbq *);
index bf0048a7a3026018a45e7b9ff46043580072f270..cdc5fb92c9f5ba65dcde9c92b7fe10cb3e4f9b61 100644 (file)
@@ -4396,8 +4396,20 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
        ret = fc_block_scsi_eh(cmnd);
        if (ret)
                return ret;
+
+       spin_lock_irq(&phba->hbalock);
+       /* driver queued commands are in process of being flushed */
+       if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) {
+               spin_unlock_irq(&phba->hbalock);
+               lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+                       "3168 SCSI Layer abort requested I/O has been "
+                       "flushed by LLD.\n");
+               return FAILED;
+       }
+
        lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
        if (!lpfc_cmd) {
+               spin_unlock_irq(&phba->hbalock);
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
                         "2873 SCSI Layer I/O Abort Request IO CMPL Status "
                         "x%x ID %d LUN %d\n",
@@ -4405,23 +4417,34 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
                return SUCCESS;
        }
 
+       iocb = &lpfc_cmd->cur_iocbq;
+       /* the command is in process of being cancelled */
+       if (!(iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
+               spin_unlock_irq(&phba->hbalock);
+               lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+                       "3169 SCSI Layer abort requested I/O has been "
+                       "cancelled by LLD.\n");
+               return FAILED;
+       }
        /*
         * If pCmd field of the corresponding lpfc_scsi_buf structure
         * points to a different SCSI command, then the driver has
         * already completed this command, but the midlayer did not
-        * see the completion before the eh fired.  Just return
-        * SUCCESS.
+        * see the completion before the eh fired. Just return SUCCESS.
         */
-       iocb = &lpfc_cmd->cur_iocbq;
-       if (lpfc_cmd->pCmd != cmnd)
-               goto out;
+       if (lpfc_cmd->pCmd != cmnd) {
+               lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
+                       "3170 SCSI Layer abort requested I/O has been "
+                       "completed by LLD.\n");
+               goto out_unlock;
+       }
 
        BUG_ON(iocb->context1 != lpfc_cmd);
 
-       abtsiocb = lpfc_sli_get_iocbq(phba);
+       abtsiocb = __lpfc_sli_get_iocbq(phba);
        if (abtsiocb == NULL) {
                ret = FAILED;
-               goto out;
+               goto out_unlock;
        }
 
        /*
@@ -4453,6 +4476,9 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 
        abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
        abtsiocb->vport = vport;
+       /* no longer need the lock after this point */
+       spin_unlock_irq(&phba->hbalock);
+
        if (lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0) ==
            IOCB_ERROR) {
                lpfc_sli_release_iocbq(phba, abtsiocb);
@@ -4469,10 +4495,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
        wait_event_timeout(waitq,
                          (lpfc_cmd->pCmd != cmnd),
                           (2*vport->cfg_devloss_tmo*HZ));
-
-       spin_lock_irq(shost->host_lock);
        lpfc_cmd->waitq = NULL;
-       spin_unlock_irq(shost->host_lock);
 
        if (lpfc_cmd->pCmd == cmnd) {
                ret = FAILED;
@@ -4482,8 +4505,11 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
                                 "LUN %d\n",
                                 ret, cmnd->device->id, cmnd->device->lun);
        }
+       goto out;
 
- out:
+out_unlock:
+       spin_unlock_irq(&phba->hbalock);
+out:
        lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
                         "0749 SCSI Layer I/O Abort Request Status x%x ID %d "
                         "LUN %d\n", ret, cmnd->device->id,
index 57eaaa51e1d6bbed24ef33e69a6dc9d8effd4473..7c4067913c29a18fdf156ecb0839da1b5289205d 100644 (file)
@@ -502,7 +502,7 @@ lpfc_resp_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
  * allocation is successful, it returns pointer to the newly
  * allocated iocb object else it returns NULL.
  **/
-static struct lpfc_iocbq *
+struct lpfc_iocbq *
 __lpfc_sli_get_iocbq(struct lpfc_hba *phba)
 {
        struct list_head *lpfc_iocb_list = &phba->lpfc_iocb_list;
@@ -1259,7 +1259,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                        struct lpfc_iocbq *piocb)
 {
        list_add_tail(&piocb->list, &pring->txcmplq);
-       piocb->iocb_flag |= LPFC_IO_ON_Q;
+       piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ;
        pring->txcmplq_cnt++;
        if (pring->txcmplq_cnt > pring->txcmplq_max)
                pring->txcmplq_max = pring->txcmplq_cnt;
@@ -2558,9 +2558,9 @@ lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
        if (iotag != 0 && iotag <= phba->sli.last_iotag) {
                cmd_iocb = phba->sli.iocbq_lookup[iotag];
                list_del_init(&cmd_iocb->list);
-               if (cmd_iocb->iocb_flag & LPFC_IO_ON_Q) {
+               if (cmd_iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ) {
                        pring->txcmplq_cnt--;
-                       cmd_iocb->iocb_flag &= ~LPFC_IO_ON_Q;
+                       cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
                }
                return cmd_iocb;
        }
@@ -2593,14 +2593,14 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba,
 
        if (iotag != 0 && iotag <= phba->sli.last_iotag) {
                cmd_iocb = phba->sli.iocbq_lookup[iotag];
-               list_del_init(&cmd_iocb->list);
-               if (cmd_iocb->iocb_flag & LPFC_IO_ON_Q) {
-                       cmd_iocb->iocb_flag &= ~LPFC_IO_ON_Q;
+               if (cmd_iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ) {
+                       /* remove from txcmpl queue list */
+                       list_del_init(&cmd_iocb->list);
+                       cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
                        pring->txcmplq_cnt--;
+                       return cmd_iocb;
                }
-               return cmd_iocb;
        }
-
        lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                        "0372 iotag x%x is out off range: max iotag (x%x)\n",
                        iotag, phba->sli.last_iotag);
@@ -3468,6 +3468,9 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
        /* Retrieve everything on the txcmplq */
        list_splice_init(&pring->txcmplq, &txcmplq);
        pring->txcmplq_cnt = 0;
+
+       /* Indicate the I/O queues are flushed */
+       phba->hba_flag |= HBA_FCP_IOQ_FLUSH;
        spin_unlock_irq(&phba->hbalock);
 
        /* Flush the txq */
@@ -6069,6 +6072,8 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
        else
                phba->hba_flag &= ~HBA_FIP_SUPPORT;
 
+       phba->hba_flag &= ~HBA_FCP_IOQ_FLUSH;
+
        if (phba->sli_rev != LPFC_SLI_REV4) {
                lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
                        "0376 READ_REV Error. SLI Level %d "
index 3290b8e7ab655ef2bb9bb0d99dc39492832d0b20..2626f58c0747ac0e60d9a807bde5465b86c7dc71 100644 (file)
@@ -68,7 +68,7 @@ struct lpfc_iocbq {
 #define LPFC_EXCHANGE_BUSY     0x40    /* SLI4 hba reported XB in response */
 #define LPFC_USE_FCPWQIDX      0x80    /* Submit to specified FCPWQ index */
 #define DSS_SECURITY_OP                0x100   /* security IO */
-#define LPFC_IO_ON_Q           0x200   /* The IO is still on the TXCMPLQ */
+#define LPFC_IO_ON_TXCMPLQ     0x200   /* The IO is still on the TXCMPLQ */
 #define LPFC_IO_DIF            0x400   /* T10 DIF IO */
 
 #define LPFC_FIP_ELS_ID_MASK   0xc000  /* ELS_ID range 0-3, non-shifted mask */