[SCSI] ibmvfc: Fix soft lockup on resume
author Brian King <brking@linux.vnet.ibm.com>
Thu, 17 Jun 2010 18:55:13 +0000 (13:55 -0500)
committer James Bottomley <James.Bottomley@suse.de>
Tue, 27 Jul 2010 17:02:39 +0000 (12:02 -0500)
This fixes a softlockup seen on resume. During resume, the CRQ
must be reenabled. However, the H_ENABLE_CRQ hcall used to do
this may return H_BUSY or H_LONG_BUSY. When this happens, the
caller is expected to retry later. Normally the H_ENABLE_CRQ
succeeds relatively soon. However, we have seen cases where
this can take long enough to see softlockup warnings.
This patch changes a simple loop, which was causing the
softlockup, to a loop at task level which sleeps between
retries rather than simply spinning.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvfc.h

index fef49521cbc3f6888684789f4bd50a67f1b5a3bb..d6fcb3f43969efb31c070f3df5cd60df9ca9e748 100644 (file)
@@ -504,12 +504,23 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
                if (vhost->action == IBMVFC_HOST_ACTION_ALLOC_TGTS)
                        vhost->action = action;
                break;
-       case IBMVFC_HOST_ACTION_LOGO:
        case IBMVFC_HOST_ACTION_INIT:
        case IBMVFC_HOST_ACTION_TGT_DEL:
+               switch (vhost->action) {
+               case IBMVFC_HOST_ACTION_RESET:
+               case IBMVFC_HOST_ACTION_REENABLE:
+                       break;
+               default:
+                       vhost->action = action;
+                       break;
+               };
+               break;
+       case IBMVFC_HOST_ACTION_LOGO:
        case IBMVFC_HOST_ACTION_QUERY_TGTS:
        case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
        case IBMVFC_HOST_ACTION_NONE:
+       case IBMVFC_HOST_ACTION_RESET:
+       case IBMVFC_HOST_ACTION_REENABLE:
        default:
                vhost->action = action;
                break;
@@ -641,7 +652,7 @@ static int ibmvfc_send_crq_init_complete(struct ibmvfc_host *vhost)
  **/
 static void ibmvfc_release_crq_queue(struct ibmvfc_host *vhost)
 {
-       long rc;
+       long rc = 0;
        struct vio_dev *vdev = to_vio_dev(vhost->dev);
        struct ibmvfc_crq_queue *crq = &vhost->crq;
 
@@ -649,6 +660,8 @@ static void ibmvfc_release_crq_queue(struct ibmvfc_host *vhost)
        free_irq(vdev->irq, vhost);
        tasklet_kill(&vhost->tasklet);
        do {
+               if (rc)
+                       msleep(100);
                rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
        } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
@@ -667,11 +680,13 @@ static void ibmvfc_release_crq_queue(struct ibmvfc_host *vhost)
  **/
 static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
 {
-       int rc;
+       int rc = 0;
        struct vio_dev *vdev = to_vio_dev(vhost->dev);
 
        /* Re-enable the CRQ */
        do {
+               if (rc)
+                       msleep(100);
                rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address);
        } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
@@ -690,15 +705,19 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
  **/
 static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 {
-       int rc;
+       int rc = 0;
+       unsigned long flags;
        struct vio_dev *vdev = to_vio_dev(vhost->dev);
        struct ibmvfc_crq_queue *crq = &vhost->crq;
 
        /* Close the CRQ */
        do {
+               if (rc)
+                       msleep(100);
                rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
        } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
+       spin_lock_irqsave(vhost->host->host_lock, flags);
        vhost->state = IBMVFC_NO_CRQ;
        vhost->logged_in = 0;
        ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
@@ -716,6 +735,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
                dev_warn(vhost->dev, "Partner adapter not ready\n");
        else if (rc != 0)
                dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
+       spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
        return rc;
 }
@@ -821,17 +841,9 @@ static void ibmvfc_purge_requests(struct ibmvfc_host *vhost, int error_code)
  **/
 static void ibmvfc_hard_reset_host(struct ibmvfc_host *vhost)
 {
-       int rc;
-
-       scsi_block_requests(vhost->host);
        ibmvfc_purge_requests(vhost, DID_ERROR);
-       if ((rc = ibmvfc_reset_crq(vhost)) ||
-           (rc = ibmvfc_send_crq_init(vhost)) ||
-           (rc = vio_enable_interrupts(to_vio_dev(vhost->dev)))) {
-               dev_err(vhost->dev, "Error after reset rc=%d\n", rc);
-               ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
-       } else
-               ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+       ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_RESET);
 }
 
 /**
@@ -2606,22 +2618,13 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
                        dev_info(vhost->dev, "Re-enabling adapter\n");
                        vhost->client_migrated = 1;
                        ibmvfc_purge_requests(vhost, DID_REQUEUE);
-                       if ((rc = ibmvfc_reenable_crq_queue(vhost)) ||
-                           (rc = ibmvfc_send_crq_init(vhost))) {
-                               ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
-                               dev_err(vhost->dev, "Error after enable (rc=%ld)\n", rc);
-                       } else
-                               ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+                       ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+                       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE);
                } else {
                        dev_err(vhost->dev, "Virtual adapter failed (rc=%d)\n", crq->format);
-
                        ibmvfc_purge_requests(vhost, DID_ERROR);
-                       if ((rc = ibmvfc_reset_crq(vhost)) ||
-                           (rc = ibmvfc_send_crq_init(vhost))) {
-                               ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
-                               dev_err(vhost->dev, "Error after reset (rc=%ld)\n", rc);
-                       } else
-                               ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+                       ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+                       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_RESET);
                }
                return;
        case IBMVFC_CRQ_CMD_RSP:
@@ -4123,6 +4126,8 @@ static int __ibmvfc_work_to_do(struct ibmvfc_host *vhost)
        case IBMVFC_HOST_ACTION_TGT_DEL:
        case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
        case IBMVFC_HOST_ACTION_QUERY:
+       case IBMVFC_HOST_ACTION_RESET:
+       case IBMVFC_HOST_ACTION_REENABLE:
        default:
                break;
        };
@@ -4220,6 +4225,7 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
        struct ibmvfc_target *tgt;
        unsigned long flags;
        struct fc_rport *rport;
+       int rc;
 
        ibmvfc_log_ae(vhost, vhost->events_to_log);
        spin_lock_irqsave(vhost->host->host_lock, flags);
@@ -4229,6 +4235,27 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
        case IBMVFC_HOST_ACTION_LOGO_WAIT:
        case IBMVFC_HOST_ACTION_INIT_WAIT:
                break;
+       case IBMVFC_HOST_ACTION_RESET:
+               vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
+               spin_unlock_irqrestore(vhost->host->host_lock, flags);
+               rc = ibmvfc_reset_crq(vhost);
+               spin_lock_irqsave(vhost->host->host_lock, flags);
+               if (rc || (rc = ibmvfc_send_crq_init(vhost)) ||
+                   (rc = vio_enable_interrupts(to_vio_dev(vhost->dev)))) {
+                       ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+                       dev_err(vhost->dev, "Error after reset (rc=%d)\n", rc);
+               }
+               break;
+       case IBMVFC_HOST_ACTION_REENABLE:
+               vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
+               spin_unlock_irqrestore(vhost->host->host_lock, flags);
+               rc = ibmvfc_reenable_crq_queue(vhost);
+               spin_lock_irqsave(vhost->host->host_lock, flags);
+               if (rc || (rc = ibmvfc_send_crq_init(vhost))) {
+                       ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+                       dev_err(vhost->dev, "Error after enable (rc=%d)\n", rc);
+               }
+               break;
        case IBMVFC_HOST_ACTION_LOGO:
                vhost->job_step(vhost);
                break;
index 7e9742764e4bc56d08061546b67c47efc6552fff..2010d73aca8100d9dcf9a83cf14f1d116a5f8e64 100644 (file)
@@ -649,6 +649,8 @@ struct ibmvfc_event_pool {
 
 enum ibmvfc_host_action {
        IBMVFC_HOST_ACTION_NONE = 0,
+       IBMVFC_HOST_ACTION_RESET,
+       IBMVFC_HOST_ACTION_REENABLE,
        IBMVFC_HOST_ACTION_LOGO,
        IBMVFC_HOST_ACTION_LOGO_WAIT,
        IBMVFC_HOST_ACTION_INIT,