RDMA/ocrdma: Do proper cleanup even if FW is in error state
author: Mitesh Ahuja <mitesh.ahuja@emulex.Com>
Tue, 10 Jun 2014 14:02:21 +0000 (19:32 +0530)
committer: Roland Dreier <roland@purestorage.com>
Fri, 1 Aug 2014 22:07:36 +0000 (15:07 -0700)
If any mailbox command reports a timeout, save that state in the driver
to prevent issuing any more commands to the HW.  Do proper cleanup
even if the FW is in an error state.

Signed-off-by: Mitesh Ahuja <mitesh.ahuja@emulex.Com>
Signed-off-by: Selvin Xavier <selvin.xavier@emulex.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

index 5cd65c244191077149a7eebb935656a3215dc312..fc273782986e0b54c90ccbb3734729180655cb91 100644 (file)
@@ -137,6 +137,7 @@ struct mqe_ctx {
        u16 cqe_status;
        u16 ext_status;
        bool cmd_done;
+       bool fw_error_state;
 };
 
 struct ocrdma_hw_mr {
index 55308b667649e8b806b1e141029424fea2e489d3..5b6e9d9c779eb2ff15addec29c1fb6b6231b7e8b 100644 (file)
@@ -966,8 +966,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
                                    msecs_to_jiffies(30000));
        if (status)
                return 0;
-       else
+       else {
+               dev->mqe_ctx.fw_error_state = true;
+               pr_err("%s(%d) mailbox timeout: fw not responding\n",
+                      __func__, dev->id);
                return -1;
+       }
 }
 
 /* issue a mailbox command on the MQ */
@@ -979,6 +983,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
        struct ocrdma_mbx_rsp *rsp = NULL;
 
        mutex_lock(&dev->mqe_ctx.lock);
+       if (dev->mqe_ctx.fw_error_state)
+               goto mbx_err;
        ocrdma_post_mqe(dev, mqe);
        status = ocrdma_wait_mqe_cmpl(dev);
        if (status)
index 7f54d2478738a5ec0b6566b7e5980b339c829588..8cd16a18247534b8f0fc22d7b8ce318aab8eed1a 100644 (file)
@@ -329,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
        struct ocrdma_pd *pd = uctx->cntxt_pd;
        struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
 
-       BUG_ON(uctx->pd_in_use);
+       if (uctx->pd_in_use) {
+               pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+                      __func__, dev->id, pd->id);
+       }
        uctx->cntxt_pd = NULL;
        status = _ocrdma_dealloc_pd(dev, pd);
        return status;
@@ -844,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);
+
+       /* Don't stop cleanup, in case FW is unresponsive */
+       if (dev->mqe_ctx.fw_error_state) {
+               status = 0;
+               pr_err("%s(%d) fw not responding.\n",
+                      __func__, dev->id);
+       }
        return status;
 }