RDMA/cxgb3: Correctly serialize peer abort path
authorSteve Wise <swise@opengridcomputing.com>
Tue, 29 Apr 2008 20:46:51 +0000 (13:46 -0700)
committerRoland Dreier <rolandd@cisco.com>
Tue, 29 Apr 2008 20:46:51 +0000 (13:46 -0700)
Open MPI and other stress testing exposed a few bad bugs in handling
aborts in the middle of a normal close.  Fix these by:

 - serializing abort reply and peer abort processing with disconnect
   processing

 - warning (and ignoring) if ep timer is stopped when it wasn't running

 - cleaning up disconnect path to correctly deal with aborting and
   dead endpoints

 - in iwch_modify_qp(), taking a ref on the ep before releasing the qp
   lock if iwch_ep_disconnect() will be called.  The ref is dropped
   after calling disconnect.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_cm.h
drivers/infiniband/hw/cxgb3/iwch_qp.c

index 72ca360c3dbc12c2b0a36e865b5ad18601eb3bd2..0b515d899f6c24d548202ca5ea687de26c8d1b4a 100644 (file)
@@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep)
 static void stop_ep_timer(struct iwch_ep *ep)
 {
        PDBG("%s ep %p\n", __func__, ep);
+       if (!timer_pending(&ep->timer)) {
+               printk(KERN_ERR "%s timer stopped when its not running!  ep %p state %u\n",
+                       __func__, ep, ep->com.state);
+               WARN_ON(1);
+               return;
+       }
        del_timer_sync(&ep->timer);
        put_ep(&ep->com);
 }
@@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 {
        struct iwch_ep *ep = ctx;
+       unsigned long flags;
+       int release = 0;
 
        PDBG("%s ep %p\n", __func__, ep);
+       BUG_ON(!ep);
 
        /*
         * We get 2 abort replies from the HW.  The first one must
@@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
                return CPL_RET_BUF_DONE;
        }
 
-       close_complete_upcall(ep);
-       state_set(&ep->com, DEAD);
-       release_ep_resources(ep);
+       spin_lock_irqsave(&ep->com.lock, flags);
+       switch (ep->com.state) {
+       case ABORTING:
+               close_complete_upcall(ep);
+               __state_set(&ep->com, DEAD);
+               release = 1;
+               break;
+       default:
+               printk(KERN_ERR "%s ep %p state %d\n",
+                    __func__, ep, ep->com.state);
+               break;
+       }
+       spin_unlock_irqrestore(&ep->com.lock, flags);
+
+       if (release)
+               release_ep_resources(ep);
        return CPL_RET_BUF_DONE;
 }
 
@@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        struct sk_buff *rpl_skb;
        struct iwch_qp_attributes attrs;
        int ret;
-       int state;
+       int release = 0;
+       unsigned long flags;
 
        if (is_neg_adv_abort(req->status)) {
                PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
@@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
                return CPL_RET_BUF_DONE;
        }
 
-       state = state_read(&ep->com);
-       PDBG("%s ep %p state %u\n", __func__, ep, state);
-       switch (state) {
+       spin_lock_irqsave(&ep->com.lock, flags);
+       PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+       switch (ep->com.state) {
        case CONNECTING:
                break;
        case MPA_REQ_WAIT:
@@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
                break;
        case DEAD:
                PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+               spin_unlock_irqrestore(&ep->com.lock, flags);
                return CPL_RET_BUF_DONE;
        default:
                BUG_ON(1);
                break;
        }
        dst_confirm(ep->dst);
+       if (ep->com.state != ABORTING) {
+               __state_set(&ep->com, DEAD);
+               release = 1;
+       }
+       spin_unlock_irqrestore(&ep->com.lock, flags);
 
        rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
        if (!rpl_skb) {
                printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
                       __func__);
-               dst_release(ep->dst);
-               l2t_release(L2DATA(ep->com.tdev), ep->l2t);
-               put_ep(&ep->com);
-               return CPL_RET_BUF_DONE;
+               release = 1;
+               goto out;
        }
        rpl_skb->priority = CPL_PRIORITY_DATA;
        rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
@@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
        OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
        rpl->cmd = CPL_ABORT_NO_RST;
        cxgb3_ofld_send(ep->com.tdev, rpl_skb);
-       if (state != ABORTING) {
-               state_set(&ep->com, DEAD);
+out:
+       if (release)
                release_ep_resources(ep);
-       }
        return CPL_RET_BUF_DONE;
 }
 
@@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg)
        struct iwch_ep *ep = (struct iwch_ep *)arg;
        struct iwch_qp_attributes attrs;
        unsigned long flags;
+       int abort = 1;
 
        spin_lock_irqsave(&ep->com.lock, flags);
        PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
             ep->com.state);
        switch (ep->com.state) {
        case MPA_REQ_SENT:
+               __state_set(&ep->com, ABORTING);
                connect_reply_upcall(ep, -ETIMEDOUT);
                break;
        case MPA_REQ_WAIT:
+               __state_set(&ep->com, ABORTING);
                break;
        case CLOSING:
        case MORIBUND:
@@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg)
                                     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
                                     &attrs, 1);
                }
+               __state_set(&ep->com, ABORTING);
                break;
        default:
-               BUG();
+               printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+                       __func__, ep, ep->com.state);
+               WARN_ON(1);
+               abort = 0;
        }
-       __state_set(&ep->com, CLOSING);
        spin_unlock_irqrestore(&ep->com.lock, flags);
-       abort_connection(ep, NULL, GFP_ATOMIC);
+       if (abort)
+               abort_connection(ep, NULL, GFP_ATOMIC);
        put_ep(&ep->com);
 }
 
@@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
        PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
             states[ep->com.state], abrupt);
 
-       if (ep->com.state == DEAD) {
-               PDBG("%s already dead ep %p\n", __func__, ep);
-               goto out;
-       }
-
-       if (abrupt) {
-               if (ep->com.state != ABORTING) {
-                       ep->com.state = ABORTING;
-                       close = 1;
-               }
-               goto out;
-       }
-
        switch (ep->com.state) {
        case MPA_REQ_WAIT:
        case MPA_REQ_SENT:
        case MPA_REQ_RCVD:
        case MPA_REP_SENT:
        case FPDU_MODE:
-               start_ep_timer(ep);
-               ep->com.state = CLOSING;
                close = 1;
+               if (abrupt)
+                       ep->com.state = ABORTING;
+               else {
+                       ep->com.state = CLOSING;
+                       start_ep_timer(ep);
+               }
                break;
        case CLOSING:
-               ep->com.state = MORIBUND;
                close = 1;
+               if (abrupt) {
+                       stop_ep_timer(ep);
+                       ep->com.state = ABORTING;
+               } else
+                       ep->com.state = MORIBUND;
                break;
        case MORIBUND:
+       case ABORTING:
+       case DEAD:
+               PDBG("%s ignoring disconnect ep %p state %u\n",
+                    __func__, ep, ep->com.state);
                break;
        default:
                BUG();
                break;
        }
-out:
+
        spin_unlock_irqrestore(&ep->com.lock, flags);
        if (close) {
                if (abrupt)
index 2bb7fbdb3ff44e3136a7e2c0d7edcec9b97e842f..a2f1b787d970774095b3acfd50dcd24d6d36303d 100644 (file)
@@ -56,6 +56,7 @@
 #define put_ep(ep) { \
        PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
             ep, atomic_read(&((ep)->kref.refcount))); \
+       WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
        kref_put(&((ep)->kref), __free_ep); \
 }
 
index 8891c3b0a3d51d1ecc1e2e47766a27d91a4c8be6..6cd484e11c114e851d8e60a30e601df448b486b2 100644 (file)
@@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
                                abort=0;
                                disconnect = 1;
                                ep = qhp->ep;
+                               get_ep(&ep->com);
                        }
                        flush_qp(qhp, &flag);
                        break;
@@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
                                abort=1;
                                disconnect = 1;
                                ep = qhp->ep;
+                               get_ep(&ep->com);
                        }
                        goto err;
                        break;
@@ -929,8 +931,10 @@ out:
         * on the EP.  This can be a normal close (RTS->CLOSING) or
         * an abnormal close (RTS/CLOSING->ERROR).
         */
-       if (disconnect)
+       if (disconnect) {
                iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+               put_ep(&ep->com);
+       }
 
        /*
         * If free is 1, then we've disassociated the EP from the QP