IB/qib: Fix race between qib_error_qp() and receive packet processing
authorRalph Campbell <ralph.campbell@qlogic.com>
Mon, 2 Aug 2010 22:39:30 +0000 (22:39 +0000)
committerRoland Dreier <rolandd@cisco.com>
Tue, 3 Aug 2010 20:59:47 +0000 (13:59 -0700)
When transitioning a QP to the error state, in progress RWQEs need to
be marked complete.  This also involves releasing the reference count
to the memory regions referenced in the SGEs.  The locking in the
receive packet processing wasn't sufficient to prevent qib_error_qp()
from modifying the r_sge state at the same time, thus leading to
kernel panics.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_rc.c
drivers/infiniband/hw/qib/qib_sdma.c
drivers/infiniband/hw/qib/qib_uc.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/hw/qib/qib_verbs.c

index e0f65e39076b00e64080d866800ccd0c7969644b..6c39851d2dedb3b02514dff420ddbc08acf178c1 100644 (file)
@@ -450,7 +450,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
  *
  * Flushes both send and receive work queues.
  * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
  * If we are already in error state, just return.
  */
 int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
index 40c0a373719c5651db8f362b5600771be91c39cc..a0931119bd78c50a7324b6da4350832e66312bac 100644 (file)
@@ -868,7 +868,7 @@ done:
 
 /*
  * Back up requester to resend the last un-ACKed request.
- * The QP s_lock should be held and interrupts disabled.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
  */
 static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
 {
@@ -911,7 +911,8 @@ static void rc_timeout(unsigned long arg)
        struct qib_ibport *ibp;
        unsigned long flags;
 
-       spin_lock_irqsave(&qp->s_lock, flags);
+       spin_lock_irqsave(&qp->r_lock, flags);
+       spin_lock(&qp->s_lock);
        if (qp->s_flags & QIB_S_TIMER) {
                ibp = to_iport(qp->ibqp.device, qp->port_num);
                ibp->n_rc_timeouts++;
@@ -920,7 +921,8 @@ static void rc_timeout(unsigned long arg)
                qib_restart_rc(qp, qp->s_last_psn + 1, 1);
                qib_schedule_send(qp);
        }
-       spin_unlock_irqrestore(&qp->s_lock, flags);
+       spin_unlock(&qp->s_lock);
+       spin_unlock_irqrestore(&qp->r_lock, flags);
 }
 
 /*
@@ -1414,10 +1416,6 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-               goto ack_done;
-
        /* Ignore invalid responses. */
        if (qib_cmp24(psn, qp->s_next_psn) >= 0)
                goto ack_done;
@@ -1661,9 +1659,6 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
        ibp->n_rc_dupreq++;
 
        spin_lock_irqsave(&qp->s_lock, flags);
-       /* Double check we can process this now that we hold the s_lock. */
-       if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-               goto unlock_done;
 
        for (i = qp->r_head_ack_queue; ; i = prev) {
                if (i == qp->s_tail_ack_queue)
@@ -1878,9 +1873,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
        psn = be32_to_cpu(ohdr->bth[2]);
        opcode >>= 24;
 
-       /* Prevent simultaneous processing after APM on different CPUs */
-       spin_lock(&qp->r_lock);
-
        /*
         * Process responses (ACKs) before anything else.  Note that the
         * packet sequence number will be for something in the send work
@@ -1891,14 +1883,14 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
                qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
                                hdrsize, pmtu, rcd);
-               goto runlock;
+               return;
        }
 
        /* Compute 24 bits worth of difference. */
        diff = qib_cmp24(psn, qp->r_psn);
        if (unlikely(diff)) {
                if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
-                       goto runlock;
+                       return;
                goto send_ack;
        }
 
@@ -2090,9 +2082,6 @@ send_last:
                if (next > QIB_MAX_RDMA_ATOMIC)
                        next = 0;
                spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-                       goto srunlock;
                if (unlikely(next == qp->s_tail_ack_queue)) {
                        if (!qp->s_ack_queue[next].sent)
                                goto nack_inv_unlck;
@@ -2146,7 +2135,7 @@ send_last:
                qp->s_flags |= QIB_S_RESP_PENDING;
                qib_schedule_send(qp);
 
-               goto srunlock;
+               goto sunlock;
        }
 
        case OP(COMPARE_SWAP):
@@ -2165,9 +2154,6 @@ send_last:
                if (next > QIB_MAX_RDMA_ATOMIC)
                        next = 0;
                spin_lock_irqsave(&qp->s_lock, flags);
-               /* Double check we can process this while holding the s_lock. */
-               if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
-                       goto srunlock;
                if (unlikely(next == qp->s_tail_ack_queue)) {
                        if (!qp->s_ack_queue[next].sent)
                                goto nack_inv_unlck;
@@ -2213,7 +2199,7 @@ send_last:
                qp->s_flags |= QIB_S_RESP_PENDING;
                qib_schedule_send(qp);
 
-               goto srunlock;
+               goto sunlock;
        }
 
        default:
@@ -2227,7 +2213,7 @@ send_last:
        /* Send an ACK if requested or required. */
        if (psn & (1 << 31))
                goto send_ack;
-       goto runlock;
+       return;
 
 rnr_nak:
        qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
@@ -2238,7 +2224,7 @@ rnr_nak:
                atomic_inc(&qp->refcount);
                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
        }
-       goto runlock;
+       return;
 
 nack_op_err:
        qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
@@ -2250,7 +2236,7 @@ nack_op_err:
                atomic_inc(&qp->refcount);
                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
        }
-       goto runlock;
+       return;
 
 nack_inv_unlck:
        spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2264,7 +2250,7 @@ nack_inv:
                atomic_inc(&qp->refcount);
                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
        }
-       goto runlock;
+       return;
 
 nack_acc_unlck:
        spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2274,13 +2260,6 @@ nack_acc:
        qp->r_ack_psn = qp->r_psn;
 send_ack:
        qib_send_rc_ack(qp);
-runlock:
-       spin_unlock(&qp->r_lock);
-       return;
-
-srunlock:
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-       spin_unlock(&qp->r_lock);
        return;
 
 sunlock:
index b8456881f7f638f0357af412ef0590dfbb7ef335..cad44491320bc52b5f55409568e9f5edba6619f6 100644 (file)
@@ -656,6 +656,7 @@ unmap:
        }
        qp = tx->qp;
        qib_put_txreq(tx);
+       spin_lock(&qp->r_lock);
        spin_lock(&qp->s_lock);
        if (qp->ibqp.qp_type == IB_QPT_RC) {
                /* XXX what about error sending RDMA read responses? */
@@ -664,6 +665,7 @@ unmap:
        } else if (qp->s_wqe)
                qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
        spin_unlock(&qp->s_lock);
+       spin_unlock(&qp->r_lock);
        /* return zero to process the next send work request */
        goto unlock;
 
index 6c7fe78cca644f8b345f226ad1eb4abbd9969412..b9c8b6346c1b3e475a5d945a4ce611b36a5a6e3c 100644 (file)
@@ -272,9 +272,6 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
        opcode >>= 24;
        memset(&wc, 0, sizeof wc);
 
-       /* Prevent simultaneous processing after APM on different CPUs */
-       spin_lock(&qp->r_lock);
-
        /* Compare the PSN verses the expected PSN. */
        if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
                /*
@@ -534,7 +531,6 @@ rdma_last:
        }
        qp->r_psn++;
        qp->r_state = opcode;
-       spin_unlock(&qp->r_lock);
        return;
 
 rewind:
@@ -542,12 +538,10 @@ rewind:
        qp->r_sge.num_sge = 0;
 drop:
        ibp->n_pkt_drops++;
-       spin_unlock(&qp->r_lock);
        return;
 
 op_err:
        qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-       spin_unlock(&qp->r_lock);
        return;
 
 sunlock:
index c838cda73347fa354b0abee78773046af816131d..e1b3da2a1f85fc226e47ce74e0ead97bf61791c7 100644 (file)
@@ -534,13 +534,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
         */
        wc.byte_len = tlen + sizeof(struct ib_grh);
 
-       /*
-        * We need to serialize getting a receive work queue entry and
-        * generating a completion for it against QPs sending to this QP
-        * locally.
-        */
-       spin_lock(&qp->r_lock);
-
        /*
         * Get the next work request entry to find where to put the data.
         */
@@ -552,19 +545,19 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
                ret = qib_get_rwqe(qp, 0);
                if (ret < 0) {
                        qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-                       goto bail_unlock;
+                       return;
                }
                if (!ret) {
                        if (qp->ibqp.qp_num == 0)
                                ibp->n_vl15_dropped++;
-                       goto bail_unlock;
+                       return;
                }
        }
        /* Silently drop packets which are too big. */
        if (unlikely(wc.byte_len > qp->r_len)) {
                qp->r_flags |= QIB_R_REUSE_SGE;
                ibp->n_pkt_drops++;
-               goto bail_unlock;
+               return;
        }
        if (has_grh) {
                qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
@@ -579,7 +572,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
                        qp->r_sge.sge = *qp->r_sge.sg_list++;
        }
        if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
-               goto bail_unlock;
+               return;
        wc.wr_id = qp->r_wr_id;
        wc.status = IB_WC_SUCCESS;
        wc.opcode = IB_WC_RECV;
@@ -601,7 +594,5 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
        qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                     (ohdr->bth[0] &
                        cpu_to_be32(IB_BTH_SOLICITED)) != 0);
-bail_unlock:
-       spin_unlock(&qp->r_lock);
 bail:;
 }
index cda8f4173d232d12adc05dd5e53aef6b5c0b2c51..9fab404888505be57b9fdbc1f813f3ae28e290c1 100644 (file)
@@ -550,10 +550,12 @@ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 {
        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 
+       spin_lock(&qp->r_lock);
+
        /* Check for valid receive state. */
        if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
                ibp->n_pkt_drops++;
-               return;
+               goto unlock;
        }
 
        switch (qp->ibqp.qp_type) {
@@ -577,6 +579,9 @@ static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
        default:
                break;
        }
+
+unlock:
+       spin_unlock(&qp->r_lock);
 }
 
 /**