staging/rdma/hfi1: Split last 8 bytes of copy to user buffer
author Dean Luick <dean.luick@intel.com>
Wed, 3 Feb 2016 22:35:49 +0000 (14:35 -0800)
committer Doug Ledford <dledford@redhat.com>
Fri, 11 Mar 2016 01:37:56 +0000 (20:37 -0500)
Copy the last 8 bytes of user mode RC WRITE_ONLY and WRITE_LAST
opcodes separately from the rest of the data.

It is a de-facto standard for some MPI implementations to poll on
the last few bytes of a verbs message to detect that the message
has been received, rather than following the required function
method.  The driver uses the kernel memcpy routine, which
becomes "rep movsb" on modern machines.  This copy, while very
fast, does not guarantee in-order copy completion, and the result
is an occasional packet that appears corrupted.  Avoid the issue by
splitting off the last 8 bytes of the copy for the verbs opcodes
where it matters and copying them with an in-order byte copy.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/staging/rdma/hfi1/rc.c
drivers/staging/rdma/hfi1/ruc.c
drivers/staging/rdma/hfi1/uc.c
drivers/staging/rdma/hfi1/ud.c
drivers/staging/rdma/hfi1/verbs.c
drivers/staging/rdma/hfi1/verbs.h

index 50559fd14a701088b79c297a7fa8828468add0b3..371edc3dd4f66f47530eb4a9df3f22ecf832cbfe 100644 (file)
@@ -1539,7 +1539,7 @@ read_middle:
                qp->s_rdma_read_len -= pmtu;
                update_last_psn(qp, psn);
                spin_unlock_irqrestore(&qp->s_lock, flags);
-               hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+               hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
                goto bail;
 
        case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1583,7 +1583,7 @@ read_last:
                if (unlikely(tlen != qp->s_rdma_read_len))
                        goto ack_len_err;
                aeth = be32_to_cpu(ohdr->u.aeth);
-               hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+               hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
                WARN_ON(qp->s_rdma_read_sge.num_sge);
                (void) do_rc_ack(qp, aeth, psn,
                                 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1977,6 +1977,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        unsigned long flags;
        u32 bth1;
        int ret, is_fecn = 0;
+       int copy_last = 0;
 
        bth0 = be32_to_cpu(ohdr->bth[0]);
        if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
@@ -2081,7 +2082,7 @@ send_middle:
                qp->r_rcv_len += pmtu;
                if (unlikely(qp->r_rcv_len > qp->r_len))
                        goto nack_inv;
-               hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+               hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
                break;
 
        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2109,8 +2110,10 @@ send_last_imm:
                wc.ex.imm_data = ohdr->u.imm_data;
                wc.wc_flags = IB_WC_WITH_IMM;
                goto send_last;
-       case OP(SEND_LAST):
        case OP(RDMA_WRITE_LAST):
+               copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+               /* fall through */
+       case OP(SEND_LAST):
 no_immediate_data:
                wc.wc_flags = 0;
                wc.ex.imm_data = 0;
@@ -2126,7 +2129,7 @@ send_last:
                wc.byte_len = tlen + qp->r_rcv_len;
                if (unlikely(wc.byte_len > qp->r_len))
                        goto nack_inv;
-               hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
+               hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
                rvt_put_ss(&qp->r_sge);
                qp->r_msn++;
                if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
@@ -2163,8 +2166,10 @@ send_last:
                             (bth0 & IB_BTH_SOLICITED) != 0);
                break;
 
-       case OP(RDMA_WRITE_FIRST):
        case OP(RDMA_WRITE_ONLY):
+               copy_last = 1;
+               /* fall through */
+       case OP(RDMA_WRITE_FIRST):
        case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto nack_inv;
index f09badbfa51cb8e7187ce2c1ab7c0c79ae460987..6aeea6c4b23675d43b9206586139b68c817e9307 100644 (file)
@@ -370,6 +370,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
        enum ib_wc_status send_status;
        int release;
        int ret;
+       int copy_last = 0;
 
        rcu_read_lock();
 
@@ -459,10 +460,13 @@ again:
                        goto op_err;
                if (!ret)
                        goto rnr_nak;
-               /* FALLTHROUGH */
+               /* skip copy_last set and qp_access_flags recheck */
+               goto do_write;
        case IB_WR_RDMA_WRITE:
+               copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto inv_err;
+do_write:
                if (wqe->length == 0)
                        break;
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
                                          wqe->rdma_wr.remote_addr,
@@ -526,7 +530,7 @@ again:
                if (len > sge->sge_length)
                        len = sge->sge_length;
                WARN_ON_ONCE(len == 0);
-               hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+               hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
index 1e50d303c024bb6946218183d92e7c8a3baf0fdd..0aa604b7557b58b89bfec83334f89046f7547978 100644 (file)
@@ -418,7 +418,7 @@ send_first:
                qp->r_rcv_len += pmtu;
                if (unlikely(qp->r_rcv_len > qp->r_len))
                        goto rewind;
-               hfi1_copy_sge(&qp->r_sge, data, pmtu, 0);
+               hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
                break;
 
        case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -443,7 +443,7 @@ send_last:
                if (unlikely(wc.byte_len > qp->r_len))
                        goto rewind;
                wc.opcode = IB_WC_RECV;
-               hfi1_copy_sge(&qp->r_sge, data, tlen, 0);
+               hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
                rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
                wc.wr_id = qp->r_wr_id;
@@ -518,7 +518,7 @@ rdma_first:
                qp->r_rcv_len += pmtu;
                if (unlikely(qp->r_rcv_len > qp->r_len))
                        goto drop;
-               hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+               hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
                break;
 
        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -547,7 +547,7 @@ rdma_last_imm:
                }
                wc.byte_len = qp->r_len;
                wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-               hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
+               hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
                rvt_put_ss(&qp->r_sge);
                goto last_imm;
 
@@ -563,7 +563,7 @@ rdma_last:
                tlen -= (hdrsize + pad + 4);
                if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
                        goto drop;
-               hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
+               hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
                rvt_put_ss(&qp->r_sge);
                break;
 
index 2eae167696889ab6b0bb23c120efde939e7d6b3e..fdf6e3bee8f15353207d7b26a8917cb729b9b4b0 100644 (file)
@@ -187,7 +187,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 
        if (ah_attr->ah_flags & IB_AH_GRH) {
                hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
-                             sizeof(struct ib_grh), 1);
+                             sizeof(struct ib_grh), 1, 0);
                wc.wc_flags |= IB_WC_GRH;
        } else
                hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
@@ -203,7 +203,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
                if (len > sge->sge_length)
                        len = sge->sge_length;
                WARN_ON_ONCE(len == 0);
-               hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+               hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
@@ -836,11 +836,12 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
        }
        if (has_grh) {
                hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-                             sizeof(struct ib_grh), 1);
+                             sizeof(struct ib_grh), 1, 0);
                wc.wc_flags |= IB_WC_GRH;
        } else
                hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
-       hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+       hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+                     1, 0);
        rvt_put_ss(&qp->r_sge);
        if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
                return;
index d617324e3c48e77034fc2aa63aa1d094c34d7547..8f351bc157dff9afe8a6810f3f9d279f13333fdb 100644 (file)
@@ -242,14 +242,28 @@ __be64 ib_hfi1_sys_image_guid;
  * @ss: the SGE state
  * @data: the data to copy
  * @length: the length of the data
+ * @copy_last: do a separate copy of the last 8 bytes
  */
 void hfi1_copy_sge(
        struct rvt_sge_state *ss,
        void *data, u32 length,
-       int release)
+       int release,
+       int copy_last)
 {
        struct rvt_sge *sge = &ss->sge;
+       int in_last = 0;
+       int i;
+
+       if (copy_last) {
+               if (length > 8) {
+                       length -= 8;
+               } else {
+                       copy_last = 0;
+                       in_last = 1;
+               }
+       }
 
+again:
        while (length) {
                u32 len = sge->length;
 
@@ -258,7 +272,13 @@ void hfi1_copy_sge(
                if (len > sge->sge_length)
                        len = sge->sge_length;
                WARN_ON_ONCE(len == 0);
-               memcpy(sge->vaddr, data, len);
+               if (in_last) {
+                       /* enforce byte transfer ordering */
+                       for (i = 0; i < len; i++)
+                               ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+               } else {
+                       memcpy(sge->vaddr, data, len);
+               }
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
@@ -281,6 +301,13 @@ void hfi1_copy_sge(
                data += len;
                length -= len;
        }
+
+       if (copy_last) {
+               copy_last = 0;
+               in_last = 1;
+               length = 8;
+               goto again;
+       }
 }
 
 /**
index ac84dd70c6c7372bc883f0194d1dcd4e3c8e4f87..afb2d7fd6ae63821fa3f256909332b515ae3b17b 100644 (file)
@@ -398,7 +398,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
 int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
 void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
-                  int release);
+                  int release, int copy_last);
 
 void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);