IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok()
authorMike Marciniszyn <mike.marciniszyn@intel.com>
Fri, 12 May 2017 16:20:31 +0000 (09:20 -0700)
committerDoug Ledford <dledford@redhat.com>
Tue, 27 Jun 2017 20:56:33 +0000 (16:56 -0400)
SGEs that are contiguous needlessly consume driver dependent TX resources.

The lkey validation logic is enhanced to compress the SGE that ends
up in the send wqe when consecutive addresses are detected.

The lkey validation API used to return 1 (success) or 0 (fail).

The return value is now an -errno, 0 (compressed), or 1 (uncompressed).  An
additional argument is added to pass the last SGE for the compression.

Loopback callers always pass a NULL to last_sge since the optimization is
of little benefit in that situation.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/ruc.c
drivers/infiniband/hw/qib/qib_ruc.c
drivers/infiniband/sw/rdmavt/mr.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rdmavt/trace_mr.h
drivers/infiniband/sw/rdmavt/trace_tx.h
include/rdma/rdma_vt.h

index 9cc9c7be9dd4bea7b4d3ca8db7a4ab264d42d736..476fe5da29926d757c3a86902c873bc84869bfdb 100644 (file)
@@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
                        continue;
                /* Check LKEY */
                if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+                                NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
                        goto bad_lkey;
                qp->r_len += wqe->sg_list[i].length;
                j++;
index bd09de7c6e56c023fb2b93b1d7a8210887e62526..88d84cbf7e5afde5b117d0c2e6cc02e7f4b973e6 100644 (file)
@@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
                        continue;
                /* Check LKEY */
                if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+                                NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
                        goto bad_lkey;
                qp->r_len += wqe->sg_list[i].length;
                j++;
index aa5f9ea318e45ab0ecd7169c0f14a675ffc121a3..ea95672d967515337d3ab02044a7421a78d221a6 100644 (file)
@@ -777,24 +777,55 @@ out:
        return ret;
 }
 
+/**
+ * rvt_sge_adjacent - try to merge sge into the last SGE written
+ * @isge: outgoing internal SGE (not referenced by this helper)
+ * @last_sge: last outgoing SGE written, or NULL to disable merging
+ * @sge: SGE to check
+ *
+ * If adjacent, last_sge is grown in place rather than using a new entry.
+ *
+ * Return: true if sge was merged into last_sge, false otherwise
+ */
+static inline bool rvt_sge_adjacent(struct rvt_sge *isge,
+                                   struct rvt_sge *last_sge,
+                                   struct ib_sge *sge)
+{
+       if (last_sge && sge->lkey == last_sge->mr->lkey &&
+           ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
+               if (sge->lkey) {
+                       if (unlikely((sge->addr - last_sge->mr->user_base +
+                             sge->length > last_sge->mr->length)))
+                               return false; /* overrun, caller will catch */
+               } else {
+                       last_sge->length += sge->length;
+               }
+               last_sge->sge_length += sge->length;
+               trace_rvt_sge_adjacent(last_sge, sge);
+               return true;
+       }
+       return false;
+}
+
 /**
  * rvt_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
  * @pd: protection domain
  * @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
  * @sge: SGE to check
  * @acc: access flags
  *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  *
- * Return: 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
+ * Increments the reference count when a new sge is stored.
  *
+ * Return: 0 if compressed, 1 if added, otherwise returns -errno.
  */
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-               struct rvt_sge *isge, struct ib_sge *sge, int acc)
+               struct rvt_sge *isge, struct rvt_sge *last_sge,
+               struct ib_sge *sge, int acc)
 {
        struct rvt_mregion *mr;
        unsigned n, m;
@@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
         * We use LKEY == zero for kernel virtual addresses
         * (see rvt_get_dma_mr() and dma_virt_ops).
         */
-       rcu_read_lock();
        if (sge->lkey == 0) {
                struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
 
                if (pd->user)
-                       goto bail;
+                       return -EINVAL;
+               if (rvt_sge_adjacent(isge, last_sge, sge))
+                       return 0;
+               rcu_read_lock();
                mr = rcu_dereference(dev->dma_mr);
                if (!mr)
                        goto bail;
@@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
                isge->n = 0;
                goto ok;
        }
+       if (rvt_sge_adjacent(isge, last_sge, sge))
+               return 0;
+       rcu_read_lock();
        mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
        if (!mr)
                goto bail;
@@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
        isge->m = m;
        isge->n = n;
 ok:
+       trace_rvt_sge_new(isge, sge);
        return 1;
 bail_unref:
        rvt_put_mr(mr);
 bail:
        rcu_read_unlock();
-       return 0;
+       return -EINVAL;
 }
 EXPORT_SYMBOL(rvt_lkey_ok);
 
index 727e81cc2c8f6ad71b66b486c9b3d581e66e47d0..a3dd1e5368609dbd793d60d4cc5625ef32124945 100644 (file)
@@ -1646,7 +1646,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
        struct rvt_pd *pd;
        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
        u8 log_pmtu;
-       int ret;
+       int ret, incr;
        size_t cplen;
        bool reserved_op;
        int local_ops_delayed = 0;
@@ -1719,22 +1719,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
        wqe->length = 0;
        j = 0;
        if (wr->num_sge) {
+               struct rvt_sge *last_sge = NULL;
+
                acc = wr->opcode >= IB_WR_RDMA_READ ?
                        IB_ACCESS_LOCAL_WRITE : 0;
                for (i = 0; i < wr->num_sge; i++) {
                        u32 length = wr->sg_list[i].length;
-                       int ok;
 
                        if (length == 0)
                                continue;
-                       ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
-                                        &wr->sg_list[i], acc);
-                       if (!ok) {
-                               ret = -EINVAL;
-                               goto bail_inval_free;
-                       }
+                       incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
+                                          &wr->sg_list[i], acc);
+                       if (unlikely(incr < 0))
+                               goto bail_lkey_error;
                        wqe->length += length;
-                       j++;
+                       if (incr)
+                               last_sge = &wqe->sg_list[j];
+                       j += incr;
                }
                wqe->wr.num_sge = j;
        }
@@ -1781,12 +1782,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
                wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
                qp->s_avail--;
        }
-       trace_rvt_post_one_wr(qp, wqe);
+       trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
        smp_wmb(); /* see request builders */
        qp->s_head = next;
 
        return 0;
 
+bail_lkey_error:
+       ret = incr;
 bail_inval_free:
        /* release mr holds */
        while (j) {
index 3318a6c36373358f74a1582d9e156ed69ed807bb..976e482930a3f5e6cc6f4006093088b1f74fc85e 100644 (file)
@@ -103,6 +103,68 @@ DEFINE_EVENT(
        TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
        TP_ARGS(mr, m, n, v, len));
 
+DECLARE_EVENT_CLASS(
+       rvt_sge_template, /* common layout for the rvt_sge_* events */
+       TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+       TP_ARGS(sge, isge),
+       TP_STRUCT__entry(
+               RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device))
+               __field(struct rvt_mregion *, mr)
+               __field(struct rvt_sge *, sge)
+               __field(struct ib_sge *, isge)
+               __field(void *, vaddr)
+               __field(u64, ivaddr)
+               __field(u32, lkey)
+               __field(u32, sge_length)
+               __field(u32, length)
+               __field(u32, ilength)
+               __field(int, user)
+               __field(u16, m)
+               __field(u16, n)
+       ),
+       TP_fast_assign(
+               RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device));
+               __entry->mr = sge->mr;
+               __entry->sge = sge;
+               __entry->isge = isge;
+               __entry->vaddr = sge->vaddr; /* from the internal rvt_sge */
+               __entry->ivaddr = isge->addr; /* from the ib_sge argument */
+               __entry->lkey = sge->mr->lkey;
+               __entry->sge_length = sge->sge_length;
+               __entry->length = sge->length;
+               __entry->ilength = isge->length;
+               __entry->m = sge->m;
+               __entry->n = sge->n;
+               __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user;
+       ),
+       TP_printk(
+               "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u",
+               __get_str(dev),
+               __entry->mr,
+               __entry->sge,
+               __entry->isge,
+               __entry->vaddr,
+               __entry->ivaddr,
+               __entry->lkey,
+               __entry->sge_length,
+               __entry->length,
+               __entry->ilength,
+               __entry->m,
+               __entry->n,
+               __entry->user
+       )
+);
+
+DEFINE_EVENT( /* emitted by rvt_sge_adjacent() after a merge */
+       rvt_sge_template, rvt_sge_adjacent,
+       TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+       TP_ARGS(sge, isge));
+
+DEFINE_EVENT( /* emitted by rvt_lkey_ok() just before it returns 1 */
+       rvt_sge_template, rvt_sge_new,
+       TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+       TP_ARGS(sge, isge));
+
 #endif /* __RVT_TRACE_MR_H */
 
 #undef TRACE_INCLUDE_PATH
index a613a22237510744a266a7c0fcd1213150b2f345..0ef25fc49f25a2c027daf911a06df0be3dfa0d43 100644 (file)
@@ -84,12 +84,12 @@ __print_symbolic(opcode,                                   \
        wr_opcode_name(RESERVED10))
 
 #define POS_PRN \
-"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u"
 
 TRACE_EVENT(
        rvt_post_one_wr,
-       TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
-       TP_ARGS(qp, wqe),
+       TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge),
+       TP_ARGS(qp, wqe, wr_num_sge),
        TP_STRUCT__entry(
                RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
                __field(u64, wr_id)
@@ -108,6 +108,7 @@ TRACE_EVENT(
                __field(int, send_flags)
                __field(pid_t, pid)
                __field(int, num_sge)
+               __field(int, wr_num_sge)
        ),
        TP_fast_assign(
                RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
@@ -127,6 +128,7 @@ TRACE_EVENT(
                __entry->ssn = wqe->ssn;
                __entry->send_flags = wqe->wr.send_flags;
                __entry->num_sge = wqe->wr.num_sge;
+               __entry->wr_num_sge = wr_num_sge;
        ),
        TP_printk(
                POS_PRN,
@@ -146,7 +148,8 @@ TRACE_EVENT(
                __entry->head,
                __entry->last,
                __entry->pid,
-               __entry->num_sge
+               __entry->num_sge,
+               __entry->wr_num_sge
        )
 );
 
index 4878aaf7bdffd871515bc826470ec77886a1c02c..d0b9f91e5f4dd26a8af49562f30ca36cd514f315 100644 (file)
@@ -515,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
                u32 len, u64 vaddr, u32 rkey, int acc);
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-               struct rvt_sge *isge, struct ib_sge *sge, int acc);
+               struct rvt_sge *isge, struct rvt_sge *last_sge,
+               struct ib_sge *sge, int acc);
 struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
                                 u16 lid);