xprtrdma: Acquire FMRs in rpcrdma_fmr_register_external()
author Chuck Lever <chuck.lever@oracle.com>
Tue, 26 May 2015 15:52:16 +0000 (11:52 -0400)
committer Anna Schumaker <Anna.Schumaker@Netapp.com>
Fri, 12 Jun 2015 17:10:36 +0000 (13:10 -0400)
Acquiring 64 FMRs in rpcrdma_buffer_get() while holding the buffer
pool lock is expensive, and unnecessary because FMR mode can
transfer up to a 1MB payload using just a single ib_fmr.

Instead, acquire ib_fmrs one-at-a-time as chunks are registered, and
return them to rb_mws immediately during deregistration.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Tested-By: Devesh Sharma <devesh.sharma@avagotech.com>
Reviewed-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/verbs.c

index 0a96155bb03ad4bc2689dd8cd956d66459e580ee..53fb649071d94537ba1ab12325e76c6db32cb97a 100644 (file)
  * can take tens of usecs to complete.
  */
 
+/* Normal operation
+ *
+ * A Memory Region is prepared for RDMA READ or WRITE using the
+ * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
+ * finished, the Memory Region is unmapped using the ib_unmap_fmr
+ * verb (fmr_op_unmap).
+ */
+
+/* Transport recovery
+ *
+ * After a transport reconnect, fmr_op_map re-uses the MR already
+ * allocated for the RPC, but generates a fresh rkey then maps the
+ * MR again. This process is synchronous.
+ */
+
 #include "xprt_rdma.h"
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -77,6 +92,15 @@ out_fmr_err:
        return rc;
 }
 
+static int
+__fmr_unmap(struct rpcrdma_mw *r)
+{
+       LIST_HEAD(l);
+
+       list_add(&r->r.fmr->list, &l);
+       return ib_unmap_fmr(&l);
+}
+
 /* Use the ib_map_phys_fmr() verb to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
@@ -88,9 +112,22 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        struct ib_device *device = ia->ri_device;
        enum dma_data_direction direction = rpcrdma_data_dir(writing);
        struct rpcrdma_mr_seg *seg1 = seg;
-       struct rpcrdma_mw *mw = seg1->rl_mw;
        u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
        int len, pageoff, i, rc;
+       struct rpcrdma_mw *mw;
+
+       mw = seg1->rl_mw;
+       seg1->rl_mw = NULL;
+       if (!mw) {
+               mw = rpcrdma_get_mw(r_xprt);
+               if (!mw)
+                       return -ENOMEM;
+       } else {
+               /* this is a retransmit; generate a fresh rkey */
+               rc = __fmr_unmap(mw);
+               if (rc)
+                       return rc;
+       }
 
        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
@@ -114,6 +151,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        if (rc)
                goto out_maperr;
 
+       seg1->rl_mw = mw;
        seg1->mr_rkey = mw->r.fmr->rkey;
        seg1->mr_base = seg1->mr_dma + pageoff;
        seg1->mr_nsegs = i;
@@ -137,18 +175,24 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
        int rc, nsegs = seg->mr_nsegs;
-       LIST_HEAD(l);
 
-       list_add(&seg1->rl_mw->r.fmr->list, &l);
-       rc = ib_unmap_fmr(&l);
+       dprintk("RPC:       %s: FMR %p\n", __func__, mw);
+
+       seg1->rl_mw = NULL;
        while (seg1->mr_nsegs--)
                rpcrdma_unmap_one(ia->ri_device, seg++);
+       rc = __fmr_unmap(mw);
        if (rc)
                goto out_err;
+       rpcrdma_put_mw(r_xprt, mw);
        return nsegs;
 
 out_err:
+       /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
+        * will attempt to release it when the transport is destroyed.
+        */
        dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
        return nsegs;
 }
index b7ca73e7e2e63fe5c4d30a9033213e9d52f18d36..3188e368e0a248c1635f64f0bacf54f319f07c0b 100644 (file)
@@ -1324,28 +1324,6 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
        return NULL;
 }
 
-static struct rpcrdma_req *
-rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int i;
-
-       i = RPCRDMA_MAX_SEGS - 1;
-       while (!list_empty(&buf->rb_mws)) {
-               r = list_entry(buf->rb_mws.next,
-                              struct rpcrdma_mw, mw_list);
-               list_del(&r->mw_list);
-               req->rl_segments[i].rl_mw = r;
-               if (unlikely(i-- == 0))
-                       return req;     /* Success */
-       }
-
-       /* Not enough entries on rb_mws for this req */
-       rpcrdma_buffer_put_sendbuf(req, buf);
-       rpcrdma_buffer_put_mrs(req, buf);
-       return NULL;
-}
-
 /*
  * Get a set of request/reply buffers.
  *
@@ -1387,9 +1365,6 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        case RPCRDMA_FRMR:
                req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
                break;
-       case RPCRDMA_MTHCAFMR:
-               req = rpcrdma_buffer_get_fmrs(req, buffers);
-               break;
        default:
                break;
        }
@@ -1414,7 +1389,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
        rpcrdma_buffer_put_sendbuf(req, buffers);
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
-       case RPCRDMA_MTHCAFMR:
                rpcrdma_buffer_put_mrs(req, buffers);
                break;
        default: