xprtrdma: Client-side support for rpcrdma_connect_private
author: Chuck Lever <chuck.lever@oracle.com>
Thu, 15 Sep 2016 14:57:07 +0000 (10:57 -0400)
committer: Anna Schumaker <Anna.Schumaker@Netapp.com>
Mon, 19 Sep 2016 17:08:38 +0000 (13:08 -0400)
Send an RDMA-CM private message on connect, and look for one during
a connection-established event.

Both sides can communicate their various implementation limits.
Implementations that don't support this sideband protocol ignore it.

Once the client knows the server's inline threshold maxima, it can
adjust the use of Reply chunks, and eliminate most use of Position
Zero Read chunks. Moderately-sized I/O can be done using a pure
inline RDMA Send instead of RDMA operations that require memory
registration.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
include/linux/sunrpc/rpc_rdma.h
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

index a7da6bf5661076558e08fd09892c3b9cc551fdfb..cfda6adcf33cfcf3c28e46066ec294c6d2902389 100644 (file)
 #define RPCRDMA_VERSION                1
 #define rpcrdma_version                cpu_to_be32(RPCRDMA_VERSION)
 
+enum {
+       RPCRDMA_V1_DEF_INLINE_SIZE      = 1024, /* inline size assumed when the peer sends no valid connect private message */
+};
+
 struct rpcrdma_segment {
        __be32 rs_handle;       /* Registered memory handle */
        __be32 rs_length;       /* Length of the chunk in bytes */
index 21cb3b150b371dc5e8aab5e02bc4af99a6a3d856..16690a1b653e80ed97b9cd674548ebf2643341ef 100644 (file)
@@ -160,9 +160,8 @@ static int
 fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
            struct rpcrdma_create_data_internal *cdata)
 {
-       rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
-                                                     RPCRDMA_MAX_DATA_SEGS /
-                                                     RPCRDMA_MAX_FMR_SGES));
+       ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
+                               RPCRDMA_MAX_FMR_SGES);
        return 0;
 }
 
index 892b5e1d9b099b217fba65a26b47105a0c027c51..fcfcf3ac030cb8109f96b0ec6c0fac37fee87a49 100644 (file)
@@ -242,9 +242,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
                                               depth;
        }
 
-       rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
-                                                     RPCRDMA_MAX_DATA_SEGS /
-                                                     ia->ri_max_frmr_depth));
+       ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
+                               ia->ri_max_frmr_depth);
        return 0;
 }
 
index c2906e31428740a7adcfa0b4e7e4e1bd1989b74d..ea734c2c7ddbcd424c17101bbfc39ce5ea6d2897 100644 (file)
@@ -118,10 +118,12 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
        return size;
 }
 
-void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia,
-                                 struct rpcrdma_create_data_internal *cdata,
-                                 unsigned int maxsegs)
+void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 {
+       struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       unsigned int maxsegs = ia->ri_max_segs;
+
        ia->ri_max_inline_write = cdata->inline_wsize -
                                  rpcrdma_max_call_header_size(maxsegs);
        ia->ri_max_inline_read = cdata->inline_rsize -
index a49c788aa59a1632cd397543e73c2cd42dab5035..6bab8416a4fcd5023f1e1f7c731a0d27bbbba19a 100644 (file)
@@ -204,6 +204,33 @@ out_fail:
        goto out_schedule;
 }
 
+/*
+ * rpcrdma_update_connect_private - apply the peer's inline thresholds
+ * @r_xprt: transport whose rx_data inline sizes may be reduced
+ * @param: connection parameters delivered with the CM event
+ *
+ * Both directions start from RPCRDMA_V1_DEF_INLINE_SIZE. The sizes carried
+ * in the peer's private message replace those defaults only when the
+ * message is present, is at least sizeof(struct rpcrdma_connect_private)
+ * bytes long, and carries the expected magic and version. The local inline
+ * sizes are only ever lowered, never raised; afterwards the maximum header
+ * sizes are recomputed from the resulting values.
+ */
+static void
+rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
+                              struct rdma_conn_param *param)
+{
+       struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+       const struct rpcrdma_connect_private *pmsg = param->private_data;
+       unsigned int rsize, wsize;
+
+       rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
+       wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
+
+       /* The private data comes from the remote peer: make sure it is
+        * long enough to hold the whole message before reading any field.
+        */
+       if (pmsg &&
+           param->private_data_len >= sizeof(*pmsg) &&
+           pmsg->cp_magic == rpcrdma_cmp_magic &&
+           pmsg->cp_version == RPCRDMA_CMP_VERSION) {
+               /* The peer's send size bounds what we receive, and its
+                * receive size bounds what we send.
+                */
+               rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
+               wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
+       }
+
+       /* Only shrink the locally configured thresholds */
+       if (rsize < cdata->inline_rsize)
+               cdata->inline_rsize = rsize;
+       if (wsize < cdata->inline_wsize)
+               cdata->inline_wsize = wsize;
+       pr_info("rpcrdma: max send %u, max recv %u\n",
+               cdata->inline_wsize, cdata->inline_rsize);
+       rpcrdma_set_max_header_sizes(r_xprt);
+}
+
 static int
 rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 {
@@ -244,6 +271,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
                        " (%d initiator)\n",
                        __func__, attr->max_dest_rd_atomic,
                        attr->max_rd_atomic);
+               rpcrdma_update_connect_private(xprt, &event->param.conn);
                goto connected;
        case RDMA_CM_EVENT_CONNECT_ERROR:
                connstate = -ENOTCONN;
@@ -454,6 +482,7 @@ int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
 {
+       struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
        struct ib_cq *sendcq, *recvcq;
        unsigned int max_qp_wr;
        int rc;
@@ -536,9 +565,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        /* Initialize cma parameters */
        memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));
 
-       /* RPC/RDMA does not use private data */
-       ep->rep_remote_cma.private_data = NULL;
-       ep->rep_remote_cma.private_data_len = 0;
+       /* Prepare RDMA-CM private message */
+       pmsg->cp_magic = rpcrdma_cmp_magic;
+       pmsg->cp_version = RPCRDMA_CMP_VERSION;
+       pmsg->cp_flags = 0;
+       pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
+       pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
+       ep->rep_remote_cma.private_data = pmsg;
+       ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
 
        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
index 9aabca68c49deb9819dbbf938a2e2060067a26e3..89df1680b1eb68029f2441ae67e73b487b93c60d 100644 (file)
@@ -70,6 +70,7 @@ struct rpcrdma_ia {
        struct ib_pd            *ri_pd;
        struct completion       ri_done;
        int                     ri_async_rc;
+       unsigned int            ri_max_segs;
        unsigned int            ri_max_frmr_depth;
        unsigned int            ri_max_inline_write;
        unsigned int            ri_max_inline_read;
@@ -87,6 +88,7 @@ struct rpcrdma_ep {
        int                     rep_connected;
        struct ib_qp_init_attr  rep_attr;
        wait_queue_head_t       rep_connect_wait;
+       struct rpcrdma_connect_private  rep_cm_private;
        struct rdma_conn_param  rep_remote_cma;
        struct sockaddr_storage rep_remote_addr;
        struct delayed_work     rep_connect_worker;
@@ -523,9 +525,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
 int rpcrdma_marshal_req(struct rpc_rqst *);
-void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *,
-                                 struct rpcrdma_create_data_internal *,
-                                 unsigned int);
+void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */