xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep
author Chuck Lever <chuck.lever@oracle.com>
Wed, 21 Jan 2015 16:04:25 +0000 (11:04 -0500)
committer Anna Schumaker <Anna.Schumaker@Netapp.com>
Fri, 30 Jan 2015 15:47:49 +0000 (10:47 -0500)
The rr_base field is currently the buffer where RPC replies land.

An RPC/RDMA reply header lands in this buffer. In some cases an RPC
reply header also lands in this buffer, just after the RPC/RDMA
header.

The inline threshold is an agreed-on size limit for RDMA SEND
operations that pass between server and client. The sum of the
RPC/RDMA reply header size and the RPC reply header size must be
less than this threshold.

The largest RDMA RECV that the client should have to handle is the
size of the inline threshold. The receive buffer should thus be the
size of the inline threshold, and not related to RPCRDMA_MAX_SEGS.

RPC replies received via RDMA WRITE (long replies) are caught in
rq_rcv_buf, which is the second half of the RPC send buffer. I.e.,
such replies are not involved in any way with rr_base.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

index c1d4a093b8f11dc41dc528d31de91290841d332c..02efcaa1bbacb170cbd071ffbe0c7797b0cae2f9 100644 (file)
@@ -572,6 +572,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
 {
        unsigned int i, total_len;
        struct rpcrdma_write_chunk *cur_wchunk;
+       char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf);
 
        i = be32_to_cpu(**iptrp);
        if (i > max)
@@ -599,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
                        return -1;
                cur_wchunk = (struct rpcrdma_write_chunk *) w;
        }
-       if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
+       if ((char *)cur_wchunk > base + rep->rr_len)
                return -1;
 
        *iptrp = (__be32 *) cur_wchunk;
@@ -753,7 +754,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
                dprintk("RPC:       %s: short/invalid reply\n", __func__);
                goto repost;
        }
-       headerp = (struct rpcrdma_msg *) rep->rr_base;
+       headerp = rdmab_to_msg(rep->rr_rdmabuf);
        if (headerp->rm_vers != rpcrdma_version) {
                dprintk("RPC:       %s: invalid version %d\n",
                        __func__, be32_to_cpu(headerp->rm_vers));
index c81749b9a0de973e67a949a601d5c1b8958878bd..f58521dd88e21cb91bdeec8ad32c9cb7a245c6c7 100644 (file)
@@ -298,8 +298,9 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
 
        rep->rr_len = wc->byte_len;
        ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
-                       rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
-       prefetch(rep->rr_base);
+                                  rdmab_addr(rep->rr_rdmabuf),
+                                  rep->rr_len, DMA_FROM_DEVICE);
+       prefetch(rdmab_to_msg(rep->rr_rdmabuf));
 
 out_schedule:
        list_add_tail(&rep->rr_list, sched_list);
@@ -1092,23 +1093,21 @@ static struct rpcrdma_rep *
 rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
 {
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-       size_t rlen = 1 << fls(cdata->inline_rsize +
-                              sizeof(struct rpcrdma_rep));
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_rep *rep;
        int rc;
 
        rc = -ENOMEM;
-       rep = kmalloc(rlen, GFP_KERNEL);
+       rep = kzalloc(sizeof(*rep), GFP_KERNEL);
        if (rep == NULL)
                goto out;
-       memset(rep, 0, sizeof(*rep));
 
-       rc = rpcrdma_register_internal(ia, rep->rr_base, rlen -
-                                      offsetof(struct rpcrdma_rep, rr_base),
-                                      &rep->rr_handle, &rep->rr_iov);
-       if (rc)
+       rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
+                                              GFP_KERNEL);
+       if (IS_ERR(rep->rr_rdmabuf)) {
+               rc = PTR_ERR(rep->rr_rdmabuf);
                goto out_free;
+       }
 
        rep->rr_buffer = &r_xprt->rx_buf;
        return rep;
@@ -1306,7 +1305,7 @@ rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
        if (!rep)
                return;
 
-       rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov);
+       rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
        kfree(rep);
 }
 
@@ -2209,11 +2208,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
 
        recv_wr.next = NULL;
        recv_wr.wr_id = (u64) (unsigned long) rep;
-       recv_wr.sg_list = &rep->rr_iov;
+       recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        recv_wr.num_sge = 1;
 
        ib_dma_sync_single_for_cpu(ia->ri_id->device,
-               rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
+                                  rdmab_addr(rep->rr_rdmabuf),
+                                  rdmab_length(rep->rr_rdmabuf),
+                                  DMA_BIDIRECTIONAL);
 
        rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
 
index 84ad863fe637af9b39406ed767eecb19ccea7e32..2b69316dfd1148033f0857ee949027a7907dac1f 100644 (file)
@@ -180,14 +180,12 @@ enum rpcrdma_chunktype {
 struct rpcrdma_buffer;
 
 struct rpcrdma_rep {
-       unsigned int    rr_len;         /* actual received reply length */
-       struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
-       struct rpc_xprt *rr_xprt;       /* needed for request/reply matching */
-       void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
-       struct list_head rr_list;       /* tasklet list */
-       struct ib_sge   rr_iov;         /* for posting */
-       struct ib_mr    *rr_handle;     /* handle for mem in rr_iov */
-       char    rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
+       unsigned int            rr_len;
+       struct rpcrdma_buffer   *rr_buffer;
+       struct rpc_xprt         *rr_xprt;
+       void                    (*rr_func)(struct rpcrdma_rep *);
+       struct list_head        rr_list;
+       struct rpcrdma_regbuf   *rr_rdmabuf;
 };
 
 /*