RPC/RDMA: adhere to protocol for unpadded client trailing write chunks.
author: Tom Talpey <talpey@netapp.com>
Thu, 9 Oct 2008 19:01:11 +0000 (15:01 -0400)
committer: Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 10 Oct 2008 19:12:33 +0000 (15:12 -0400)
The RPC/RDMA protocol allows clients and servers to avoid RDMA
operations for data which is purely the result of XDR padding.
On the client, automatically insert the necessary padding for
such server replies, and optionally don't marshal such chunks.

Signed-off-by: Tom Talpey <talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/xprt_rdma.h

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 721dae795d685ccb6bfeb6a232659126a9c64697..d245c0bf787360de03d5d7efef03ba34c0f6ad71 100644 (file)
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
        }
 
        if (xdrbuf->tail[0].iov_len) {
+               /* the rpcrdma protocol allows us to omit any trailing
+                * xdr pad bytes, saving the server an RDMA operation. */
+               if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
+                       return n;
                if (n == nsegs)
                        return 0;
                seg[n].mr_page = NULL;
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
  * Scatter inline received data back into provided iov's.
  */
 static void
-rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 {
        int i, npages, curlen, olen;
        char *destp;
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
        } else
                rqst->rq_rcv_buf.tail[0].iov_len = 0;
 
+       if (pad) {
+               /* implicit padding on terminal chunk */
+               unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
+               while (pad--)
+                       p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
+       }
+
        if (copy_len)
                dprintk("RPC:       %s: %d bytes in"
                        " %d extra segments (%d lost)\n",
@@ -794,14 +805,20 @@ repost:
                            ((unsigned char *)iptr - (unsigned char *)headerp);
                        status = rep->rr_len + rdmalen;
                        r_xprt->rx_stats.total_rdma_reply += rdmalen;
+                       /* special case - last chunk may omit padding */
+                       if (rdmalen &= 3) {
+                               rdmalen = 4 - rdmalen;
+                               status += rdmalen;
+                       }
                } else {
                        /* else ordinary inline */
+                       rdmalen = 0;
                        iptr = (__be32 *)((unsigned char *)headerp + 28);
                        rep->rr_len -= 28; /*sizeof *headerp;*/
                        status = rep->rr_len;
                }
                /* Fix up the rpc results for upper layer */
-               rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+               rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
                break;
 
        case __constant_htonl(RDMA_NOMSG):
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ec6d1e7a1941fe0b187077b4b8ff93f3085ee490..c7d2380bb5e371bc31c0649bc33b85fdd3deed73 100644 (file)
@@ -71,6 +71,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+                int xprt_rdma_pad_optimize = 0;
 
 #ifdef RPC_DEBUG
 
@@ -135,6 +136,14 @@ static ctl_table xr_tunables_table[] = {
                .extra1         = &min_memreg,
                .extra2         = &max_memreg,
        },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "rdma_pad_optimize",
+               .data           = &xprt_rdma_pad_optimize,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
        {
                .ctl_name = 0,
        },
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2db2344d487e39b2769776c1cc3ea504a2226396..fde6499a53b2bc4efb590dddefa90f1af8cde2b7 100644 (file)
@@ -280,6 +280,11 @@ struct rpcrdma_xprt {
 #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
 #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
 
+/* Setting this to 0 ensures interoperability with early servers.
+ * Setting this to 1 enhances certain unaligned read/write performance.
+ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
+extern int xprt_rdma_pad_optimize;
+
 /*
  * Interface Adapter calls - xprtrdma/verbs.c
  */