svcrdma: Support RDMA_NOMSG requests
author Chuck Lever <chuck.lever@oracle.com>
Tue, 13 Jan 2015 16:03:37 +0000 (11:03 -0500)
committer J. Bruce Fields <bfields@redhat.com>
Thu, 15 Jan 2015 20:01:47 +0000 (15:01 -0500)
Currently the Linux server cannot decode RDMA_NOMSG type requests.
Operations whose length exceeds the fixed size of RDMA SEND buffers,
like large NFSv4 CREATE(NF4LNK) operations, must be conveyed via
RDMA_NOMSG.

For an RDMA_MSG type request, the client sends the RPC/RDMA and RPC
headers, and some or all of the NFS arguments, via RDMA SEND.

For an RDMA_NOMSG type request, the client sends just the RPC/RDMA
header via RDMA SEND. The request's read list contains elements for
the entire RPC message, including the RPC header.

NFSD expects the RPC/RDMA header and RPC header to be contiguous in
page zero of the XDR buffer. Add logic in the RDMA READ path to make
the read list contents land where the server prefers, when the
incoming message is a type RDMA_NOMSG message.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
include/linux/sunrpc/svc_rdma.h
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

index f161e309f25edca385440e1871278c052c779ad9..c343a94bc7910eaf36a098951fa32dcc7d102f5a 100644 (file)
@@ -79,6 +79,7 @@ struct svc_rdma_op_ctxt {
        enum ib_wr_opcode wr_op;
        enum ib_wc_status wc_status;
        u32 byte_len;
+       u32 position;
        struct svcxprt_rdma *xprt;
        unsigned long flags;
        enum dma_data_direction direction;
index a67dd1a081ddacff82e7d99408d6e33f9f722e78..36cf51a3eab7382ec5684e8063ac587fe7718eac 100644 (file)
@@ -60,6 +60,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
                               struct svc_rdma_op_ctxt *ctxt,
                               u32 byte_count)
 {
+       struct rpcrdma_msg *rmsgp;
        struct page *page;
        u32 bc;
        int sge_no;
@@ -82,7 +83,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
        /* If data remains, store it in the pagelist */
        rqstp->rq_arg.page_len = bc;
        rqstp->rq_arg.page_base = 0;
-       rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
+       /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+       rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+       if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+               rqstp->rq_arg.pages = &rqstp->rq_pages[0];
+       else
+               rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
        sge_no = 1;
        while (bc && sge_no < ctxt->count) {
                page = ctxt->pages[sge_no];
@@ -383,7 +391,6 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
         */
        head->arg.head[0] = rqstp->rq_arg.head[0];
        head->arg.tail[0] = rqstp->rq_arg.tail[0];
-       head->arg.pages = &head->pages[head->count];
        head->hdr_count = head->count;
        head->arg.page_base = 0;
        head->arg.page_len = 0;
@@ -393,9 +400,17 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
        ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
        position = be32_to_cpu(ch->rc_position);
 
+       /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+       if (position == 0) {
+               head->arg.pages = &head->pages[0];
+               page_offset = head->byte_len;
+       } else {
+               head->arg.pages = &head->pages[head->count];
+               page_offset = 0;
+       }
+
        ret = 0;
        page_no = 0;
-       page_offset = 0;
        for (; ch->rc_discrim != xdr_zero; ch++) {
                if (be32_to_cpu(ch->rc_position) != position)
                        goto err;
@@ -418,7 +433,10 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
                        head->arg.buflen += ret;
                }
        }
+
        ret = 1;
+       head->position = position;
+
  err:
        /* Detach arg pages. svc_recv will replenish them */
        for (page_no = 0;
@@ -465,6 +483,21 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
                put_page(rqstp->rq_pages[page_no]);
                rqstp->rq_pages[page_no] = head->pages[page_no];
        }
+
+       /* Adjustments made for RDMA_NOMSG type requests */
+       if (head->position == 0) {
+               if (head->arg.len <= head->sge[0].length) {
+                       head->arg.head[0].iov_len = head->arg.len -
+                                                       head->byte_len;
+                       head->arg.page_len = 0;
+               } else {
+                       head->arg.head[0].iov_len = head->sge[0].length -
+                                                               head->byte_len;
+                       head->arg.page_len = head->arg.len -
+                                               head->sge[0].length;
+               }
+       }
+
        /* Point rq_arg.pages past header */
        rdma_fix_xdr_pad(&head->arg);
        rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];