xprtrdma: Reduce required number of send SGEs
authorChuck Lever <chuck.lever@oracle.com>
Wed, 8 Feb 2017 22:00:10 +0000 (17:00 -0500)
committerAnna Schumaker <Anna.Schumaker@Netapp.com>
Fri, 10 Feb 2017 19:02:36 +0000 (14:02 -0500)
The MAX_SEND_SGES check introduced in commit 655fec6987be
("xprtrdma: Use gathered Send for large inline messages") fails
for devices that have a small max_sge.

Instead of checking for a large fixed maximum number of SGEs,
check for a minimum small number. RPC-over-RDMA will switch to
using a Read chunk if an xdr_buf has more pages than can fit in
the device's max_sge limit. This is considerably better than
failing altogether to mount the server.

This fix supports devices that have as few as three send SGEs
available.

Reported-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reported-by: Devesh Sharma <devesh.sharma@broadcom.com>
Reported-by: Honggang Li <honli@redhat.com>
Reported-by: Ram Amrani <Ram.Amrani@cavium.com>
Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...")
Cc: stable@vger.kernel.org # v4.9+
Tested-by: Honggang Li <honli@redhat.com>
Tested-by: Ram Amrani <Ram.Amrani@cavium.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

index c634f0f3f9ceeca55e1cf219fa0ebe06d4d566b8..d88988365cd24039d9532ea7da5fa04f184507d2 100644 (file)
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 /* The client can send a request inline as long as the RPCRDMA header
  * plus the RPC call fit under the transport's inline limit. If the
  * combined call message size exceeds that limit, the client must use
- * the read chunk list for this operation.
+ * a Read chunk for this operation.
+ *
+ * A Read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
  */
 static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
                                struct rpc_rqst *rqst)
 {
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct xdr_buf *xdr = &rqst->rq_snd_buf;
+       unsigned int count, remaining, offset;
+
+       if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+               return false;
 
-       return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+       if (xdr->page_len) {
+               remaining = xdr->page_len;
+               offset = xdr->page_base & ~PAGE_MASK;
+               count = 0;
+               while (remaining) {
+                       remaining -= min_t(unsigned int,
+                                          PAGE_SIZE - offset, remaining);
+                       offset = 0;
+                       if (++count > r_xprt->rx_ia.ri_max_send_sges)
+                               return false;
+               }
+       }
+
+       return true;
 }
 
 /* The client can't know how large the actual reply will be. Thus it
index 23f4da419a648b2d03baf63f6196e3fd71282a0c..61d16c39e92c53935c4d2f02529f83b5b86061ee 100644 (file)
@@ -488,18 +488,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
  */
 int
 rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
-                               struct rpcrdma_create_data_internal *cdata)
+                 struct rpcrdma_create_data_internal *cdata)
 {
        struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+       unsigned int max_qp_wr, max_sge;
        struct ib_cq *sendcq, *recvcq;
-       unsigned int max_qp_wr;
        int rc;
 
-       if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
-               dprintk("RPC:       %s: insufficient sge's available\n",
-                       __func__);
+       max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+       if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+               pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
                return -ENOMEM;
        }
+       ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
 
        if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
                dprintk("RPC:       %s: insufficient wqe's available\n",
@@ -524,7 +525,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
        ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_recv_wr += 1;      /* drain cqe */
-       ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+       ep->rep_attr.cap.max_send_sge = max_sge;
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;
        ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
index c13715431419cdffc24e409f5685cb9e03a99a91..3d7e9c9bad1f5bd8d6a4d6fe1c206cd82493dcaf 100644 (file)
@@ -74,6 +74,7 @@ struct rpcrdma_ia {
        unsigned int            ri_max_frmr_depth;
        unsigned int            ri_max_inline_write;
        unsigned int            ri_max_inline_read;
+       unsigned int            ri_max_send_sges;
        bool                    ri_reminv_expected;
        bool                    ri_implicit_roundup;
        enum ib_mr_type         ri_mrtype;
@@ -311,6 +312,7 @@ struct rpcrdma_mr_seg {             /* chunk descriptors */
  * - xdr_buf tail iovec
  */
 enum {
+       RPCRDMA_MIN_SEND_SGES = 3,
        RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
        RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
        RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,