sunrpc: Advertise maximum backchannel payload size
[GitHub/moto-9609/android_kernel_motorola_exynos9610.git] / net / sunrpc / xprtrdma / transport.c
CommitLineData
f58851e6
TT
1/*
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * transport.c
42 *
43 * This file contains the top-level implementation of an RPC RDMA
44 * transport.
45 *
46 * Naming convention: functions beginning with xprt_ are part of the
47 * transport switch. All others are RPC RDMA internal.
48 */
49
50#include <linux/module.h>
5a0e3ad6 51#include <linux/slab.h>
f58851e6 52#include <linux/seq_file.h>
5976687a 53#include <linux/sunrpc/addr.h>
f58851e6
TT
54
55#include "xprt_rdma.h"
56
f895b252 57#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
f58851e6
TT
58# define RPCDBG_FACILITY RPCDBG_TRANS
59#endif
60
f58851e6
TT
61/*
62 * tunables
63 */
64
65static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
5d252f90 66unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
f58851e6
TT
67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
68static unsigned int xprt_rdma_inline_write_padding;
3197d309 69static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
d5440e27 70 int xprt_rdma_pad_optimize = 1;
f58851e6 71
f895b252 72#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
f58851e6
TT
73
74static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
75static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
76static unsigned int zero;
77static unsigned int max_padding = PAGE_SIZE;
78static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
79static unsigned int max_memreg = RPCRDMA_LAST - 1;
80
81static struct ctl_table_header *sunrpc_table_header;
82
fe2c6338 83static struct ctl_table xr_tunables_table[] = {
f58851e6 84 {
f58851e6
TT
85 .procname = "rdma_slot_table_entries",
86 .data = &xprt_rdma_slot_table_entries,
87 .maxlen = sizeof(unsigned int),
88 .mode = 0644,
6d456111 89 .proc_handler = proc_dointvec_minmax,
f58851e6
TT
90 .extra1 = &min_slot_table_size,
91 .extra2 = &max_slot_table_size
92 },
93 {
f58851e6
TT
94 .procname = "rdma_max_inline_read",
95 .data = &xprt_rdma_max_inline_read,
96 .maxlen = sizeof(unsigned int),
97 .mode = 0644,
6d456111 98 .proc_handler = proc_dointvec,
f58851e6
TT
99 },
100 {
f58851e6
TT
101 .procname = "rdma_max_inline_write",
102 .data = &xprt_rdma_max_inline_write,
103 .maxlen = sizeof(unsigned int),
104 .mode = 0644,
6d456111 105 .proc_handler = proc_dointvec,
f58851e6
TT
106 },
107 {
f58851e6
TT
108 .procname = "rdma_inline_write_padding",
109 .data = &xprt_rdma_inline_write_padding,
110 .maxlen = sizeof(unsigned int),
111 .mode = 0644,
6d456111 112 .proc_handler = proc_dointvec_minmax,
f58851e6
TT
113 .extra1 = &zero,
114 .extra2 = &max_padding,
115 },
116 {
f58851e6
TT
117 .procname = "rdma_memreg_strategy",
118 .data = &xprt_rdma_memreg_strategy,
119 .maxlen = sizeof(unsigned int),
120 .mode = 0644,
6d456111 121 .proc_handler = proc_dointvec_minmax,
f58851e6
TT
122 .extra1 = &min_memreg,
123 .extra2 = &max_memreg,
124 },
9191ca3b 125 {
9191ca3b
TT
126 .procname = "rdma_pad_optimize",
127 .data = &xprt_rdma_pad_optimize,
128 .maxlen = sizeof(unsigned int),
129 .mode = 0644,
6d456111 130 .proc_handler = proc_dointvec,
9191ca3b 131 },
f8572d8f 132 { },
f58851e6
TT
133};
134
fe2c6338 135static struct ctl_table sunrpc_table[] = {
f58851e6 136 {
f58851e6
TT
137 .procname = "sunrpc",
138 .mode = 0555,
139 .child = xr_tunables_table
140 },
f8572d8f 141 { },
f58851e6
TT
142};
143
144#endif
145
5d252f90 146static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
f58851e6 147
0dd39cae
CL
148static void
149xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
150{
151 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
152 char buf[20];
153
154 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
155 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
156
157 xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
158}
159
160static void
161xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
162{
163 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
164 char buf[40];
165
166 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
167 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
168
169 xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
170}
171
5d252f90 172void
5231eb97 173xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
f58851e6 174{
0dd39cae
CL
175 char buf[128];
176
177 switch (sap->sa_family) {
178 case AF_INET:
179 xprt_rdma_format_addresses4(xprt, sap);
180 break;
181 case AF_INET6:
182 xprt_rdma_format_addresses6(xprt, sap);
183 break;
184 default:
185 pr_err("rpcrdma: Unrecognized address family\n");
186 return;
187 }
f58851e6 188
c877b849
CL
189 (void)rpc_ntop(sap, buf, sizeof(buf));
190 xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
f58851e6 191
81160e66 192 snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
c877b849 193 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
f58851e6 194
81160e66 195 snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
c877b849 196 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
f58851e6 197
0dd39cae 198 xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
f58851e6
TT
199}
200
5d252f90 201void
f58851e6
TT
202xprt_rdma_free_addresses(struct rpc_xprt *xprt)
203{
33e01dc7
CL
204 unsigned int i;
205
206 for (i = 0; i < RPC_DISPLAY_MAX; i++)
207 switch (i) {
208 case RPC_DISPLAY_PROTO:
209 case RPC_DISPLAY_NETID:
210 continue;
211 default:
212 kfree(xprt->address_strings[i]);
213 }
f58851e6
TT
214}
215
216static void
217xprt_rdma_connect_worker(struct work_struct *work)
218{
5abefb86
CL
219 struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
220 rx_connect_worker.work);
221 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
f58851e6
TT
222 int rc = 0;
223
d19751e7
TM
224 xprt_clear_connected(xprt);
225
226 dprintk("RPC: %s: %sconnect\n", __func__,
227 r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
228 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
229 if (rc)
230 xprt_wake_pending_tasks(xprt, rc);
231
f58851e6
TT
232 dprintk("RPC: %s: exit\n", __func__);
233 xprt_clear_connecting(xprt);
234}
235
4a068258
CL
236static void
237xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
238{
239 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
240 rx_xprt);
241
242 pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
243 rdma_disconnect(r_xprt->rx_ia.ri_id);
244}
245
f58851e6
TT
246/*
247 * xprt_rdma_destroy
248 *
249 * Destroy the xprt.
250 * Free all memory associated with the object, including its own.
251 * NOTE: none of the *destroy methods free memory for their top-level
252 * objects, even though they may have allocated it (they do free
253 * private memory). It's up to the caller to handle it. In this
254 * case (RDMA transport), all structure memory is inlined with the
255 * struct rpcrdma_xprt.
256 */
257static void
258xprt_rdma_destroy(struct rpc_xprt *xprt)
259{
260 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
f58851e6
TT
261
262 dprintk("RPC: %s: called\n", __func__);
263
5abefb86 264 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
f58851e6
TT
265
266 xprt_clear_connected(xprt);
267
7f1d5419 268 rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
72c02173 269 rpcrdma_buffer_destroy(&r_xprt->rx_buf);
f58851e6
TT
270 rpcrdma_ia_close(&r_xprt->rx_ia);
271
272 xprt_rdma_free_addresses(xprt);
273
e204e621 274 xprt_free(xprt);
f58851e6
TT
275
276 dprintk("RPC: %s: returning\n", __func__);
277
278 module_put(THIS_MODULE);
279}
280
2881ae74
TM
281static const struct rpc_timeout xprt_rdma_default_timeout = {
282 .to_initval = 60 * HZ,
283 .to_maxval = 60 * HZ,
284};
285
f58851e6
TT
286/**
287 * xprt_setup_rdma - Set up transport to use RDMA
288 *
289 * @args: rpc transport arguments
290 */
291static struct rpc_xprt *
292xprt_setup_rdma(struct xprt_create *args)
293{
294 struct rpcrdma_create_data_internal cdata;
295 struct rpc_xprt *xprt;
296 struct rpcrdma_xprt *new_xprt;
297 struct rpcrdma_ep *new_ep;
5231eb97 298 struct sockaddr *sap;
f58851e6
TT
299 int rc;
300
301 if (args->addrlen > sizeof(xprt->addr)) {
302 dprintk("RPC: %s: address too large\n", __func__);
303 return ERR_PTR(-EBADF);
304 }
305
37aa2133 306 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
d9ba131d 307 xprt_rdma_slot_table_entries,
bd1722d4 308 xprt_rdma_slot_table_entries);
f58851e6
TT
309 if (xprt == NULL) {
310 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
311 __func__);
312 return ERR_PTR(-ENOMEM);
313 }
314
f58851e6 315 /* 60 second timeout, no retries */
ba7392bb 316 xprt->timeout = &xprt_rdma_default_timeout;
bfaee096
CL
317 xprt->bind_timeout = RPCRDMA_BIND_TO;
318 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
319 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
f58851e6
TT
320
321 xprt->resvport = 0; /* privileged port not needed */
322 xprt->tsh_size = 0; /* RPC-RDMA handles framing */
f58851e6
TT
323 xprt->ops = &xprt_rdma_procs;
324
325 /*
326 * Set up RDMA-specific connect data.
327 */
328
5231eb97
CL
329 sap = (struct sockaddr *)&cdata.addr;
330 memcpy(sap, args->dstaddr, args->addrlen);
f58851e6
TT
331
332 /* Ensure xprt->addr holds valid server TCP (not RDMA)
333 * address, for any side protocols which peek at it */
334 xprt->prot = IPPROTO_TCP;
335 xprt->addrlen = args->addrlen;
5231eb97 336 memcpy(&xprt->addr, sap, xprt->addrlen);
f58851e6 337
5231eb97 338 if (rpc_get_port(sap))
f58851e6
TT
339 xprt_set_bound(xprt);
340
f58851e6
TT
341 cdata.max_requests = xprt->max_reqs;
342
f58851e6
TT
343 cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
344 cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
345
346 cdata.inline_wsize = xprt_rdma_max_inline_write;
347 if (cdata.inline_wsize > cdata.wsize)
348 cdata.inline_wsize = cdata.wsize;
349
350 cdata.inline_rsize = xprt_rdma_max_inline_read;
351 if (cdata.inline_rsize > cdata.rsize)
352 cdata.inline_rsize = cdata.rsize;
353
354 cdata.padding = xprt_rdma_inline_write_padding;
355
356 /*
357 * Create new transport instance, which includes initialized
358 * o ia
359 * o endpoint
360 * o buffers
361 */
362
363 new_xprt = rpcx_to_rdmax(xprt);
364
5231eb97 365 rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy);
f58851e6
TT
366 if (rc)
367 goto out1;
368
369 /*
370 * initialize and create ep
371 */
372 new_xprt->rx_data = cdata;
373 new_ep = &new_xprt->rx_ep;
374 new_ep->rep_remote_addr = cdata.addr;
375
376 rc = rpcrdma_ep_create(&new_xprt->rx_ep,
377 &new_xprt->rx_ia, &new_xprt->rx_data);
378 if (rc)
379 goto out2;
380
381 /*
382 * Allocate pre-registered send and receive buffers for headers and
383 * any inline data. Also specify any padding which will be provided
384 * from a preregistered zero buffer.
385 */
ac920d04 386 rc = rpcrdma_buffer_create(new_xprt);
f58851e6
TT
387 if (rc)
388 goto out3;
389
390 /*
391 * Register a callback for connection events. This is necessary because
392 * connection loss notification is async. We also catch connection loss
393 * when reaping receives.
394 */
5abefb86
CL
395 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
396 xprt_rdma_connect_worker);
f58851e6 397
5231eb97 398 xprt_rdma_format_addresses(xprt, sap);
1c9351ee
CL
399 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
400 if (xprt->max_payload == 0)
401 goto out4;
402 xprt->max_payload <<= PAGE_SHIFT;
43e95988
CL
403 dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
404 __func__, xprt->max_payload);
f58851e6
TT
405
406 if (!try_module_get(THIS_MODULE))
407 goto out4;
408
5231eb97
CL
409 dprintk("RPC: %s: %s:%s\n", __func__,
410 xprt->address_strings[RPC_DISPLAY_ADDR],
411 xprt->address_strings[RPC_DISPLAY_PORT]);
f58851e6
TT
412 return xprt;
413
414out4:
415 xprt_rdma_free_addresses(xprt);
416 rc = -EINVAL;
417out3:
7f1d5419 418 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
f58851e6
TT
419out2:
420 rpcrdma_ia_close(&new_xprt->rx_ia);
421out1:
e204e621 422 xprt_free(xprt);
f58851e6
TT
423 return ERR_PTR(rc);
424}
425
426/*
427 * Close a connection, during shutdown or timeout/reconnect
428 */
429static void
430xprt_rdma_close(struct rpc_xprt *xprt)
431{
432 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
433
434 dprintk("RPC: %s: closing\n", __func__);
08ca0dce
TT
435 if (r_xprt->rx_ep.rep_connected > 0)
436 xprt->reestablish_timeout = 0;
62da3b24 437 xprt_disconnect_done(xprt);
282191cb 438 rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
f58851e6
TT
439}
440
441static void
442xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
443{
444 struct sockaddr_in *sap;
445
446 sap = (struct sockaddr_in *)&xprt->addr;
447 sap->sin_port = htons(port);
448 sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
449 sap->sin_port = htons(port);
450 dprintk("RPC: %s: %u\n", __func__, port);
451}
452
453static void
1b092092 454xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
f58851e6 455{
f58851e6
TT
456 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
457
0b9e7943
TM
458 if (r_xprt->rx_ep.rep_connected != 0) {
459 /* Reconnect */
5abefb86
CL
460 schedule_delayed_work(&r_xprt->rx_connect_worker,
461 xprt->reestablish_timeout);
0b9e7943 462 xprt->reestablish_timeout <<= 1;
bfaee096
CL
463 if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
464 xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
465 else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
466 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
0b9e7943 467 } else {
5abefb86 468 schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
0b9e7943 469 if (!RPC_IS_ASYNC(task))
5abefb86 470 flush_delayed_work(&r_xprt->rx_connect_worker);
f58851e6
TT
471 }
472}
473
f58851e6
TT
474/*
475 * The RDMA allocate/free functions need the task structure as a place
476 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
0ca77dc3
CL
477 * sequence.
478 *
479 * The RPC layer allocates both send and receive buffers in the same call
480 * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
481 * We may register rq_rcv_buf when using reply chunks.
f58851e6
TT
482 */
483static void *
484xprt_rdma_allocate(struct rpc_task *task, size_t size)
485{
a4f0835c 486 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
0ca77dc3
CL
487 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
488 struct rpcrdma_regbuf *rb;
489 struct rpcrdma_req *req;
490 size_t min_size;
a0a1d50c 491 gfp_t flags;
f58851e6 492
0ca77dc3 493 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
c977dea2
CL
494 if (req == NULL)
495 return NULL;
f58851e6 496
5d252f90 497 flags = RPCRDMA_DEF_GFP;
a0a1d50c
CL
498 if (RPC_IS_SWAPPER(task))
499 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
500
85275c87
CL
501 if (req->rl_rdmabuf == NULL)
502 goto out_rdmabuf;
0ca77dc3
CL
503 if (req->rl_sendbuf == NULL)
504 goto out_sendbuf;
505 if (size > req->rl_sendbuf->rg_size)
506 goto out_sendbuf;
507
508out:
f58851e6 509 dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
575448bd 510 req->rl_connect_cookie = 0; /* our reserved value */
0ca77dc3
CL
511 return req->rl_sendbuf->rg_base;
512
85275c87
CL
513out_rdmabuf:
514 min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
515 rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
516 if (IS_ERR(rb))
517 goto out_fail;
518 req->rl_rdmabuf = rb;
519
0ca77dc3
CL
520out_sendbuf:
521 /* XDR encoding and RPC/RDMA marshaling of this request has not
522 * yet occurred. Thus a lower bound is needed to prevent buffer
523 * overrun during marshaling.
524 *
525 * RPC/RDMA marshaling may choose to send payload bearing ops
526 * inline, if the result is smaller than the inline threshold.
527 * The value of the "size" argument accounts for header
528 * requirements but not for the payload in these cases.
529 *
530 * Likewise, allocate enough space to receive a reply up to the
531 * size of the inline threshold.
532 *
533 * It's unlikely that both the send header and the received
534 * reply will be large, but slush is provided here to allow
535 * flexibility when marshaling.
536 */
537 min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
538 min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
539 if (size < min_size)
540 size = min_size;
541
542 rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
543 if (IS_ERR(rb))
544 goto out_fail;
545 rb->rg_owner = req;
546
547 r_xprt->rx_stats.hardway_register_count += size;
548 rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
549 req->rl_sendbuf = rb;
550 goto out;
551
552out_fail:
f58851e6 553 rpcrdma_buffer_put(req);
0ca77dc3 554 r_xprt->rx_stats.failed_marshal_count++;
f58851e6
TT
555 return NULL;
556}
557
558/*
559 * This function returns all RDMA resources to the pool.
560 */
561static void
562xprt_rdma_free(void *buffer)
563{
564 struct rpcrdma_req *req;
565 struct rpcrdma_xprt *r_xprt;
0ca77dc3 566 struct rpcrdma_regbuf *rb;
f58851e6
TT
567 int i;
568
569 if (buffer == NULL)
570 return;
571
0ca77dc3
CL
572 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
573 req = rb->rg_owner;
ffc4d9b1
CL
574 if (req->rl_backchannel)
575 return;
576
0ca77dc3 577 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
f58851e6 578
0ca77dc3 579 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
f58851e6 580
f58851e6
TT
581 for (i = 0; req->rl_nchunks;) {
582 --req->rl_nchunks;
6814baea
CL
583 i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
584 &req->rl_segments[i]);
f58851e6
TT
585 }
586
f58851e6
TT
587 rpcrdma_buffer_put(req);
588}
589
590/*
591 * send_request invokes the meat of RPC RDMA. It must do the following:
592 * 1. Marshal the RPC request into an RPC RDMA request, which means
593 * putting a header in front of data, and creating IOVs for RDMA
594 * from those in the request.
595 * 2. In marshaling, detect opportunities for RDMA, and use them.
596 * 3. Post a recv message to set up asynch completion, then send
597 * the request (rpcrdma_ep_post).
598 * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
599 */
600
601static int
602xprt_rdma_send_request(struct rpc_task *task)
603{
604 struct rpc_rqst *rqst = task->tk_rqstp;
a4f0835c 605 struct rpc_xprt *xprt = rqst->rq_xprt;
f58851e6
TT
606 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
607 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
6ab59945 608 int rc = 0;
f58851e6 609
e2377945 610 rc = rpcrdma_marshal_req(rqst);
6ab59945
CL
611 if (rc < 0)
612 goto failed_marshal;
f58851e6
TT
613
614 if (req->rl_reply == NULL) /* e.g. reconnection */
615 rpcrdma_recv_buffer_get(req);
616
575448bd
TT
617 /* Must suppress retransmit to maintain credits */
618 if (req->rl_connect_cookie == xprt->connect_cookie)
619 goto drop_connection;
620 req->rl_connect_cookie = xprt->connect_cookie;
621
622 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
623 goto drop_connection;
f58851e6 624
d60dbb20 625 rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
f58851e6
TT
626 rqst->rq_bytes_sent = 0;
627 return 0;
575448bd 628
c93c6223
CL
629failed_marshal:
630 r_xprt->rx_stats.failed_marshal_count++;
631 dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
632 __func__, rc);
633 if (rc == -EIO)
634 return -EIO;
575448bd
TT
635drop_connection:
636 xprt_disconnect_done(xprt);
637 return -ENOTCONN; /* implies disconnect */
f58851e6
TT
638}
639
5d252f90 640void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
f58851e6
TT
641{
642 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
643 long idle_time = 0;
644
645 if (xprt_connected(xprt))
646 idle_time = (long)(jiffies - xprt->last_used) / HZ;
647
763f7e4e
CL
648 seq_puts(seq, "\txprt:\trdma ");
649 seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ",
650 0, /* need a local port? */
651 xprt->stat.bind_count,
652 xprt->stat.connect_count,
653 xprt->stat.connect_time,
654 idle_time,
655 xprt->stat.sends,
656 xprt->stat.recvs,
657 xprt->stat.bad_xids,
658 xprt->stat.req_u,
659 xprt->stat.bklog_u);
860477d1 660 seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu\n",
763f7e4e
CL
661 r_xprt->rx_stats.read_chunk_count,
662 r_xprt->rx_stats.write_chunk_count,
663 r_xprt->rx_stats.reply_chunk_count,
664 r_xprt->rx_stats.total_rdma_request,
665 r_xprt->rx_stats.total_rdma_reply,
666 r_xprt->rx_stats.pullup_copy_count,
667 r_xprt->rx_stats.fixup_copy_count,
668 r_xprt->rx_stats.hardway_register_count,
669 r_xprt->rx_stats.failed_marshal_count,
860477d1
CL
670 r_xprt->rx_stats.bad_reply_count,
671 r_xprt->rx_stats.nomsg_call_count);
f58851e6
TT
672}
673
d67fa4d8
JL
/* enable_swap method: does nothing for this transport and reports success. */
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
	return 0;
}
679
/* disable_swap method: intentionally empty for this transport. */
static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
684
f58851e6
TT
685/*
686 * Plumbing for rpc transport switch and kernel module
687 */
688
689static struct rpc_xprt_ops xprt_rdma_procs = {
e7ce710a 690 .reserve_xprt = xprt_reserve_xprt_cong,
f58851e6 691 .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
f39c1bfb 692 .alloc_slot = xprt_alloc_slot,
f58851e6
TT
693 .release_request = xprt_release_rqst_cong, /* ditto */
694 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
695 .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
696 .set_port = xprt_rdma_set_port,
697 .connect = xprt_rdma_connect,
698 .buf_alloc = xprt_rdma_allocate,
699 .buf_free = xprt_rdma_free,
700 .send_request = xprt_rdma_send_request,
701 .close = xprt_rdma_close,
702 .destroy = xprt_rdma_destroy,
d67fa4d8
JL
703 .print_stats = xprt_rdma_print_stats,
704 .enable_swap = xprt_rdma_enable_swap,
705 .disable_swap = xprt_rdma_disable_swap,
f531a5db
CL
706 .inject_disconnect = xprt_rdma_inject_disconnect,
707#if defined(CONFIG_SUNRPC_BACKCHANNEL)
708 .bc_setup = xprt_rdma_bc_setup,
76566773 709 .bc_up = xprt_rdma_bc_up,
6b26cc8c 710 .bc_maxpayload = xprt_rdma_bc_maxpayload,
f531a5db
CL
711 .bc_free_rqst = xprt_rdma_bc_free_rqst,
712 .bc_destroy = xprt_rdma_bc_destroy,
713#endif
f58851e6
TT
714};
715
716static struct xprt_class xprt_rdma = {
717 .list = LIST_HEAD_INIT(xprt_rdma.list),
718 .name = "rdma",
719 .owner = THIS_MODULE,
720 .ident = XPRT_TRANSPORT_RDMA,
721 .setup = xprt_setup_rdma,
722};
723
ffe1f0df 724void xprt_rdma_cleanup(void)
f58851e6
TT
725{
726 int rc;
727
3a0799a9 728 dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
f895b252 729#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
f58851e6
TT
730 if (sunrpc_table_header) {
731 unregister_sysctl_table(sunrpc_table_header);
732 sunrpc_table_header = NULL;
733 }
734#endif
735 rc = xprt_unregister_transport(&xprt_rdma);
736 if (rc)
737 dprintk("RPC: %s: xprt_unregister returned %i\n",
738 __func__, rc);
951e721c 739
fe97b47c 740 rpcrdma_destroy_wq();
951e721c 741 frwr_destroy_recovery_wq();
5d252f90
CL
742
743 rc = xprt_unregister_transport(&xprt_rdma_bc);
744 if (rc)
745 dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
746 __func__, rc);
f58851e6
TT
747}
748
ffe1f0df 749int xprt_rdma_init(void)
f58851e6
TT
750{
751 int rc;
752
951e721c 753 rc = frwr_alloc_recovery_wq();
f58851e6
TT
754 if (rc)
755 return rc;
756
fe97b47c
CL
757 rc = rpcrdma_alloc_wq();
758 if (rc) {
759 frwr_destroy_recovery_wq();
760 return rc;
761 }
762
951e721c
CL
763 rc = xprt_register_transport(&xprt_rdma);
764 if (rc) {
fe97b47c 765 rpcrdma_destroy_wq();
951e721c
CL
766 frwr_destroy_recovery_wq();
767 return rc;
768 }
769
5d252f90
CL
770 rc = xprt_register_transport(&xprt_rdma_bc);
771 if (rc) {
772 xprt_unregister_transport(&xprt_rdma);
773 rpcrdma_destroy_wq();
774 frwr_destroy_recovery_wq();
775 return rc;
776 }
777
3a0799a9 778 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
f58851e6 779
3a0799a9
CL
780 dprintk("Defaults:\n");
781 dprintk("\tSlots %d\n"
f58851e6
TT
782 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
783 xprt_rdma_slot_table_entries,
784 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
3a0799a9 785 dprintk("\tPadding %d\n\tMemreg %d\n",
f58851e6
TT
786 xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
787
f895b252 788#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
f58851e6
TT
789 if (!sunrpc_table_header)
790 sunrpc_table_header = register_sysctl_table(sunrpc_table);
791#endif
792 return 0;
793}