Commit | Line | Data |
---|---|---|
d5b31be6 | 1 | /* |
0bf48289 | 2 | * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. |
d5b31be6 TT |
3 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | * | |
40 | * Author: Tom Tucker <tom@opengridcomputing.com> | |
41 | */ | |
42 | ||
43 | #include <linux/sunrpc/debug.h> | |
44 | #include <linux/sunrpc/rpc_rdma.h> | |
45 | #include <linux/spinlock.h> | |
46 | #include <asm/unaligned.h> | |
47 | #include <rdma/ib_verbs.h> | |
48 | #include <rdma/rdma_cm.h> | |
49 | #include <linux/sunrpc/svc_rdma.h> | |
50 | ||
51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
52 | ||
53 | /* | |
54 | * Replace the pages in the rq_argpages array with the pages from the SGE in | |
55 | * the RDMA_RECV completion. The SGL should contain full pages up until the | |
56 | * last one. | |
57 | */ | |
58 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |
59 | struct svc_rdma_op_ctxt *ctxt, | |
60 | u32 byte_count) | |
61 | { | |
0b056c22 | 62 | struct rpcrdma_msg *rmsgp; |
d5b31be6 TT |
63 | struct page *page; |
64 | u32 bc; | |
65 | int sge_no; | |
66 | ||
67 | /* Swap the page in the SGE with the page in argpages */ | |
68 | page = ctxt->pages[0]; | |
69 | put_page(rqstp->rq_pages[0]); | |
70 | rqstp->rq_pages[0] = page; | |
71 | ||
72 | /* Set up the XDR head */ | |
73 | rqstp->rq_arg.head[0].iov_base = page_address(page); | |
0bf48289 SW |
74 | rqstp->rq_arg.head[0].iov_len = |
75 | min_t(size_t, byte_count, ctxt->sge[0].length); | |
d5b31be6 TT |
76 | rqstp->rq_arg.len = byte_count; |
77 | rqstp->rq_arg.buflen = byte_count; | |
78 | ||
79 | /* Compute bytes past head in the SGL */ | |
80 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | |
81 | ||
82 | /* If data remains, store it in the pagelist */ | |
83 | rqstp->rq_arg.page_len = bc; | |
84 | rqstp->rq_arg.page_base = 0; | |
0b056c22 CL |
85 | |
86 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ | |
87 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | |
30b7e246 | 88 | if (rmsgp->rm_type == rdma_nomsg) |
0b056c22 CL |
89 | rqstp->rq_arg.pages = &rqstp->rq_pages[0]; |
90 | else | |
91 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | |
92 | ||
d5b31be6 TT |
93 | sge_no = 1; |
94 | while (bc && sge_no < ctxt->count) { | |
95 | page = ctxt->pages[sge_no]; | |
96 | put_page(rqstp->rq_pages[sge_no]); | |
97 | rqstp->rq_pages[sge_no] = page; | |
0bf48289 | 98 | bc -= min_t(u32, bc, ctxt->sge[sge_no].length); |
d5b31be6 TT |
99 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; |
100 | sge_no++; | |
101 | } | |
102 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | |
7e4359e2 | 103 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 | 104 | |
d5b31be6 TT |
105 | /* If not all pages were used from the SGL, free the remaining ones */ |
106 | bc = sge_no; | |
107 | while (sge_no < ctxt->count) { | |
108 | page = ctxt->pages[sge_no++]; | |
109 | put_page(page); | |
110 | } | |
111 | ctxt->count = bc; | |
112 | ||
113 | /* Set up tail */ | |
114 | rqstp->rq_arg.tail[0].iov_base = NULL; | |
115 | rqstp->rq_arg.tail[0].iov_len = 0; | |
116 | } | |
117 | ||
0bf48289 | 118 | /* Issue an RDMA_READ using the local lkey to map the data sink */ |
e5452411 CL |
119 | int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, |
120 | struct svc_rqst *rqstp, | |
121 | struct svc_rdma_op_ctxt *head, | |
122 | int *page_no, | |
123 | u32 *page_offset, | |
124 | u32 rs_handle, | |
125 | u32 rs_length, | |
126 | u64 rs_offset, | |
127 | bool last) | |
0bf48289 SW |
128 | { |
129 | struct ib_send_wr read_wr; | |
130 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; | |
131 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
132 | int ret, read, pno; | |
133 | u32 pg_off = *page_offset; | |
134 | u32 pg_no = *page_no; | |
135 | ||
136 | ctxt->direction = DMA_FROM_DEVICE; | |
137 | ctxt->read_hdr = head; | |
bc3fe2e3 | 138 | pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd); |
c91aed98 SW |
139 | read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, |
140 | rs_length); | |
0bf48289 SW |
141 | |
142 | for (pno = 0; pno < pages_needed; pno++) { | |
143 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
144 | ||
145 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
146 | head->arg.page_len += len; | |
147 | head->arg.len += len; | |
148 | if (!pg_off) | |
149 | head->count++; | |
150 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
7e4359e2 | 151 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
0bf48289 SW |
152 | ctxt->sge[pno].addr = |
153 | ib_dma_map_page(xprt->sc_cm_id->device, | |
154 | head->arg.pages[pg_no], pg_off, | |
155 | PAGE_SIZE - pg_off, | |
156 | DMA_FROM_DEVICE); | |
157 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, | |
158 | ctxt->sge[pno].addr); | |
159 | if (ret) | |
160 | goto err; | |
161 | atomic_inc(&xprt->sc_dma_used); | |
d5b31be6 | 162 | |
0bf48289 SW |
163 | /* The lkey here is either a local dma lkey or a dma_mr lkey */ |
164 | ctxt->sge[pno].lkey = xprt->sc_dma_lkey; | |
165 | ctxt->sge[pno].length = len; | |
166 | ctxt->count++; | |
167 | ||
168 | /* adjust offset and wrap to next page if needed */ | |
169 | pg_off += len; | |
170 | if (pg_off == PAGE_SIZE) { | |
171 | pg_off = 0; | |
172 | pg_no++; | |
d5b31be6 | 173 | } |
0bf48289 | 174 | rs_length -= len; |
d5b31be6 | 175 | } |
0bf48289 SW |
176 | |
177 | if (last && rs_length == 0) | |
178 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
179 | else | |
180 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
181 | ||
182 | memset(&read_wr, 0, sizeof(read_wr)); | |
183 | read_wr.wr_id = (unsigned long)ctxt; | |
184 | read_wr.opcode = IB_WR_RDMA_READ; | |
185 | ctxt->wr_op = read_wr.opcode; | |
186 | read_wr.send_flags = IB_SEND_SIGNALED; | |
187 | read_wr.wr.rdma.rkey = rs_handle; | |
188 | read_wr.wr.rdma.remote_addr = rs_offset; | |
189 | read_wr.sg_list = ctxt->sge; | |
190 | read_wr.num_sge = pages_needed; | |
191 | ||
192 | ret = svc_rdma_send(xprt, &read_wr); | |
193 | if (ret) { | |
194 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
195 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
196 | goto err; | |
197 | } | |
198 | ||
199 | /* return current location in page array */ | |
200 | *page_no = pg_no; | |
201 | *page_offset = pg_off; | |
202 | ret = read; | |
203 | atomic_inc(&rdma_stat_read); | |
204 | return ret; | |
205 | err: | |
206 | svc_rdma_unmap_dma(ctxt); | |
207 | svc_rdma_put_context(ctxt, 0); | |
208 | return ret; | |
d5b31be6 TT |
209 | } |
210 | ||
0bf48289 | 211 | /* Issue an RDMA_READ using an FRMR to map the data sink */ |
e5452411 CL |
212 | int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, |
213 | struct svc_rqst *rqstp, | |
214 | struct svc_rdma_op_ctxt *head, | |
215 | int *page_no, | |
216 | u32 *page_offset, | |
217 | u32 rs_handle, | |
218 | u32 rs_length, | |
219 | u64 rs_offset, | |
220 | bool last) | |
146b6df6 | 221 | { |
0bf48289 SW |
222 | struct ib_send_wr read_wr; |
223 | struct ib_send_wr inv_wr; | |
224 | struct ib_send_wr fastreg_wr; | |
225 | u8 key; | |
226 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; | |
227 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
228 | struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); | |
229 | int ret, read, pno; | |
230 | u32 pg_off = *page_offset; | |
231 | u32 pg_no = *page_no; | |
146b6df6 | 232 | |
146b6df6 TT |
233 | if (IS_ERR(frmr)) |
234 | return -ENOMEM; | |
235 | ||
0bf48289 SW |
236 | ctxt->direction = DMA_FROM_DEVICE; |
237 | ctxt->frmr = frmr; | |
238 | pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); | |
c91aed98 SW |
239 | read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, |
240 | rs_length); | |
146b6df6 | 241 | |
0bf48289 | 242 | frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); |
146b6df6 TT |
243 | frmr->direction = DMA_FROM_DEVICE; |
244 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | |
0bf48289 SW |
245 | frmr->map_len = pages_needed << PAGE_SHIFT; |
246 | frmr->page_list_len = pages_needed; | |
247 | ||
248 | for (pno = 0; pno < pages_needed; pno++) { | |
249 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
250 | ||
251 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
252 | head->arg.page_len += len; | |
253 | head->arg.len += len; | |
254 | if (!pg_off) | |
255 | head->count++; | |
256 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
257 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
258 | frmr->page_list->page_list[pno] = | |
b432e6b3 | 259 | ib_dma_map_page(xprt->sc_cm_id->device, |
0bf48289 | 260 | head->arg.pages[pg_no], 0, |
b432e6b3 | 261 | PAGE_SIZE, DMA_FROM_DEVICE); |
0bf48289 SW |
262 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, |
263 | frmr->page_list->page_list[pno]); | |
264 | if (ret) | |
265 | goto err; | |
146b6df6 | 266 | atomic_inc(&xprt->sc_dma_used); |
146b6df6 | 267 | |
0bf48289 SW |
268 | /* adjust offset and wrap to next page if needed */ |
269 | pg_off += len; | |
270 | if (pg_off == PAGE_SIZE) { | |
271 | pg_off = 0; | |
272 | pg_no++; | |
273 | } | |
274 | rs_length -= len; | |
146b6df6 TT |
275 | } |
276 | ||
0bf48289 SW |
277 | if (last && rs_length == 0) |
278 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
279 | else | |
280 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
d5b31be6 | 281 | |
0bf48289 SW |
282 | /* Bump the key */ |
283 | key = (u8)(frmr->mr->lkey & 0x000000FF); | |
284 | ib_update_fast_reg_key(frmr->mr, ++key); | |
285 | ||
286 | ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; | |
287 | ctxt->sge[0].lkey = frmr->mr->lkey; | |
288 | ctxt->sge[0].length = read; | |
289 | ctxt->count = 1; | |
290 | ctxt->read_hdr = head; | |
291 | ||
292 | /* Prepare FASTREG WR */ | |
293 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | |
294 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | |
295 | fastreg_wr.send_flags = IB_SEND_SIGNALED; | |
296 | fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; | |
297 | fastreg_wr.wr.fast_reg.page_list = frmr->page_list; | |
298 | fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; | |
299 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | |
300 | fastreg_wr.wr.fast_reg.length = frmr->map_len; | |
301 | fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; | |
302 | fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; | |
303 | fastreg_wr.next = &read_wr; | |
304 | ||
305 | /* Prepare RDMA_READ */ | |
306 | memset(&read_wr, 0, sizeof(read_wr)); | |
307 | read_wr.send_flags = IB_SEND_SIGNALED; | |
308 | read_wr.wr.rdma.rkey = rs_handle; | |
309 | read_wr.wr.rdma.remote_addr = rs_offset; | |
310 | read_wr.sg_list = ctxt->sge; | |
311 | read_wr.num_sge = 1; | |
312 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { | |
313 | read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; | |
314 | read_wr.wr_id = (unsigned long)ctxt; | |
315 | read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; | |
316 | } else { | |
317 | read_wr.opcode = IB_WR_RDMA_READ; | |
318 | read_wr.next = &inv_wr; | |
319 | /* Prepare invalidate */ | |
320 | memset(&inv_wr, 0, sizeof(inv_wr)); | |
321 | inv_wr.wr_id = (unsigned long)ctxt; | |
322 | inv_wr.opcode = IB_WR_LOCAL_INV; | |
83710fc7 | 323 | inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; |
0bf48289 SW |
324 | inv_wr.ex.invalidate_rkey = frmr->mr->lkey; |
325 | } | |
326 | ctxt->wr_op = read_wr.opcode; | |
327 | ||
328 | /* Post the chain */ | |
329 | ret = svc_rdma_send(xprt, &fastreg_wr); | |
330 | if (ret) { | |
331 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
332 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
333 | goto err; | |
d5b31be6 | 334 | } |
d5b31be6 | 335 | |
0bf48289 SW |
336 | /* return current location in page array */ |
337 | *page_no = pg_no; | |
338 | *page_offset = pg_off; | |
339 | ret = read; | |
340 | atomic_inc(&rdma_stat_read); | |
341 | return ret; | |
342 | err: | |
343 | svc_rdma_unmap_dma(ctxt); | |
344 | svc_rdma_put_context(ctxt, 0); | |
345 | svc_rdma_put_frmr(xprt, frmr); | |
346 | return ret; | |
d5b31be6 TT |
347 | } |
348 | ||
2397aa8b CL |
349 | static unsigned int |
350 | rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) | |
351 | { | |
352 | unsigned int count; | |
353 | ||
354 | for (count = 0; ch->rc_discrim != xdr_zero; ch++) | |
355 | count++; | |
356 | return count; | |
357 | } | |
358 | ||
a97c331f CL |
359 | /* If there was additional inline content, append it to the end of arg.pages. |
360 | * Tail copy has to be done after the reader function has determined how many | |
361 | * pages are needed for RDMA READ. | |
362 | */ | |
363 | static int | |
364 | rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, | |
365 | u32 position, u32 byte_count, u32 page_offset, int page_no) | |
366 | { | |
367 | char *srcp, *destp; | |
368 | int ret; | |
369 | ||
370 | ret = 0; | |
371 | srcp = head->arg.head[0].iov_base + position; | |
372 | byte_count = head->arg.head[0].iov_len - position; | |
373 | if (byte_count > PAGE_SIZE) { | |
374 | dprintk("svcrdma: large tail unsupported\n"); | |
375 | return 0; | |
376 | } | |
377 | ||
378 | /* Fit as much of the tail on the current page as possible */ | |
379 | if (page_offset != PAGE_SIZE) { | |
380 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
381 | destp += page_offset; | |
382 | while (byte_count--) { | |
383 | *destp++ = *srcp++; | |
384 | page_offset++; | |
385 | if (page_offset == PAGE_SIZE && byte_count) | |
386 | goto more; | |
387 | } | |
388 | goto done; | |
389 | } | |
390 | ||
391 | more: | |
392 | /* Fit the rest on the next page */ | |
393 | page_no++; | |
394 | destp = page_address(rqstp->rq_arg.pages[page_no]); | |
395 | while (byte_count--) | |
396 | *destp++ = *srcp++; | |
397 | ||
398 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | |
399 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
400 | ||
401 | done: | |
402 | byte_count = head->arg.head[0].iov_len - position; | |
403 | head->arg.page_len += byte_count; | |
404 | head->arg.len += byte_count; | |
405 | head->arg.buflen += byte_count; | |
406 | return 1; | |
407 | } | |
408 | ||
0bf48289 SW |
409 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, |
410 | struct rpcrdma_msg *rmsgp, | |
411 | struct svc_rqst *rqstp, | |
412 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 413 | { |
2397aa8b | 414 | int page_no, ret; |
d5b31be6 | 415 | struct rpcrdma_read_chunk *ch; |
e5452411 | 416 | u32 handle, page_offset, byte_count; |
61edbcb7 | 417 | u32 position; |
0bf48289 | 418 | u64 rs_offset; |
e5452411 | 419 | bool last; |
d5b31be6 TT |
420 | |
421 | /* If no read list is present, return 0 */ | |
422 | ch = svc_rdma_get_read_chunk(rmsgp); | |
423 | if (!ch) | |
424 | return 0; | |
425 | ||
2397aa8b | 426 | if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) |
a6f911c0 | 427 | return -EINVAL; |
146b6df6 | 428 | |
0bf48289 SW |
429 | /* The request is completed when the RDMA_READs complete. The |
430 | * head context keeps all the pages that comprise the | |
431 | * request. | |
432 | */ | |
433 | head->arg.head[0] = rqstp->rq_arg.head[0]; | |
434 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | |
0bf48289 SW |
435 | head->hdr_count = head->count; |
436 | head->arg.page_base = 0; | |
437 | head->arg.page_len = 0; | |
438 | head->arg.len = rqstp->rq_arg.len; | |
439 | head->arg.buflen = rqstp->rq_arg.buflen; | |
59fb3066 | 440 | |
61edbcb7 CL |
441 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; |
442 | position = be32_to_cpu(ch->rc_position); | |
443 | ||
0b056c22 CL |
444 | /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ |
445 | if (position == 0) { | |
446 | head->arg.pages = &head->pages[0]; | |
447 | page_offset = head->byte_len; | |
448 | } else { | |
449 | head->arg.pages = &head->pages[head->count]; | |
450 | page_offset = 0; | |
451 | } | |
452 | ||
61edbcb7 CL |
453 | ret = 0; |
454 | page_no = 0; | |
61edbcb7 CL |
455 | for (; ch->rc_discrim != xdr_zero; ch++) { |
456 | if (be32_to_cpu(ch->rc_position) != position) | |
457 | goto err; | |
458 | ||
459 | handle = be32_to_cpu(ch->rc_target.rs_handle), | |
e5452411 | 460 | byte_count = be32_to_cpu(ch->rc_target.rs_length); |
cec56c8f TT |
461 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, |
462 | &rs_offset); | |
0bf48289 SW |
463 | |
464 | while (byte_count > 0) { | |
e5452411 CL |
465 | last = (ch + 1)->rc_discrim == xdr_zero; |
466 | ret = xprt->sc_reader(xprt, rqstp, head, | |
467 | &page_no, &page_offset, | |
468 | handle, byte_count, | |
469 | rs_offset, last); | |
0bf48289 SW |
470 | if (ret < 0) |
471 | goto err; | |
472 | byte_count -= ret; | |
473 | rs_offset += ret; | |
474 | head->arg.buflen += ret; | |
d5b31be6 | 475 | } |
d5b31be6 | 476 | } |
0b056c22 | 477 | |
fcbeced5 CL |
478 | /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ |
479 | if (page_offset & 3) { | |
480 | u32 pad = 4 - (page_offset & 3); | |
481 | ||
482 | head->arg.page_len += pad; | |
483 | head->arg.len += pad; | |
484 | head->arg.buflen += pad; | |
a97c331f | 485 | page_offset += pad; |
fcbeced5 CL |
486 | } |
487 | ||
0bf48289 | 488 | ret = 1; |
a97c331f CL |
489 | if (position && position < head->arg.head[0].iov_len) |
490 | ret = rdma_copy_tail(rqstp, head, position, | |
491 | byte_count, page_offset, page_no); | |
492 | head->arg.head[0].iov_len = position; | |
0b056c22 CL |
493 | head->position = position; |
494 | ||
0bf48289 | 495 | err: |
d5b31be6 | 496 | /* Detach arg pages. svc_recv will replenish them */ |
0bf48289 SW |
497 | for (page_no = 0; |
498 | &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) | |
499 | rqstp->rq_pages[page_no] = NULL; | |
d5b31be6 | 500 | |
0bf48289 | 501 | return ret; |
d5b31be6 TT |
502 | } |
503 | ||
504 | static int rdma_read_complete(struct svc_rqst *rqstp, | |
02e7452d | 505 | struct svc_rdma_op_ctxt *head) |
d5b31be6 | 506 | { |
d5b31be6 TT |
507 | int page_no; |
508 | int ret; | |
509 | ||
d5b31be6 TT |
510 | /* Copy RPC pages */ |
511 | for (page_no = 0; page_no < head->count; page_no++) { | |
512 | put_page(rqstp->rq_pages[page_no]); | |
513 | rqstp->rq_pages[page_no] = head->pages[page_no]; | |
514 | } | |
0b056c22 CL |
515 | |
516 | /* Adjustments made for RDMA_NOMSG type requests */ | |
517 | if (head->position == 0) { | |
518 | if (head->arg.len <= head->sge[0].length) { | |
519 | head->arg.head[0].iov_len = head->arg.len - | |
520 | head->byte_len; | |
521 | head->arg.page_len = 0; | |
522 | } else { | |
523 | head->arg.head[0].iov_len = head->sge[0].length - | |
524 | head->byte_len; | |
525 | head->arg.page_len = head->arg.len - | |
526 | head->sge[0].length; | |
527 | } | |
528 | } | |
529 | ||
d5b31be6 | 530 | /* Point rq_arg.pages past header */ |
f820c57e | 531 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; |
d5b31be6 TT |
532 | rqstp->rq_arg.page_len = head->arg.page_len; |
533 | rqstp->rq_arg.page_base = head->arg.page_base; | |
534 | ||
535 | /* rq_respages starts after the last arg page */ | |
3be7f328 | 536 | rqstp->rq_respages = &rqstp->rq_pages[page_no]; |
7e4359e2 | 537 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 TT |
538 | |
539 | /* Rebuild rq_arg head and tail. */ | |
540 | rqstp->rq_arg.head[0] = head->arg.head[0]; | |
541 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | |
542 | rqstp->rq_arg.len = head->arg.len; | |
543 | rqstp->rq_arg.buflen = head->arg.buflen; | |
544 | ||
02e7452d TT |
545 | /* Free the context */ |
546 | svc_rdma_put_context(head, 0); | |
547 | ||
d5b31be6 TT |
548 | /* XXX: What should this be? */ |
549 | rqstp->rq_prot = IPPROTO_MAX; | |
69500c43 | 550 | svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt); |
d5b31be6 | 551 | |
d5b31be6 TT |
552 | ret = rqstp->rq_arg.head[0].iov_len |
553 | + rqstp->rq_arg.page_len | |
554 | + rqstp->rq_arg.tail[0].iov_len; | |
597561bf CL |
555 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " |
556 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", | |
d5b31be6 TT |
557 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, |
558 | rqstp->rq_arg.head[0].iov_len); | |
559 | ||
d5b31be6 TT |
560 | return ret; |
561 | } | |
562 | ||
563 | /* | |
564 | * Set up the rqstp thread context to point to the RQ buffer. If | |
565 | * necessary, pull additional data from the client with an RDMA_READ | |
566 | * request. | |
567 | */ | |
568 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |
569 | { | |
570 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
571 | struct svcxprt_rdma *rdma_xprt = | |
572 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | |
573 | struct svc_rdma_op_ctxt *ctxt = NULL; | |
574 | struct rpcrdma_msg *rmsgp; | |
575 | int ret = 0; | |
576 | int len; | |
577 | ||
578 | dprintk("svcrdma: rqstp=%p\n", rqstp); | |
579 | ||
24b8b447 | 580 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 TT |
581 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { |
582 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | |
583 | struct svc_rdma_op_ctxt, | |
584 | dto_q); | |
585 | list_del_init(&ctxt->dto_q); | |
24b8b447 | 586 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 | 587 | return rdma_read_complete(rqstp, ctxt); |
0bf48289 | 588 | } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { |
d5b31be6 TT |
589 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, |
590 | struct svc_rdma_op_ctxt, | |
591 | dto_q); | |
592 | list_del_init(&ctxt->dto_q); | |
593 | } else { | |
594 | atomic_inc(&rdma_stat_rq_starve); | |
595 | clear_bit(XPT_DATA, &xprt->xpt_flags); | |
596 | ctxt = NULL; | |
597 | } | |
598 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | |
599 | if (!ctxt) { | |
600 | /* This is the EAGAIN path. The svc_recv routine will | |
601 | * return -EAGAIN, the nfsd thread will go to call into | |
602 | * svc_recv again and we shouldn't be on the active | |
603 | * transport list | |
604 | */ | |
605 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | |
606 | goto close_out; | |
607 | ||
d5b31be6 TT |
608 | goto out; |
609 | } | |
610 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | |
611 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | |
d5b31be6 TT |
612 | atomic_inc(&rdma_stat_recv); |
613 | ||
614 | /* Build up the XDR from the receive buffers. */ | |
615 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | |
616 | ||
617 | /* Decode the RDMA header. */ | |
618 | len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); | |
619 | rqstp->rq_xprt_hlen = len; | |
620 | ||
621 | /* If the request is invalid, reply with an error */ | |
622 | if (len < 0) { | |
623 | if (len == -ENOSYS) | |
008fdbc5 | 624 | svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); |
d5b31be6 TT |
625 | goto close_out; |
626 | } | |
627 | ||
d16d4009 | 628 | /* Read read-list data. */ |
0bf48289 | 629 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); |
d16d4009 TT |
630 | if (ret > 0) { |
631 | /* read-list posted, defer until data received from client. */ | |
b1721d2b | 632 | goto defer; |
0bf48289 | 633 | } else if (ret < 0) { |
d16d4009 TT |
634 | /* Post of read-list failed, free context. */ |
635 | svc_rdma_put_context(ctxt, 1); | |
636 | return 0; | |
637 | } | |
d5b31be6 | 638 | |
d5b31be6 TT |
639 | ret = rqstp->rq_arg.head[0].iov_len |
640 | + rqstp->rq_arg.page_len | |
641 | + rqstp->rq_arg.tail[0].iov_len; | |
642 | svc_rdma_put_context(ctxt, 0); | |
643 | out: | |
597561bf CL |
644 | dprintk("svcrdma: ret=%d, rq_arg.len=%u, " |
645 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", | |
d5b31be6 TT |
646 | ret, rqstp->rq_arg.len, |
647 | rqstp->rq_arg.head[0].iov_base, | |
648 | rqstp->rq_arg.head[0].iov_len); | |
649 | rqstp->rq_prot = IPPROTO_MAX; | |
650 | svc_xprt_copy_addrs(rqstp, xprt); | |
d5b31be6 TT |
651 | return ret; |
652 | ||
653 | close_out: | |
0e7f011a | 654 | if (ctxt) |
d5b31be6 | 655 | svc_rdma_put_context(ctxt, 1); |
d5b31be6 TT |
656 | dprintk("svcrdma: transport %p is closing\n", xprt); |
657 | /* | |
658 | * Set the close bit and enqueue it. svc_recv will see the | |
659 | * close bit and call svc_xprt_delete | |
660 | */ | |
661 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
b1721d2b | 662 | defer: |
d5b31be6 TT |
663 | return 0; |
664 | } |