/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 #include <linux/kernel.h>
33 #include <linux/slab.h>
35 #include <linux/scatterlist.h>
36 #include <linux/kfifo.h>
37 #include <scsi/scsi_cmnd.h>
38 #include <scsi/scsi_host.h>
40 #include "iscsi_iser.h"
42 /* Register user buffer memory and initialize passive rdma
43 * dto descriptor. Total data size is stored in
44 * iser_task->data[ISER_DIR_IN].data_len
46 static int iser_prepare_read_cmd(struct iscsi_task
*task
,
50 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
51 struct iser_regd_buf
*regd_buf
;
53 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
54 struct iser_data_buf
*buf_in
= &iser_task
->data
[ISER_DIR_IN
];
56 err
= iser_dma_map_task_data(iser_task
,
63 if (edtl
> iser_task
->data
[ISER_DIR_IN
].data_len
) {
64 iser_err("Total data length: %ld, less than EDTL: "
65 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
66 iser_task
->data
[ISER_DIR_IN
].data_len
, edtl
,
67 task
->itt
, iser_task
->iser_conn
);
71 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_IN
);
73 iser_err("Failed to set up Data-IN RDMA\n");
76 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
78 hdr
->flags
|= ISER_RSV
;
79 hdr
->read_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
80 hdr
->read_va
= cpu_to_be64(regd_buf
->reg
.va
);
82 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
83 task
->itt
, regd_buf
->reg
.rkey
,
84 (unsigned long long)regd_buf
->reg
.va
);
89 /* Register user buffer memory and initialize passive rdma
90 * dto descriptor. Total data size is stored in
91 * task->data[ISER_DIR_OUT].data_len
94 iser_prepare_write_cmd(struct iscsi_task
*task
,
96 unsigned int unsol_sz
,
99 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
100 struct iser_regd_buf
*regd_buf
;
102 struct iser_hdr
*hdr
= &iser_task
->desc
.iser_header
;
103 struct iser_data_buf
*buf_out
= &iser_task
->data
[ISER_DIR_OUT
];
104 struct ib_sge
*tx_dsg
= &iser_task
->desc
.tx_sg
[1];
106 err
= iser_dma_map_task_data(iser_task
,
113 if (edtl
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
114 iser_err("Total data length: %ld, less than EDTL: %d, "
115 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
116 iser_task
->data
[ISER_DIR_OUT
].data_len
,
117 edtl
, task
->itt
, task
->conn
);
121 err
= iser_reg_rdma_mem(iser_task
,ISER_DIR_OUT
);
123 iser_err("Failed to register write cmd RDMA mem\n");
127 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
129 if (unsol_sz
< edtl
) {
130 hdr
->flags
|= ISER_WSV
;
131 hdr
->write_stag
= cpu_to_be32(regd_buf
->reg
.rkey
);
132 hdr
->write_va
= cpu_to_be64(regd_buf
->reg
.va
+ unsol_sz
);
134 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
135 "VA:%#llX + unsol:%d\n",
136 task
->itt
, regd_buf
->reg
.rkey
,
137 (unsigned long long)regd_buf
->reg
.va
, unsol_sz
);
141 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
143 tx_dsg
->addr
= regd_buf
->reg
.va
;
144 tx_dsg
->length
= imm_sz
;
145 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
146 iser_task
->desc
.num_sge
= 2;
152 /* creates a new tx descriptor and adds header regd buffer */
153 static void iser_create_send_desc(struct iser_conn
*ib_conn
,
154 struct iser_tx_desc
*tx_desc
)
156 struct iser_device
*device
= ib_conn
->device
;
158 ib_dma_sync_single_for_cpu(device
->ib_device
,
159 tx_desc
->dma_addr
, ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
161 memset(&tx_desc
->iser_header
, 0, sizeof(struct iser_hdr
));
162 tx_desc
->iser_header
.flags
= ISER_VER
;
164 tx_desc
->num_sge
= 1;
166 if (tx_desc
->tx_sg
[0].lkey
!= device
->mr
->lkey
) {
167 tx_desc
->tx_sg
[0].lkey
= device
->mr
->lkey
;
168 iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc
);
173 int iser_alloc_rx_descriptors(struct iser_conn
*ib_conn
)
177 struct iser_rx_desc
*rx_desc
;
178 struct ib_sge
*rx_sg
;
179 struct iser_device
*device
= ib_conn
->device
;
181 ib_conn
->rx_descs
= kmalloc(ISER_QP_MAX_RECV_DTOS
*
182 sizeof(struct iser_rx_desc
), GFP_KERNEL
);
183 if (!ib_conn
->rx_descs
)
184 goto rx_desc_alloc_fail
;
186 rx_desc
= ib_conn
->rx_descs
;
188 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++) {
189 dma_addr
= ib_dma_map_single(device
->ib_device
, (void *)rx_desc
,
190 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
191 if (ib_dma_mapping_error(device
->ib_device
, dma_addr
))
192 goto rx_desc_dma_map_failed
;
194 rx_desc
->dma_addr
= dma_addr
;
196 rx_sg
= &rx_desc
->rx_sg
;
197 rx_sg
->addr
= rx_desc
->dma_addr
;
198 rx_sg
->length
= ISER_RX_PAYLOAD_SIZE
;
199 rx_sg
->lkey
= device
->mr
->lkey
;
202 ib_conn
->rx_desc_head
= 0;
205 rx_desc_dma_map_failed
:
206 rx_desc
= ib_conn
->rx_descs
;
207 for (j
= 0; j
< i
; j
++, rx_desc
++)
208 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
209 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
210 kfree(ib_conn
->rx_descs
);
211 ib_conn
->rx_descs
= NULL
;
213 iser_err("failed allocating rx descriptors / data buffers\n");
217 void iser_free_rx_descriptors(struct iser_conn
*ib_conn
)
220 struct iser_rx_desc
*rx_desc
;
221 struct iser_device
*device
= ib_conn
->device
;
223 if (!ib_conn
->rx_descs
)
226 rx_desc
= ib_conn
->rx_descs
;
227 for (i
= 0; i
< ISER_QP_MAX_RECV_DTOS
; i
++, rx_desc
++)
228 ib_dma_unmap_single(device
->ib_device
, rx_desc
->dma_addr
,
229 ISER_RX_PAYLOAD_SIZE
, DMA_FROM_DEVICE
);
230 kfree(ib_conn
->rx_descs
);
233 static int iser_post_rx_bufs(struct iscsi_conn
*conn
, struct iscsi_hdr
*req
)
235 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
237 iser_dbg("req op %x flags %x\n", req
->opcode
, req
->flags
);
238 /* check if this is the last login - going to full feature phase */
239 if ((req
->flags
& ISCSI_FULL_FEATURE_PHASE
) != ISCSI_FULL_FEATURE_PHASE
)
243 * Check that there is one posted recv buffer (for the last login
244 * response) and no posted send buffers left - they must have been
245 * consumed during previous login phases.
247 WARN_ON(iser_conn
->ib_conn
->post_recv_buf_count
!= 1);
248 WARN_ON(atomic_read(&iser_conn
->ib_conn
->post_send_buf_count
) != 0);
250 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX
);
251 /* Initial post receive buffers */
252 if (iser_post_recvm(iser_conn
->ib_conn
, ISER_MIN_POSTED_RX
))
259 * iser_send_command - send command PDU
261 int iser_send_command(struct iscsi_conn
*conn
,
262 struct iscsi_task
*task
)
264 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
265 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
268 struct iser_data_buf
*data_buf
;
269 struct iscsi_scsi_req
*hdr
= (struct iscsi_scsi_req
*)task
->hdr
;
270 struct scsi_cmnd
*sc
= task
->sc
;
271 struct iser_tx_desc
*tx_desc
= &iser_task
->desc
;
273 edtl
= ntohl(hdr
->data_length
);
275 /* build the tx desc regd header and add it to the tx desc dto */
276 tx_desc
->type
= ISCSI_TX_SCSI_COMMAND
;
277 iser_create_send_desc(iser_conn
->ib_conn
, tx_desc
);
279 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
)
280 data_buf
= &iser_task
->data
[ISER_DIR_IN
];
282 data_buf
= &iser_task
->data
[ISER_DIR_OUT
];
284 if (scsi_sg_count(sc
)) { /* using a scatter list */
285 data_buf
->buf
= scsi_sglist(sc
);
286 data_buf
->size
= scsi_sg_count(sc
);
289 data_buf
->data_len
= scsi_bufflen(sc
);
291 if (hdr
->flags
& ISCSI_FLAG_CMD_READ
) {
292 err
= iser_prepare_read_cmd(task
, edtl
);
294 goto send_command_error
;
296 if (hdr
->flags
& ISCSI_FLAG_CMD_WRITE
) {
297 err
= iser_prepare_write_cmd(task
,
300 task
->unsol_r2t
.data_length
,
303 goto send_command_error
;
306 iser_task
->status
= ISER_TASK_STATUS_STARTED
;
308 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
313 iser_err("conn %p failed task->itt %d err %d\n",conn
, task
->itt
, err
);
318 * iser_send_data_out - send data out PDU
320 int iser_send_data_out(struct iscsi_conn
*conn
,
321 struct iscsi_task
*task
,
322 struct iscsi_data
*hdr
)
324 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
325 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
326 struct iser_tx_desc
*tx_desc
= NULL
;
327 struct iser_regd_buf
*regd_buf
;
328 unsigned long buf_offset
;
329 unsigned long data_seg_len
;
332 struct ib_sge
*tx_dsg
;
334 itt
= (__force
uint32_t)hdr
->itt
;
335 data_seg_len
= ntoh24(hdr
->dlength
);
336 buf_offset
= ntohl(hdr
->offset
);
338 iser_dbg("%s itt %d dseg_len %d offset %d\n",
339 __func__
,(int)itt
,(int)data_seg_len
,(int)buf_offset
);
341 tx_desc
= kmem_cache_zalloc(ig
.desc_cache
, GFP_ATOMIC
);
342 if (tx_desc
== NULL
) {
343 iser_err("Failed to alloc desc for post dataout\n");
347 tx_desc
->type
= ISCSI_TX_DATAOUT
;
348 tx_desc
->iser_header
.flags
= ISER_VER
;
349 memcpy(&tx_desc
->iscsi_header
, hdr
, sizeof(struct iscsi_hdr
));
351 /* build the tx desc */
352 iser_initialize_task_headers(task
, tx_desc
);
354 regd_buf
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
355 tx_dsg
= &tx_desc
->tx_sg
[1];
356 tx_dsg
->addr
= regd_buf
->reg
.va
+ buf_offset
;
357 tx_dsg
->length
= data_seg_len
;
358 tx_dsg
->lkey
= regd_buf
->reg
.lkey
;
359 tx_desc
->num_sge
= 2;
361 if (buf_offset
+ data_seg_len
> iser_task
->data
[ISER_DIR_OUT
].data_len
) {
362 iser_err("Offset:%ld & DSL:%ld in Data-Out "
363 "inconsistent with total len:%ld, itt:%d\n",
364 buf_offset
, data_seg_len
,
365 iser_task
->data
[ISER_DIR_OUT
].data_len
, itt
);
367 goto send_data_out_error
;
369 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
370 itt
, buf_offset
, data_seg_len
);
373 err
= iser_post_send(iser_conn
->ib_conn
, tx_desc
);
378 kmem_cache_free(ig
.desc_cache
, tx_desc
);
379 iser_err("conn %p failed err %d\n",conn
, err
);
383 int iser_send_control(struct iscsi_conn
*conn
,
384 struct iscsi_task
*task
)
386 struct iscsi_iser_conn
*iser_conn
= conn
->dd_data
;
387 struct iscsi_iser_task
*iser_task
= task
->dd_data
;
388 struct iser_tx_desc
*mdesc
= &iser_task
->desc
;
389 unsigned long data_seg_len
;
391 struct iser_device
*device
;
392 struct iser_conn
*ib_conn
= iser_conn
->ib_conn
;
394 /* build the tx desc regd header and add it to the tx desc dto */
395 mdesc
->type
= ISCSI_TX_CONTROL
;
396 iser_create_send_desc(iser_conn
->ib_conn
, mdesc
);
398 device
= iser_conn
->ib_conn
->device
;
400 data_seg_len
= ntoh24(task
->hdr
->dlength
);
402 if (data_seg_len
> 0) {
403 struct ib_sge
*tx_dsg
= &mdesc
->tx_sg
[1];
404 if (task
!= conn
->login_task
) {
405 iser_err("data present on non login task!!!\n");
406 goto send_control_error
;
409 ib_dma_sync_single_for_cpu(device
->ib_device
,
410 ib_conn
->login_req_dma
, task
->data_count
,
413 memcpy(iser_conn
->ib_conn
->login_req_buf
, task
->data
,
416 ib_dma_sync_single_for_device(device
->ib_device
,
417 ib_conn
->login_req_dma
, task
->data_count
,
420 tx_dsg
->addr
= iser_conn
->ib_conn
->login_req_dma
;
421 tx_dsg
->length
= task
->data_count
;
422 tx_dsg
->lkey
= device
->mr
->lkey
;
426 if (task
== conn
->login_task
) {
427 err
= iser_post_recvl(iser_conn
->ib_conn
);
429 goto send_control_error
;
430 err
= iser_post_rx_bufs(conn
, task
->hdr
);
432 goto send_control_error
;
435 err
= iser_post_send(iser_conn
->ib_conn
, mdesc
);
440 iser_err("conn %p failed err %d\n",conn
, err
);
445 * iser_rcv_dto_completion - recv DTO completion
447 void iser_rcv_completion(struct iser_rx_desc
*rx_desc
,
448 unsigned long rx_xfer_len
,
449 struct iser_conn
*ib_conn
)
451 struct iscsi_iser_conn
*conn
= ib_conn
->iser_conn
;
452 struct iscsi_hdr
*hdr
;
454 int rx_buflen
, outstanding
, count
, err
;
456 /* differentiate between login to all other PDUs */
457 if ((char *)rx_desc
== ib_conn
->login_resp_buf
) {
458 rx_dma
= ib_conn
->login_resp_dma
;
459 rx_buflen
= ISER_RX_LOGIN_SIZE
;
461 rx_dma
= rx_desc
->dma_addr
;
462 rx_buflen
= ISER_RX_PAYLOAD_SIZE
;
465 ib_dma_sync_single_for_cpu(ib_conn
->device
->ib_device
, rx_dma
,
466 rx_buflen
, DMA_FROM_DEVICE
);
468 hdr
= &rx_desc
->iscsi_header
;
470 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr
->opcode
,
471 hdr
->itt
, (int)(rx_xfer_len
- ISER_HEADERS_LEN
));
473 iscsi_iser_recv(conn
->iscsi_conn
, hdr
,
474 rx_desc
->data
, rx_xfer_len
- ISER_HEADERS_LEN
);
476 ib_dma_sync_single_for_device(ib_conn
->device
->ib_device
, rx_dma
,
477 rx_buflen
, DMA_FROM_DEVICE
);
479 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
480 * task eliminates the need to worry on tasks which are completed in *
481 * parallel to the execution of iser_conn_term. So the code that waits *
482 * for the posted rx bufs refcount to become zero handles everything */
483 conn
->ib_conn
->post_recv_buf_count
--;
485 if (rx_dma
== ib_conn
->login_resp_dma
)
488 outstanding
= ib_conn
->post_recv_buf_count
;
489 if (outstanding
+ ISER_MIN_POSTED_RX
<= ISER_QP_MAX_RECV_DTOS
) {
490 count
= min(ISER_QP_MAX_RECV_DTOS
- outstanding
,
492 err
= iser_post_recvm(ib_conn
, count
);
494 iser_err("posting %d rx bufs err %d\n", count
, err
);
498 void iser_snd_completion(struct iser_tx_desc
*tx_desc
,
499 struct iser_conn
*ib_conn
)
501 struct iscsi_task
*task
;
502 struct iser_device
*device
= ib_conn
->device
;
504 if (tx_desc
->type
== ISCSI_TX_DATAOUT
) {
505 ib_dma_unmap_single(device
->ib_device
, tx_desc
->dma_addr
,
506 ISER_HEADERS_LEN
, DMA_TO_DEVICE
);
507 kmem_cache_free(ig
.desc_cache
, tx_desc
);
510 atomic_dec(&ib_conn
->post_send_buf_count
);
512 if (tx_desc
->type
== ISCSI_TX_CONTROL
) {
513 /* this arithmetic is legal by libiscsi dd_data allocation */
514 task
= (void *) ((long)(void *)tx_desc
-
515 sizeof(struct iscsi_task
));
516 if (task
->hdr
->itt
== RESERVED_ITT
)
517 iscsi_put_task(task
);
521 void iser_task_rdma_init(struct iscsi_iser_task
*iser_task
)
524 iser_task
->status
= ISER_TASK_STATUS_INIT
;
526 iser_task
->dir
[ISER_DIR_IN
] = 0;
527 iser_task
->dir
[ISER_DIR_OUT
] = 0;
529 iser_task
->data
[ISER_DIR_IN
].data_len
= 0;
530 iser_task
->data
[ISER_DIR_OUT
].data_len
= 0;
532 memset(&iser_task
->rdma_regd
[ISER_DIR_IN
], 0,
533 sizeof(struct iser_regd_buf
));
534 memset(&iser_task
->rdma_regd
[ISER_DIR_OUT
], 0,
535 sizeof(struct iser_regd_buf
));
538 void iser_task_rdma_finalize(struct iscsi_iser_task
*iser_task
)
540 int is_rdma_aligned
= 1;
541 struct iser_regd_buf
*regd
;
543 /* if we were reading, copy back to unaligned sglist,
544 * anyway dma_unmap and free the copy
546 if (iser_task
->data_copy
[ISER_DIR_IN
].copy_buf
!= NULL
) {
548 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_IN
);
550 if (iser_task
->data_copy
[ISER_DIR_OUT
].copy_buf
!= NULL
) {
552 iser_finalize_rdma_unaligned_sg(iser_task
, ISER_DIR_OUT
);
555 if (iser_task
->dir
[ISER_DIR_IN
]) {
556 regd
= &iser_task
->rdma_regd
[ISER_DIR_IN
];
557 if (regd
->reg
.is_fmr
)
558 iser_unreg_mem(®d
->reg
);
561 if (iser_task
->dir
[ISER_DIR_OUT
]) {
562 regd
= &iser_task
->rdma_regd
[ISER_DIR_OUT
];
563 if (regd
->reg
.is_fmr
)
564 iser_unreg_mem(®d
->reg
);
567 /* if the data was unaligned, it was already unmapped and then copied */
569 iser_dma_unmap_task_data(iser_task
);