/*
 * Copyright (c) 2016 Avago Technologies.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful.
 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
 * See the GNU General Public License for more details, a copy of which
 * can be found in the file COPYING included with this package
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/parser.h>
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
#include <linux/delay.h>

#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
/* *************************** Data Structures/Defines ****************** */

/*
 * We handle AEN commands ourselves and don't even let the
 * block layer know about them.
 */
#define NVME_FC_NR_AEN_COMMANDS	1
#define NVME_FC_AQ_BLKMQ_DEPTH	\
	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
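/*
 * Illustrative sketch (not part of the driver): how the defines above
 * split the admin-queue tag space. It assumes NVME_AQ_DEPTH is 32 (its
 * value in this kernel era); the last slot is reserved for the AEN
 * command, whose command id sits just past the blk-mq visible depth.
 * The inner program compiles standalone if extracted.
 */
#if 0	/* example only - never compiled into the driver */
#include <stdio.h>

#define EX_NVME_AQ_DEPTH	32	/* assumption for illustration */
#define EX_NR_AEN_COMMANDS	1
#define EX_AQ_BLKMQ_DEPTH	(EX_NVME_AQ_DEPTH - EX_NR_AEN_COMMANDS)
#define EX_AEN_CMDID_BASE	(EX_AQ_BLKMQ_DEPTH + 1)

int main(void)
{
	/* blk-mq sees 31 tags; the AEN slot gets command id 32 */
	printf("blk-mq depth %d, first AEN cmdid %d\n",
	       EX_AQ_BLKMQ_DEPTH, EX_AEN_CMDID_BASE);
	return 0;
}
#endif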
enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED	= (1 << 0),
	NVME_FC_Q_LIVE		= (1 << 1),
};

#define NVMEFC_QUEUE_DELAY	3		/* ms units */
struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
	struct device		*dev;
	struct blk_mq_hw_ctx	*hctx;
	void			*lldd_handle;
	size_t			cmnd_capsule_len;
	u32			qnum;
	u32			rqcnt;
	u32			seqno;

	u64			connection_id;
	atomic_t		csn;

	unsigned long		flags;
	size_t			queue_size;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_RELEASED	= (1 << 1),
	FCOP_FLAGS_COMPLETE	= (1 << 2),
	FCOP_FLAGS_AEN		= (1 << 3),
};
struct nvmefc_ls_req_op {
	struct nvmefc_ls_req	ls_req;

	struct nvme_fc_rport	*rport;
	struct nvme_fc_queue	*queue;
	struct request		*rq;
	u32			flags;

	int			ls_error;
	struct completion	ls_done;
	struct list_head	lsreq_list;	/* rport->ls_req_list */
	bool			req_queued;
};

enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};
struct nvme_fc_fcp_op {
	struct nvme_request	nreq;		/*
						 * nvme/host/core.c
						 * requires this to be
						 * the 1st element in the
						 * private structure
						 * associated with the
						 * request.
						 */
	struct nvmefc_fcp_req	fcp_req;

	struct nvme_fc_ctrl	*ctrl;
	struct nvme_fc_queue	*queue;
	struct request		*rq;

	atomic_t		state;
	u32			flags;
	u32			rqno;
	u32			nents;

	struct nvme_fc_cmd_iu	cmd_iu;
	struct nvme_fc_ersp_iu	rsp_iu;
};
struct nvme_fc_lport {
	struct nvme_fc_local_port	localport;

	struct ida			endp_cnt;
	struct list_head		port_list;	/* nvme_fc_port_list */
	struct list_head		endp_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_port_template	*ops;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

struct nvme_fc_rport {
	struct nvme_fc_remote_port	remoteport;

	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */
enum nvme_fcctrl_flags {
	FCCTRL_TERMIO		= (1 << 0),
};
struct nvme_fc_ctrl {
	spinlock_t		lock;
	struct nvme_fc_queue	*queues;

	struct device		*dev;
	struct nvme_fc_lport	*lport;
	struct nvme_fc_rport	*rport;
	u32			cnum;

	u64			association_id;

	struct list_head	ctrl_list;	/* rport->ctrl_list */

	struct blk_mq_tag_set	admin_tag_set;
	struct blk_mq_tag_set	tag_set;

	struct work_struct	delete_work;
	struct delayed_work	connect_work;

	struct kref		ref;
	u32			flags;
	u32			iocnt;
	wait_queue_head_t	ioabort_wait;

	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];

	struct nvme_ctrl	ctrl;
};
static inline struct nvme_fc_ctrl *
to_fc_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
}

static inline struct nvme_fc_lport *
localport_to_lport(struct nvme_fc_local_port *portptr)
{
	return container_of(portptr, struct nvme_fc_lport, localport);
}

static inline struct nvme_fc_rport *
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
{
	return container_of(portptr, struct nvme_fc_rport, remoteport);
}

static inline struct nvmefc_ls_req_op *
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
{
	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
}

static inline struct nvme_fc_fcp_op *
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
{
	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
}
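/*
 * Illustrative sketch (not part of the driver): the helpers above all
 * rely on container_of() - given a pointer to an embedded member,
 * recover the enclosing structure by subtracting the member's offset.
 * A minimal userspace equivalent, using hypothetical types, compiles
 * standalone if extracted.
 */
#if 0	/* example only - never compiled into the driver */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct outer {
	int tag;
	int member;	/* embedded member, like ls_req above */
};

int main(void)
{
	struct outer o = { .tag = 42 };
	int *m = &o.member;

	/* recover &o from &o.member, as ls_req_to_lsop() does */
	printf("tag = %d\n", container_of(m, struct outer, member)->tag);
	return 0;
}
#endif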
/* *************************** Globals **************************** */

static DEFINE_SPINLOCK(nvme_fc_lock);

static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);


/* *********************** FC-NVME Port Management ************************ */

static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
			struct nvme_fc_queue *, unsigned int);
static void
nvme_fc_free_lport(struct kref *ref)
{
	struct nvme_fc_lport *lport =
		container_of(ref, struct nvme_fc_lport, ref);
	unsigned long flags;

	WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&lport->endp_list));

	/* remove from transport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&lport->port_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	/* let the LLDD know we've finished tearing it down */
	lport->ops->localport_delete(&lport->localport);

	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
	ida_destroy(&lport->endp_cnt);

	put_device(lport->dev);

	kfree(lport);
}

static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
	kref_put(&lport->ref, nvme_fc_free_lport);
}

static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
	return kref_get_unless_zero(&lport->ref);
}
static struct nvme_fc_lport *
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
{
	struct nvme_fc_lport *lport;
	unsigned long flags;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		if (lport->localport.node_name != pinfo->node_name ||
		    lport->localport.port_name != pinfo->port_name)
			continue;

		if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
			lport = ERR_PTR(-EEXIST);
			goto out_done;
		}

		if (!nvme_fc_lport_get(lport)) {
			/*
			 * fails if ref cnt already 0. If so,
			 * act as if lport already deleted
			 */
			lport = NULL;
			goto out_done;
		}

		/* resume the lport */

		lport->localport.port_role = pinfo->port_role;
		lport->localport.port_id = pinfo->port_id;
		lport->localport.port_state = FC_OBJSTATE_ONLINE;

		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		return lport;
	}

	lport = NULL;

out_done:
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return lport;
}
/**
 * nvme_fc_register_localport - transport entry point called by an
 *                              LLDD to register the existence of an NVME
 *                              host FC port.
 * @pinfo:     pointer to information about the port to be registered
 * @template:  LLDD entrypoints and operational parameters for the port
 * @dev:       physical hardware device node port corresponds to. Will be
 *             used for DMA mappings
 * @lport_p:   pointer to a local port pointer. Upon success, the routine
 *             will allocate a nvme_fc_local_port structure and place its
 *             address in the local port pointer. Upon failure, local port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
			struct nvme_fc_port_template *template,
			struct device *dev,
			struct nvme_fc_local_port **portptr)
{
	struct nvme_fc_lport *newrec;
	unsigned long flags;
	int ret, idx;

	if (!template->localport_delete || !template->remoteport_delete ||
	    !template->ls_req || !template->fcp_io ||
	    !template->ls_abort || !template->fcp_abort ||
	    !template->max_hw_queues || !template->max_sgl_segments ||
	    !template->max_dif_sgl_segments || !template->dma_boundary) {
		ret = -EINVAL;
		goto out_reghost_failed;
	}

	/*
	 * look to see if there is already a localport that had been
	 * deregistered and in the process of waiting for all the
	 * references to fully be removed. If the references haven't
	 * expired, we can simply re-enable the localport. Remoteports
	 * and controller reconnections should resume naturally.
	 */
	newrec = nvme_fc_attach_to_unreg_lport(pinfo);

	/* found an lport, but something about its state is bad */
	if (IS_ERR(newrec)) {
		ret = PTR_ERR(newrec);
		goto out_reghost_failed;

	/* found existing lport, which was resumed */
	} else if (newrec) {
		*portptr = &newrec->localport;
		return 0;
	}

	/* nothing found - allocate a new localport struct */

	newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_reghost_failed;
	}

	idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_fail_kfree;
	}

	if (!get_device(dev) && dev) {
		ret = -ENODEV;
		goto out_ida_put;
	}

	INIT_LIST_HEAD(&newrec->port_list);
	INIT_LIST_HEAD(&newrec->endp_list);
	kref_init(&newrec->ref);
	newrec->ops = template;
	newrec->dev = dev;
	ida_init(&newrec->endp_cnt);
	newrec->localport.private = &newrec[1];
	newrec->localport.node_name = pinfo->node_name;
	newrec->localport.port_name = pinfo->port_name;
	newrec->localport.port_role = pinfo->port_role;
	newrec->localport.port_id = pinfo->port_id;
	newrec->localport.port_state = FC_OBJSTATE_ONLINE;
	newrec->localport.port_num = idx;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->port_list, &nvme_fc_lport_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (dev)
		dma_set_seg_boundary(dev, template->dma_boundary);

	*portptr = &newrec->localport;
	return 0;

out_ida_put:
	ida_simple_remove(&nvme_fc_local_port_cnt, idx);
out_fail_kfree:
	kfree(newrec);
out_reghost_failed:
	*portptr = NULL;

	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
/**
 * nvme_fc_unregister_localport - transport entry point called by an
 *                              LLDD to deregister/remove a previously
 *                              registered NVME host FC port.
 * @localport: pointer to the (registered) local port that is to be
 *             deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(portptr);
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&nvme_fc_lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	nvme_fc_lport_put(lport);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
/**
 * nvme_fc_register_remoteport - transport entry point called by an
 *                              LLDD to register the existence of an NVME
 *                              subsystem FC port on its fabric.
 * @localport: pointer to the (registered) local port that the remote
 *             subsystem port is connected to.
 * @pinfo:     pointer to information about the port to be registered
 * @rport_p:   pointer to a remote port pointer. Upon success, the routine
 *             will allocate a nvme_fc_remote_port structure and place its
 *             address in the remote port pointer. Upon failure, remote port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
				struct nvme_fc_port_info *pinfo,
				struct nvme_fc_remote_port **portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(localport);
	struct nvme_fc_rport *newrec;
	unsigned long flags;
	int ret, idx;

	newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_reghost_failed;
	}

	if (!nvme_fc_lport_get(lport)) {
		ret = -ESHUTDOWN;
		goto out_kfree_rport;
	}

	idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_lport_put;
	}

	INIT_LIST_HEAD(&newrec->endp_list);
	INIT_LIST_HEAD(&newrec->ctrl_list);
	INIT_LIST_HEAD(&newrec->ls_req_list);
	kref_init(&newrec->ref);
	spin_lock_init(&newrec->lock);
	newrec->remoteport.localport = &lport->localport;
	newrec->dev = lport->dev;
	newrec->lport = lport;
	newrec->remoteport.private = &newrec[1];
	newrec->remoteport.port_role = pinfo->port_role;
	newrec->remoteport.node_name = pinfo->node_name;
	newrec->remoteport.port_name = pinfo->port_name;
	newrec->remoteport.port_id = pinfo->port_id;
	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
	newrec->remoteport.port_num = idx;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->endp_list, &lport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	*portptr = &newrec->remoteport;
	return 0;

out_lport_put:
	nvme_fc_lport_put(lport);
out_kfree_rport:
	kfree(newrec);
out_reghost_failed:
	*portptr = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
static void
nvme_fc_free_rport(struct kref *ref)
{
	struct nvme_fc_rport *rport =
		container_of(ref, struct nvme_fc_rport, ref);
	struct nvme_fc_lport *lport =
			localport_to_lport(rport->remoteport.localport);
	unsigned long flags;

	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&rport->ctrl_list));

	/* remove from lport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&rport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	/* let the LLDD know we've finished tearing it down */
	lport->ops->remoteport_delete(&rport->remoteport);

	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);

	kfree(rport);

	nvme_fc_lport_put(lport);
}

static void
nvme_fc_rport_put(struct nvme_fc_rport *rport)
{
	kref_put(&rport->ref, nvme_fc_free_rport);
}

static int
nvme_fc_rport_get(struct nvme_fc_rport *rport)
{
	return kref_get_unless_zero(&rport->ref);
}
static int
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
{
	struct nvmefc_ls_req_op *lsop;
	unsigned long flags;

restart:
	spin_lock_irqsave(&rport->lock, flags);

	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
		if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
			lsop->flags |= FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&rport->lock, flags);
			rport->lport->ops->ls_abort(&rport->lport->localport,
						&rport->remoteport,
						&lsop->ls_req);
			goto restart;
		}
	}
	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}
/**
 * nvme_fc_unregister_remoteport - transport entry point called by an
 *                              LLDD to deregister/remove a previously
 *                              registered NVME subsystem FC port.
 * @remoteport: pointer to the (registered) remote port that is to be
 *              deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	/* tear down all associations to the remote port */
	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
		__nvme_fc_del_ctrl(ctrl);

	spin_unlock_irqrestore(&rport->lock, flags);

	nvme_fc_abort_lsops(rport);

	nvme_fc_rport_put(rport);
	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
/* *********************** FC-NVME DMA Handling **************************** */

/*
 * The fcloop device passes in a NULL device pointer. Real LLD's will
 * pass in a valid device pointer. If NULL is passed to the dma mapping
 * routines, depending on the platform, it may or may not succeed, and
 * may crash.
 *
 * As such:
 * Wrapper all the dma routines and check the dev pointer.
 *
 * For simple mappings (those that return just a dma address), we'll
 * noop them, returning a dma address of 0.
 *
 * On more complex mappings (dma_map_sg), a pseudo routine fills
 * in the scatter list, setting all dma addresses to 0.
 */
static inline dma_addr_t
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
		enum dma_data_direction dir)
{
	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
}

static inline int
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dev ? dma_mapping_error(dev, dma_addr) : 0;
}

static inline void
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
	enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_single(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_cpu(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_device(dev, addr, size, dir);
}

/* pseudo dma_map_sg call */
static int
fc_map_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	WARN_ON(nents == 0 || sg[0].length == 0);

	for_each_sg(sg, s, nents, i) {
		s->dma_address = 0L;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
	}
	return nents;
}

static inline int
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
}

static inline void
fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_sg(dev, sg, nents, dir);
}
/* *********************** FC-NVME LS Handling **************************** */

static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
{
	struct nvme_fc_rport *rport = lsop->rport;
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (!lsop->req_queued) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return;
	}

	list_del(&lsop->lsreq_list);

	lsop->req_queued = false;

	spin_unlock_irqrestore(&rport->lock, flags);

	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);

	nvme_fc_rport_put(rport);
}
static int
__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;
	int ret = 0;

	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		return -ECONNREFUSED;

	if (!nvme_fc_rport_get(rport))
		return -ESHUTDOWN;

	lsreq->done = done;
	lsop->rport = rport;
	lsop->req_queued = false;
	INIT_LIST_HEAD(&lsop->lsreq_list);
	init_completion(&lsop->ls_done);

	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
				  lsreq->rqstlen + lsreq->rsplen,
				  DMA_BIDIRECTIONAL);
	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
		ret = -EFAULT;
		goto out_putrport;
	}
	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;

	spin_lock_irqsave(&rport->lock, flags);

	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);

	lsop->req_queued = true;

	spin_unlock_irqrestore(&rport->lock, flags);

	ret = rport->lport->ops->ls_req(&rport->lport->localport,
					&rport->remoteport, lsreq);
	if (ret)
		goto out_unlink;

	return 0;

out_unlink:
	lsop->ls_error = ret;
	spin_lock_irqsave(&rport->lock, flags);
	lsop->req_queued = false;
	list_del(&lsop->lsreq_list);
	spin_unlock_irqrestore(&rport->lock, flags);
	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);
out_putrport:
	nvme_fc_rport_put(rport);

	return ret;
}
static void
nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	lsop->ls_error = status;
	complete(&lsop->ls_done);
}
static int
nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
	int ret;

	ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);

	if (!ret) {
		/*
		 * No timeout/not interruptible as we need the struct
		 * to exist until the lldd calls us back. Thus mandate
		 * wait until driver calls back. lldd responsible for
		 * the timeout action
		 */
		wait_for_completion(&lsop->ls_done);

		__nvme_fc_finish_ls_req(lsop);

		ret = lsop->ls_error;
	}

	if (ret)
		return ret;

	/* ACC or RJT payload ? */
	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
		return -ENXIO;

	return 0;
}
static int
nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	/* don't wait for completion */

	return __nvme_fc_send_ls_req(rport, lsop, done);
}
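/*
 * Illustrative sketch (not part of the driver): nvme_fc_send_ls_req()
 * turns the asynchronous LS callback into a synchronous call by parking
 * on a completion that the done() callback fires. A standalone pthread
 * model of that sync-over-async pattern compiles if extracted.
 */
#if 0	/* example only - never compiled into the driver */
#include <pthread.h>
#include <stdio.h>

struct ex_completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void ex_complete(struct ex_completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void ex_wait_for_completion(struct ex_completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *lldd_callback(void *arg)	/* plays the LLDD's done() role */
{
	ex_complete(arg);
	return NULL;
}

int main(void)
{
	struct ex_completion c = { PTHREAD_MUTEX_INITIALIZER,
				   PTHREAD_COND_INITIALIZER, 0 };
	pthread_t t;

	pthread_create(&t, NULL, lldd_callback, &c);	/* "send" the LS */
	ex_wait_for_completion(&c);			/* block until done() */
	pthread_join(t, NULL);
	printf("LS complete\n");
	return 0;
}
#endif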
/* Validation Error indexes into the string table below */
enum {
	VERR_NO_ERROR		= 0,
	VERR_LSACC		= 1,
	VERR_LSDESC_RQST	= 2,
	VERR_LSDESC_RQST_LEN	= 3,
	VERR_ASSOC_ID		= 4,
	VERR_ASSOC_ID_LEN	= 5,
	VERR_CONN_ID		= 6,
	VERR_CONN_ID_LEN	= 7,
	VERR_CR_ASSOC		= 8,
	VERR_CR_ASSOC_ACC_LEN	= 9,
	VERR_CR_CONN		= 10,
	VERR_CR_CONN_ACC_LEN	= 11,
	VERR_DISCONN		= 12,
	VERR_DISCONN_ACC_LEN	= 13,
};

static char *validation_errors[] = {
	"OK",
	"Not LS_ACC",
	"Not LSDESC_RQST",
	"Bad LSDESC_RQST Length",
	"Not Association ID",
	"Bad Association ID Length",
	"Not Connection ID",
	"Bad Connection ID Length",
	"Not CR_ASSOC Rqst",
	"Bad CR_ASSOC ACC Length",
	"Not CR_CONN Rqst",
	"Bad CR_CONN ACC Length",
	"Not Disconnect Rqst",
	"Bad Disconnect ACC Length",
};
static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
	struct fcnvme_ls_cr_assoc_acc *assoc_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
	if (!lsop) {
		ret = -ENOMEM;
		goto out_no_memory;
	}
	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];

	assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
	assoc_rqst->desc_list_len =
			cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
	assoc_rqst->assoc_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
	/* Linux supports only Dynamic controllers */
	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
		min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));

	lsreq->rqstaddr = assoc_rqst;
	lsreq->rqstlen = sizeof(*assoc_rqst);
	lsreq->rspaddr = assoc_acc;
	lsreq->rsplen = sizeof(*assoc_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (assoc_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_ls_cr_assoc_acc)))
		fcret = VERR_CR_ASSOC_ACC_LEN;
	else if (assoc_acc->hdr.rqst.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (assoc_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
		fcret = VERR_CR_ASSOC;
	else if (assoc_acc->associd.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
		fcret = VERR_ASSOC_ID;
	else if (assoc_acc->associd.desc_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id)))
		fcret = VERR_ASSOC_ID_LEN;
	else if (assoc_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (assoc_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d connect failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		ctrl->association_id =
			be64_to_cpu(assoc_acc->associd.association_id);
		queue->connection_id =
			be64_to_cpu(assoc_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect admin queue failed (%d).\n",
			queue->qnum, ret);
	return ret;
}
static int
nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
			u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_conn_rqst *conn_rqst;
	struct fcnvme_ls_cr_conn_acc *conn_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
	if (!lsop) {
		ret = -ENOMEM;
		goto out_no_memory;
	}
	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];

	conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
	conn_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));

	conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	conn_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));
	conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
	conn_rqst->connect_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
	conn_rqst->connect_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);

	lsop->queue = queue;
	lsreq->rqstaddr = conn_rqst;
	lsreq->rqstlen = sizeof(*conn_rqst);
	lsreq->rspaddr = conn_acc;
	lsreq->rsplen = sizeof(*conn_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (conn_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
		fcret = VERR_CR_CONN_ACC_LEN;
	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (conn_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
		fcret = VERR_CR_CONN;
	else if (conn_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (conn_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d connect failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		queue->connection_id =
			be64_to_cpu(conn_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect command failed (%d).\n",
			queue->qnum, ret);
	return ret;
}
static void
nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	__nvme_fc_finish_ls_req(lsop);

	/* fc-nvme initiator doesn't care about success or failure of cmd */

	kfree(lsop);
}

/*
 * This routine sends a FC-NVME LS to disconnect (aka terminate)
 * the FC-NVME Association.  Terminating the association also
 * terminates the FC-NVME connections (per queue, both admin and io
 * queues) that are part of the association. E.g. things are torn
 * down, and the related FC-NVME Association ID and Connection IDs
 * become invalid.
 *
 * The behavior of the fc-nvme initiator is such that it's
 * understanding of the association and connections will implicitly
 * be torn down. The action is implicit as it may be due to a loss of
 * connectivity with the fc-nvme target, so you may never get a
 * response even if you tried.  As such, the action of this routine
 * is to asynchronously send the LS, ignore any results of the LS, and
 * continue on with terminating the association. If the fc-nvme target
 * is present and receives the LS, it too can tear down.
 */
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
	struct fcnvme_ls_disconnect_rqst *discon_rqst;
	struct fcnvme_ls_disconnect_acc *discon_acc;
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	int ret;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*discon_rqst) + sizeof(*discon_acc)),
			GFP_KERNEL);
	if (!lsop)
		/* couldn't send it... too bad */
		return;

	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];

	discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
	discon_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_disconn_cmd));

	discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	discon_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));

	discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);

	discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
						FCNVME_LSDESC_DISCONN_CMD);
	discon_rqst->discon_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_disconn_cmd));
	discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
	discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);

	lsreq->rqstaddr = discon_rqst;
	lsreq->rqstlen = sizeof(*discon_rqst);
	lsreq->rspaddr = discon_acc;
	lsreq->rsplen = sizeof(*discon_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
				nvme_fc_disconnect_assoc_done);
	if (ret)
		kfree(lsop);

	/* only meaningful part to terminating the association */
	ctrl->association_id = 0;
}
1205 static void __nvme_fc_final_op_cleanup(struct request
*rq
);
1206 static void nvme_fc_error_recovery(struct nvme_fc_ctrl
*ctrl
, char *errmsg
);
static int
nvme_fc_reinit_request(void *data, struct request *rq)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;

	memset(cmdiu, 0, sizeof(*cmdiu));
	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
	cmdiu->fc_id = NVME_CMD_FC_ID;
	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
	memset(&op->rsp_iu, 0, sizeof(op->rsp_iu));

	return 0;
}

static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
{
	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma,
				sizeof(op->cmd_iu), DMA_TO_DEVICE);

	atomic_set(&op->state, FCPOP_STATE_UNINIT);
}

static void
nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);

	__nvme_fc_exit_request(set->driver_data, op);
}
static int
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
{
	int state;

	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
	if (state != FCPOP_STATE_ACTIVE) {
		atomic_set(&op->state, state);
		return -ECANCELED;
	}

	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
					&ctrl->rport->remoteport,
					op->queue->lldd_handle,
					&op->fcp_req);

	return 0;
}
static void
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
	unsigned long flags;
	int i, ret;

	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
			continue;

		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO) {
			ctrl->iocnt++;
			aen_op->flags |= FCOP_FLAGS_TERMIO;
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);

		ret = __nvme_fc_abort_op(ctrl, aen_op);
		if (ret) {
			/*
			 * if __nvme_fc_abort_op failed the io wasn't
			 * active. Thus this call path is running in
			 * parallel to the io complete. Treat as non-error.
			 */

			/* back out the flags/counters */
			spin_lock_irqsave(&ctrl->lock, flags);
			if (ctrl->flags & FCCTRL_TERMIO)
				ctrl->iocnt--;
			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&ctrl->lock, flags);
			return;
		}
	}
}
static inline bool
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
{
	unsigned long flags;
	bool complete_rq = false;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
		if (ctrl->flags & FCCTRL_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
	}
	if (op->flags & FCOP_FLAGS_RELEASED)
		complete_rq = true;
	else
		op->flags |= FCOP_FLAGS_COMPLETE;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return complete_rq;
}
static void
nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
{
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
	struct request *rq = op->rq;
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	struct nvme_fc_queue *queue = op->queue;
	struct nvme_completion *cqe = &op->rsp_iu.cqe;
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
	bool complete_rq, terminate_assoc = true;

	/*
	 * WARNING:
	 * The current linux implementation of a nvme controller
	 * allocates a single tag set for all io queues and sizes
	 * the io queues to fully hold all possible tags. Thus, the
	 * implementation does not reference or care about the sqhd
	 * value as it never needs to use the sqhd/sqtail pointers
	 * for submission pacing.
	 *
	 * This affects the FC-NVME implementation in two ways:
	 * 1) As the value doesn't matter, we don't need to waste
	 *    cycles extracting it from ERSPs and stamping it in the
	 *    cases where the transport fabricates CQEs on successful
	 *    completions.
	 * 2) The FC-NVME implementation requires that delivery of
	 *    ERSP completions are to go back to the nvme layer in order
	 *    relative to the rsn, such that the sqhd value will always
	 *    be "in order" for the nvme layer. As the nvme layer in
	 *    linux doesn't care about sqhd, there's no need to return
	 *    them in order.
	 *
	 * Additionally:
	 * As the core nvme layer in linux currently does not look at
	 * every field in the cqe - in cases where the FC transport must
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *      cqe.sqid, cqe.sqhd, cqe.command_id
	 *
	 * Failure or error of an individual i/o, in a transport
	 * detected fashion unrelated to the nvme completion status,
	 * can potentially cause the initiator and target sides to get out
	 * of sync on SQ head/tail (aka outstanding io count allowed).
	 * Per FC-NVME spec, failure of an individual command requires
	 * the connection to be terminated, which in turn requires the
	 * association to be terminated.
	 */

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
		status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
	else if (freq->status)
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);

	/*
	 * For the linux implementation, if we have an unsuccessful
	 * status, the blk-mq layer can typically be called with the
	 * non-zero status and the content of the cqe isn't important.
	 */
	if (status)
		goto done;

	/*
	 * command completed successfully relative to the wire
	 * protocol. However, validate anything received and
	 * extract the status and result from the cqe (create it
	 * where necessary).
	 */

	switch (freq->rcv_rsplen) {

	case 0:
	case NVME_FC_SIZEOF_ZEROS_RSP:
		/*
		 * No response payload or 12 bytes of payload (which
		 * should all be zeros) are considered successful and
		 * no payload in the CQE by the transport.
		 */
		if (freq->transferred_length !=
			be32_to_cpu(op->cmd_iu.data_len)) {
			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
			goto done;
		}
		result.u64 = 0;
		break;

	case sizeof(struct nvme_fc_ersp_iu):
		/*
		 * The ERSP IU contains a full completion with CQE.
		 * Validate ERSP IU and look at cqe.
		 */
		if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
					(freq->rcv_rsplen / 4) ||
			     be32_to_cpu(op->rsp_iu.xfrd_len) !=
					freq->transferred_length ||
			     op->rsp_iu.status_code ||
			     sqe->common.command_id != cqe->command_id)) {
			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
			goto done;
		}
		result = cqe->result;
		status = cqe->status;
		break;

	default:
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
		goto done;
	}

	terminate_assoc = false;

done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
		complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
		goto check_error;
	}

	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
	if (!complete_rq) {
		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
			if (blk_queue_dying(rq->q))
				status |= cpu_to_le16(NVME_SC_DNR << 1);
		}
		nvme_end_request(rq, status, result);
	} else
		__nvme_fc_final_op_cleanup(rq);

check_error:
	if (terminate_assoc)
		nvme_fc_error_recovery(ctrl, "transport detected io error");
}
static int
__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
		struct request *rq, u32 rqno)
{
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	int ret = 0;

	memset(op, 0, sizeof(*op));
	op->fcp_req.cmdaddr = &op->cmd_iu;
	op->fcp_req.cmdlen = sizeof(op->cmd_iu);
	op->fcp_req.rspaddr = &op->rsp_iu;
	op->fcp_req.rsplen = sizeof(op->rsp_iu);
	op->fcp_req.done = nvme_fc_fcpio_done;
	op->fcp_req.first_sgl = (struct scatterlist *)&op[1];
	op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
	op->ctrl = ctrl;
	op->queue = queue;
	op->rq = rq;
	op->rqno = rqno;

	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
	cmdiu->fc_id = NVME_CMD_FC_ID;
	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));

	op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
				&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - cmdiu dma mapping failed.\n");
		ret = -EFAULT;
		goto out_on_error;
	}

	op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
				&op->rsp_iu, sizeof(op->rsp_iu),
				DMA_FROM_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - rspiu dma mapping failed.\n");
		ret = -EFAULT;
	}

	atomic_set(&op->state, FCPOP_STATE_IDLE);
out_on_error:
	return ret;
}
static int
nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct nvme_fc_ctrl *ctrl = set->driver_data;
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];

	return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
}
static int
nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op;
	struct nvme_fc_cmd_iu *cmdiu;
	struct nvme_command *sqe;
	void *private;
	int i, ret;

	aen_op = ctrl->aen_ops;
	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
		private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
						GFP_KERNEL);
		if (!private)
			return -ENOMEM;

		cmdiu = &aen_op->cmd_iu;
		sqe = &cmdiu->sqe;
		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
				aen_op, (struct request *)NULL,
				(AEN_CMDID_BASE + i));
		if (ret) {
			kfree(private);
			return ret;
		}

		aen_op->flags = FCOP_FLAGS_AEN;
		aen_op->fcp_req.first_sgl = NULL; /* no sg list */
		aen_op->fcp_req.private = private;

		memset(sqe, 0, sizeof(*sqe));
		sqe->common.opcode = nvme_admin_async_event;
		/* Note: core layer may overwrite the sqe.command_id value */
		sqe->common.command_id = AEN_CMDID_BASE + i;
	}
	return 0;
}

static void
nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op;
	int i;

	aen_op = ctrl->aen_ops;
	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
		if (!aen_op->fcp_req.private)
			continue;

		__nvme_fc_exit_request(ctrl, aen_op);

		kfree(aen_op->fcp_req.private);
		aen_op->fcp_req.private = NULL;
	}
}
static inline void
__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
		unsigned int qidx)
{
	struct nvme_fc_queue *queue = &ctrl->queues[qidx];

	hctx->driver_data = queue;
	queue->hctx = hctx;
}

static int
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_fc_ctrl *ctrl = data;

	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1);

	return 0;
}

static int
nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_fc_ctrl *ctrl = data;

	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx);

	return 0;
}
static void
nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
{
	struct nvme_fc_queue *queue;

	queue = &ctrl->queues[idx];
	memset(queue, 0, sizeof(*queue));
	queue->ctrl = ctrl;
	queue->qnum = idx;
	atomic_set(&queue->csn, 1);
	queue->dev = ctrl->dev;

	if (idx > 0)
		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command);

	queue->queue_size = queue_size;

	/*
	 * Considered whether we should allocate buffers for all SQEs
	 * and CQEs and dma map them - mapping their respective entries
	 * into the request structures (kernel vm addr and dma address)
	 * thus the driver could use the buffers/mappings directly.
	 * It only makes sense if the LLDD would use them for its
	 * messaging api. It's very unlikely most adapter api's would use
	 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload
	 * structures were used instead.
	 */
}
/*
 * This routine terminates a queue at the transport level.
 * The transport has already ensured that all outstanding ios on
 * the queue have been terminated.
 * The transport will send a Disconnect LS request to terminate
 * the queue's connection. Termination of the admin queue will also
 * terminate the association at the target.
 */
static void
nvme_fc_free_queue(struct nvme_fc_queue *queue)
{
	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
		return;

	clear_bit(NVME_FC_Q_LIVE, &queue->flags);
	/*
	 * Current implementation never disconnects a single queue.
	 * It always terminates a whole association. So there is never
	 * a disconnect(queue) LS sent to the target.
	 */

	queue->connection_id = 0;
}

static void
__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, unsigned int qidx)
{
	if (ctrl->lport->ops->delete_queue)
		ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx,
				queue->lldd_handle);
	queue->lldd_handle = NULL;
}

static void
nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_fc_free_queue(&ctrl->queues[i]);
}
static int
__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize)
{
	int ret = 0;

	queue->lldd_handle = NULL;
	if (ctrl->lport->ops->create_queue)
		ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport,
				qidx, qsize, &queue->lldd_handle);

	return ret;
}

static void
nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
	int i;

	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
		__nvme_fc_delete_hw_queue(ctrl, queue, i);
}

static int
nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
	struct nvme_fc_queue *queue = &ctrl->queues[1];
	int i, ret;

	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
		if (ret)
			goto delete_queues;
	}

	return 0;

delete_queues:
	for (; i >= 1; i--)
		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
	return ret;
}
static int
nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
	int i, ret = 0;

	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
					(qsize / 5));
		if (ret)
			break;
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			break;

		set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
	}

	return ret;
}

static void
nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
}
static void
nvme_fc_ctrl_free(struct kref *ref)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(ref, struct nvme_fc_ctrl, ref);
	unsigned long flags;

	if (ctrl->ctrl.tagset) {
		blk_cleanup_queue(ctrl->ctrl.connect_q);
		blk_mq_free_tag_set(&ctrl->tag_set);
	}

	/* remove from rport list */
	spin_lock_irqsave(&ctrl->rport->lock, flags);
	list_del(&ctrl->ctrl_list);
	spin_unlock_irqrestore(&ctrl->rport->lock, flags);

	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
	blk_cleanup_queue(ctrl->ctrl.admin_q);
	blk_mq_free_tag_set(&ctrl->admin_tag_set);

	kfree(ctrl->queues);

	put_device(ctrl->dev);
	nvme_fc_rport_put(ctrl->rport);

	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
	if (ctrl->ctrl.opts)
		nvmf_free_options(ctrl->ctrl.opts);
	kfree(ctrl);
}

static void
nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvme_fc_ctrl_free);
}

static int
nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
{
	return kref_get_unless_zero(&ctrl->ref);
}

/*
 * All accesses from nvme core layer done - can now free the
 * controller. Called after last nvme_put_ctrl() call
 */
static void
nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);

	WARN_ON(nctrl != &ctrl->ctrl);

	nvme_fc_ctrl_put(ctrl);
}
static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
	/* only proceed if in LIVE state - e.g. on first error */
	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
		return;

	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: transport association error detected: %s\n",
		ctrl->cnum, errmsg);
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: error_recovery: Couldn't change state "
			"to RECONNECTING\n", ctrl->cnum);
		return;
	}

	nvme_reset_ctrl(&ctrl->ctrl);
}
static enum blk_eh_timer_return
nvme_fc_timeout(struct request *rq, bool reserved)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	int ret;

	if (reserved)
		return BLK_EH_RESET_TIMER;

	ret = __nvme_fc_abort_op(ctrl, op);
	if (ret)
		/* io wasn't active to abort, consider it done */
		return BLK_EH_HANDLED;

	/*
	 * we can't individually ABTS an io without affecting the queue,
	 * thus killing the queue, and thus the association.
	 * So resolve by performing a controller reset, which will stop
	 * the host/io stack, terminate the association on the link,
	 * and recreate an association on the link.
	 */
	nvme_fc_error_recovery(ctrl, "io timeout error");

	return BLK_EH_HANDLED;
}
static int
nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
		struct nvme_fc_fcp_op *op)
{
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	enum dma_data_direction dir;
	int ret;

	freq->sg_cnt = 0;

	if (!blk_rq_payload_bytes(rq))
		return 0;

	freq->sg_table.sgl = freq->first_sgl;
	ret = sg_alloc_table_chained(&freq->sg_table,
			blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
	if (ret)
		return -ENOMEM;

	op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
	WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
	dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
	freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
				op->nents, dir);
	if (unlikely(freq->sg_cnt <= 0)) {
		sg_free_table_chained(&freq->sg_table, true);
		freq->sg_cnt = 0;
		return -EFAULT;
	}

	/*
	 * TODO: blk_integrity_rq(rq)  for DIF
	 */
	return 0;
}

static void
nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
		struct nvme_fc_fcp_op *op)
{
	struct nvmefc_fcp_req *freq = &op->fcp_req;

	if (!freq->sg_cnt)
		return;

	fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
				((rq_data_dir(rq) == WRITE) ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE));

	nvme_cleanup_cmd(rq);

	sg_free_table_chained(&freq->sg_table, true);

	freq->sg_cnt = 0;
}
/*
 * In FC, the queue is a logical thing. At transport connect, the target
 * creates its "queue" and returns a handle that is to be given to the
 * target whenever it posts something to the corresponding SQ.  When an
 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the
 * command contained within the SQE, an io, and assigns a FC exchange
 * to it. The SQE and the associated SQ handle are sent in the initial
 * CMD IU sent on the exchange. All transfers relative to the io occur
 * as part of the exchange.  The CQE is the last thing for the io,
 * which is transferred (explicitly or implicitly) with the RSP IU
 * sent on the exchange. After the CQE is received, the FC exchange is
 * terminated and the Exchange may be used on a different io.
 *
 * The transport to LLDD api has the transport making a request for a
 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange
 * resource and transfers the command. The LLDD will then process all
 * steps to complete the io. Upon completion, the transport done routine
 * is called.
 *
 * So - while the operation is outstanding to the LLDD, there is a link
 * level FC exchange resource that is also outstanding. This must be
 * considered in all cleanup operations.
 */
static blk_status_t
nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
	struct nvme_fc_fcp_op *op, u32 data_len,
	enum nvmefc_fcp_datadir	io_dir)
{
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;
	u32 csn;
	int ret;

	/*
	 * before attempting to send the io, check to see if we believe
	 * the target device is present
	 */
	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		goto busy;

	if (!nvme_fc_ctrl_get(ctrl))
		return BLK_STS_IOERR;

	/* format the FC-NVME CMD IU and fcp_req */
	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
	csn = atomic_inc_return(&queue->csn);
	cmdiu->csn = cpu_to_be32(csn);
	cmdiu->data_len = cpu_to_be32(data_len);
	switch (io_dir) {
	case NVMEFC_FCP_WRITE:
		cmdiu->flags = FCNVME_CMD_FLAGS_WRITE;
		break;
	case NVMEFC_FCP_READ:
		cmdiu->flags = FCNVME_CMD_FLAGS_READ;
		break;
	case NVMEFC_FCP_NODATA:
		cmdiu->flags = 0;
		break;
	}
	op->fcp_req.payload_length = data_len;
	op->fcp_req.io_dir = io_dir;
	op->fcp_req.transferred_length = 0;
	op->fcp_req.rcv_rsplen = 0;
	op->fcp_req.status = NVME_SC_SUCCESS;
	op->fcp_req.sqid = cpu_to_le16(queue->qnum);

	/*
	 * validate per fabric rules, set fields mandated by fabric spec
	 * as well as those by FC-NVME spec.
	 */
	WARN_ON_ONCE(sqe->common.metadata);
	sqe->common.flags |= NVME_CMD_SGL_METABUF;

	/*
	 * format SQE DPTR field per FC-NVME rules:
	 *    type=0x5     Transport SGL Data Block Descriptor
	 *    subtype=0xA  Transport-specific value
	 *    address=0
	 *    length=length of the data series
	 */
	sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
					NVME_SGL_FMT_TRANSPORT_A;
	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
	sqe->rw.dptr.sgl.addr = 0;

	if (!(op->flags & FCOP_FLAGS_AEN)) {
		ret = nvme_fc_map_data(ctrl, op->rq, op);
		if (ret < 0) {
			nvme_cleanup_cmd(op->rq);
			nvme_fc_ctrl_put(ctrl);
			if (ret == -ENOMEM || ret == -EAGAIN)
				return BLK_STS_RESOURCE;
			return BLK_STS_IOERR;
		}
	}

	fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma,
				  sizeof(op->cmd_iu), DMA_TO_DEVICE);

	atomic_set(&op->state, FCPOP_STATE_ACTIVE);

	if (!(op->flags & FCOP_FLAGS_AEN))
		blk_mq_start_request(op->rq);

	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
					&ctrl->rport->remoteport,
					queue->lldd_handle, &op->fcp_req);

	if (ret) {
		if (!(op->flags & FCOP_FLAGS_AEN))
			nvme_fc_unmap_data(ctrl, op->rq, op);

		nvme_fc_ctrl_put(ctrl);

		if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
				ret != -EBUSY)
			return BLK_STS_IOERR;

		goto busy;
	}

	return BLK_STS_OK;

busy:
	if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx)
		blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);

	return BLK_STS_RESOURCE;
}
static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
		struct request *rq)
{
	if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags)))
		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
	return BLK_STS_OK;
}
static blk_status_t
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_fc_queue *queue = hctx->driver_data;
	struct nvme_fc_ctrl *ctrl = queue->ctrl;
	struct request *rq = bd->rq;
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;
	enum nvmefc_fcp_datadir	io_dir;
	u32 data_len;
	blk_status_t ret;

	ret = nvme_fc_is_ready(queue, rq);
	if (unlikely(ret))
		return ret;

	ret = nvme_setup_cmd(ns, rq, sqe);
	if (ret)
		return ret;

	data_len = blk_rq_payload_bytes(rq);
	if (data_len)
		io_dir = ((rq_data_dir(rq) == WRITE) ?
					NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
	else
		io_dir = NVMEFC_FCP_NODATA;

	return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
}
static struct blk_mq_tags *
nvme_fc_tagset(struct nvme_fc_queue *queue)
{
	if (queue->qnum == 0)
		return queue->ctrl->admin_tag_set.tags[queue->qnum];

	return queue->ctrl->tag_set.tags[queue->qnum - 1];
}
static int
nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
{
	struct nvme_fc_queue *queue = hctx->driver_data;
	struct nvme_fc_ctrl *ctrl = queue->ctrl;
	struct request *req;
	struct nvme_fc_fcp_op *op;

	req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
	if (!req)
		return 0;

	op = blk_mq_rq_to_pdu(req);

	if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) &&
		 (ctrl->lport->ops->poll_queue))
		ctrl->lport->ops->poll_queue(&ctrl->lport->localport,
						 queue->lldd_handle);

	return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE));
}
static void
nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
	struct nvme_fc_fcp_op *aen_op;
	unsigned long flags;
	bool terminating = false;
	blk_status_t ret;

	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (ctrl->flags & FCCTRL_TERMIO)
		terminating = true;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (terminating)
		return;

	aen_op = &ctrl->aen_ops[aer_idx];

	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
					NVMEFC_FCP_NODATA);
	if (ret)
		dev_err(ctrl->ctrl.device,
			"failed async event work [%d]\n", aer_idx);
}
static void
__nvme_fc_final_op_cleanup(struct request *rq)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;

	atomic_set(&op->state, FCPOP_STATE_IDLE);
	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
			FCOP_FLAGS_COMPLETE);

	nvme_fc_unmap_data(ctrl, rq, op);
	nvme_complete_rq(rq);
	nvme_fc_ctrl_put(ctrl);
}
static void
nvme_fc_complete_rq(struct request *rq)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	unsigned long flags;
	bool completed = false;

	/*
	 * the core layer, on controller resets after calling
	 * nvme_shutdown_ctrl(), calls complete_rq without our
	 * calling blk_mq_complete_request(), thus there may still
	 * be live i/o outstanding with the LLDD. Means transport has
	 * to track complete calls vs fcpio_done calls to know what
	 * path to take on completes and dones.
	 */
	spin_lock_irqsave(&ctrl->lock, flags);
	if (op->flags & FCOP_FLAGS_COMPLETE)
		completed = true;
	else
		op->flags |= FCOP_FLAGS_RELEASED;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (completed)
		__nvme_fc_final_op_cleanup(rq);
}
/*
 * This routine is used by the transport when it needs to find active
 * io on a queue that is to be terminated. The transport uses
 * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
 * this routine to kill them on a 1 by 1 basis.
 *
 * As FC allocates FC exchange for each io, the transport must contact
 * the LLDD to terminate the exchange, thus releasing the FC exchange.
 * After terminating the exchange the LLDD will call the transport's
 * normal io done path for the request, but it will have an aborted
 * status. The done path will return the io request back to the block
 * layer with an error status.
 */
2213 nvme_fc_terminate_exchange(struct request
*req
, void *data
, bool reserved
)
2215 struct nvme_ctrl
*nctrl
= data
;
2216 struct nvme_fc_ctrl
*ctrl
= to_fc_ctrl(nctrl
);
2217 struct nvme_fc_fcp_op
*op
= blk_mq_rq_to_pdu(req
);
2218 unsigned long flags
;
2221 if (!blk_mq_request_started(req
))
2224 spin_lock_irqsave(&ctrl
->lock
, flags
);
2225 if (ctrl
->flags
& FCCTRL_TERMIO
) {
2227 op
->flags
|= FCOP_FLAGS_TERMIO
;
2229 spin_unlock_irqrestore(&ctrl
->lock
, flags
);
2231 status
= __nvme_fc_abort_op(ctrl
, op
);
2234 * if __nvme_fc_abort_op failed the io wasn't
2235 * active. Thus this call path is running in
2236 * parallel to the io complete. Treat as non-error.
2239 /* back out the flags/counters */
2240 spin_lock_irqsave(&ctrl
->lock
, flags
);
2241 if (ctrl
->flags
& FCCTRL_TERMIO
)
2243 op
->flags
&= ~FCOP_FLAGS_TERMIO
;
2244 spin_unlock_irqrestore(&ctrl
->lock
, flags
);
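/* blk-mq ops vector for the I/O queues */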
static const struct blk_mq_ops nvme_fc_mq_ops = {
	.queue_rq	= nvme_fc_queue_rq,
	.complete	= nvme_fc_complete_rq,
	.init_request	= nvme_fc_init_request,
	.exit_request	= nvme_fc_exit_request,
	.init_hctx	= nvme_fc_init_hctx,
	.poll		= nvme_fc_poll,
	.timeout	= nvme_fc_timeout,
};

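/*
 * First-time creation of the I/O queues: negotiate the queue count
 * with the controller, allocate and register the I/O tag set, then
 * create the LLDD hw queues and send the FC-NVME connect for each.
 */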
static int
nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
	int ret;

	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
				ctrl->lport->ops->max_hw_queues);
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
		return ret;
	}

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	if (!nr_io_queues)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
	ctrl->tag_set.ops = &nvme_fc_mq_ops;
	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
	ctrl->tag_set.numa_node = NUMA_NO_NODE;
	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
					(SG_CHUNK_SIZE *
						sizeof(struct scatterlist)) +
					ctrl->lport->ops->fcprqst_priv_sz;
	ctrl->tag_set.driver_data = ctrl;
	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;

	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
	if (ret)
		return ret;

	ctrl->ctrl.tagset = &ctrl->tag_set;

	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
	if (IS_ERR(ctrl->ctrl.connect_q)) {
		ret = PTR_ERR(ctrl->ctrl.connect_q);
		goto out_free_tag_set;
	}

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_cleanup_blk_queue;

	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_delete_hw_queues;

	return 0;

out_delete_hw_queues:
	nvme_fc_delete_hw_io_queues(ctrl);
out_cleanup_blk_queue:
	blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
	blk_mq_free_tag_set(&ctrl->tag_set);
	nvme_fc_free_io_queues(ctrl);

	/* force put free routine to ignore io queues */
	ctrl->ctrl.tagset = NULL;

	return ret;
}

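/*
 * Re-establish the I/O queues after a reset or reconnect. The tag set
 * already exists; only the hw queues and their FC-NVME connections
 * are recreated.
 */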
static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
	int ret;

	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
				ctrl->lport->ops->max_hw_queues);
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
		return ret;
	}

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	/* check for io queues existing */
	if (ctrl->ctrl.queue_count == 1)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_delete_hw_queues;

	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);

	return 0;

out_delete_hw_queues:
	nvme_fc_delete_hw_io_queues(ctrl);
out_free_io_queues:
	nvme_fc_free_io_queues(ctrl);
	return ret;
}

/*
 * This routine restarts the controller on the host side, and
 * on the link side, recreates the controller association.
 */
static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	int ret;
	bool changed;
	u32 segs;

	++ctrl->ctrl.nr_reconnects;

	/*
	 * Create the admin queue
	 */

	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);

	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
				NVME_FC_AQ_BLKMQ_DEPTH);
	if (ret)
		goto out_free_queue;

	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
				NVME_FC_AQ_BLKMQ_DEPTH,
				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
	if (ret)
		goto out_delete_hw_queue;

	if (ctrl->ctrl.state != NVME_CTRL_NEW)
		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);

	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (ret)
		goto out_disconnect_admin_queue;

	set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);

	/*
	 * Check controller capabilities
	 *
	 * todo:- add code to check if ctrl attributes changed from
	 * prior connection values
	 */

	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
	if (ret) {
		dev_err(ctrl->ctrl.device,
			"prop_get NVME_REG_CAP failed\n");
		goto out_disconnect_admin_queue;
	}

	ctrl->ctrl.sqsize =
		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);

	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
	if (ret)
		goto out_disconnect_admin_queue;

	segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
			ctrl->lport->ops->max_sgl_segments);
	ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);

	ret = nvme_init_identify(&ctrl->ctrl);
	if (ret)
		goto out_disconnect_admin_queue;

	/* sanity checks */

	/* FC-NVME does not have other data in the capsule */
	if (ctrl->ctrl.icdoff) {
		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
				ctrl->ctrl.icdoff);
		goto out_disconnect_admin_queue;
	}

	/* FC-NVME supports normal SGL Data Block Descriptors */

	if (opts->queue_size > ctrl->ctrl.maxcmd) {
		/* warn if maxcmd is lower than queue_size */
		dev_warn(ctrl->ctrl.device,
			"queue_size %zu > ctrl maxcmd %u, reducing "
			"to queue_size\n",
			opts->queue_size, ctrl->ctrl.maxcmd);
		opts->queue_size = ctrl->ctrl.maxcmd;
	}

	ret = nvme_fc_init_aen_ops(ctrl);
	if (ret)
		goto out_term_aen_ops;

	/*
	 * Create the io queues
	 */

	if (ctrl->ctrl.queue_count > 1) {
		if (ctrl->ctrl.state == NVME_CTRL_NEW)
			ret = nvme_fc_create_io_queues(ctrl);
		else
			ret = nvme_fc_reinit_io_queues(ctrl);
		if (ret)
			goto out_term_aen_ops;
	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	WARN_ON_ONCE(!changed);

	ctrl->ctrl.nr_reconnects = 0;

	nvme_start_ctrl(&ctrl->ctrl);

	return 0;	/* Success */

out_term_aen_ops:
	nvme_fc_term_aen_ops(ctrl);
out_disconnect_admin_queue:
	/* send a Disconnect(association) LS to fc-nvme target */
	nvme_fc_xmt_disconnect_assoc(ctrl);
out_delete_hw_queue:
	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
	nvme_fc_free_queue(&ctrl->queues[0]);

	return ret;
}

/*
 * This routine stops operation of the controller on the host side.
 * On the host os stack side: Admin and IO queues are stopped,
 *   outstanding ios on them terminated via FC ABTS.
 * On the link side: the association is terminated.
 */
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
	unsigned long flags;

	spin_lock_irqsave(&ctrl->lock, flags);
	ctrl->flags |= FCCTRL_TERMIO;
	ctrl->iocnt = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	/*
	 * If io queues are present, stop them and terminate all outstanding
	 * ios on them. As FC allocates an FC exchange for each io, the
	 * transport must contact the LLDD to terminate the exchange,
	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
	 * to tell us what io's are busy and invoke a transport routine
	 * to kill them with the LLDD. After terminating the exchange
	 * the LLDD will call the transport's normal io done path, but it
	 * will have an aborted status. The done path will return the
	 * io requests back to the block layer as part of normal completions
	 * (but with error status).
	 */
	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
	}

	/*
	 * Other transports, which don't have link-level contexts bound
	 * to sqe's, would try to gracefully shutdown the controller by
	 * writing the registers for shutdown and polling (call
	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
	 * just aborted and we will wait on those contexts, and given
	 * there was no indication of how live the controller is on the
	 * link, don't send more io to create more contexts for the
	 * shutdown. Let the controller fail via keepalive failure if
	 * it's still present.
	 */

	/*
	 * clean up the admin queue. Same thing as above.
	 * use blk_mq_tagset_busy_iter() and the transport routine to
	 * terminate the exchanges.
	 */
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);

	/* kill the aens as they are a separate path */
	nvme_fc_abort_aen_ops(ctrl);

	/* wait for all io that had to be aborted */
	spin_lock_irq(&ctrl->lock);
	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
	ctrl->flags &= ~FCCTRL_TERMIO;
	spin_unlock_irq(&ctrl->lock);

	nvme_fc_term_aen_ops(ctrl);

	/*
	 * send a Disconnect(association) LS to fc-nvme target
	 * Note: could have been sent at top of process, but
	 * cleaner on link traffic if after the aborts complete.
	 * Note: if association doesn't exist, association_id will be 0
	 */
	if (ctrl->association_id)
		nvme_fc_xmt_disconnect_assoc(ctrl);

	if (ctrl->ctrl.tagset) {
		nvme_fc_delete_hw_io_queues(ctrl);
		nvme_fc_free_io_queues(ctrl);
	}

	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
	nvme_fc_free_queue(&ctrl->queues[0]);
}

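/*
 * Work item for controller delete: stop reset/connect work, remove
 * namespaces, kill the association on the link, then drop the final
 * controller references.
 */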
static void
nvme_fc_delete_ctrl_work(struct work_struct *work)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(work, struct nvme_fc_ctrl, delete_work);

	cancel_work_sync(&ctrl->ctrl.reset_work);
	cancel_delayed_work_sync(&ctrl->connect_work);
	nvme_stop_ctrl(&ctrl->ctrl);
	nvme_remove_namespaces(&ctrl->ctrl);
	/*
	 * kill the association on the link side. this will block
	 * waiting for io to terminate
	 */
	nvme_fc_delete_association(ctrl);

	/*
	 * tear down the controller
	 * After the last reference on the nvme ctrl is removed,
	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
	 * invoked. From there, the transport will tear down its
	 * logical queues and association.
	 */
	nvme_uninit_ctrl(&ctrl->ctrl);

	nvme_put_ctrl(&ctrl->ctrl);
}

static bool
__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
		return true;

	if (!queue_work(nvme_wq, &ctrl->delete_work))
		return true;

	return false;
}

static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
{
	return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
}

/*
 * Request from nvme core layer to delete the controller
 */
static int
nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
	int ret;

	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
		return -EBUSY;

	ret = __nvme_fc_del_ctrl(ctrl);

	if (!ret)
		flush_workqueue(nvme_wq);

	nvme_put_ctrl(&ctrl->ctrl);

	return ret;
}

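/*
 * Called when an association create/reconnect attempt fails: either
 * schedule another attempt after reconnect_delay, or give up and
 * schedule controller delete once the retry budget is exhausted.
 */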
static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{
	/* If we are resetting/deleting then do nothing */
	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
			ctrl->ctrl.state == NVME_CTRL_LIVE);
		return;
	}

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
		ctrl->cnum, status);

	if (nvmf_should_reconnect(&ctrl->ctrl)) {
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
		queue_delayed_work(nvme_wq, &ctrl->connect_work,
				ctrl->ctrl.opts->reconnect_delay * HZ);
	} else {
		dev_warn(ctrl->ctrl.device,
			"NVME-FC{%d}: Max reconnect attempts (%d) "
			"reached. Removing controller\n",
			ctrl->cnum, ctrl->ctrl.nr_reconnects);
		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
	}
}

static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
	int ret;

	nvme_stop_ctrl(&ctrl->ctrl);
	/* will block while waiting for io to terminate */
	nvme_fc_delete_association(ctrl);

	ret = nvme_fc_create_association(ctrl);
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
}

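/* controller ops handed to the nvme core for FC-attached controllers */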
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
	.name			= "fc",
	.module			= THIS_MODULE,
	.flags			= NVME_F_FABRICS,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
	.submit_async_event	= nvme_fc_submit_async_event,
	.delete_ctrl		= nvme_fc_del_nvme_ctrl,
	.get_address		= nvmf_get_address,
};

static void
nvme_fc_connect_ctrl_work(struct work_struct *work)
{
	int ret;

	struct nvme_fc_ctrl *ctrl =
			container_of(to_delayed_work(work),
				struct nvme_fc_ctrl, connect_work);

	ret = nvme_fc_create_association(ctrl);
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reconnect complete\n",
			ctrl->cnum);
}

static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
	.queue_rq	= nvme_fc_queue_rq,
	.complete	= nvme_fc_complete_rq,
	.init_request	= nvme_fc_init_request,
	.exit_request	= nvme_fc_exit_request,
	.init_hctx	= nvme_fc_init_admin_hctx,
	.timeout	= nvme_fc_timeout,
};

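/*
 * Allocate and initialize a controller instance for the given
 * lport/rport pair: set up the admin tag set and queue, register with
 * the nvme core, then attempt to create the association.
 */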
static struct nvme_ctrl *
nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
{
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;
	int ret, idx, retry;

	if (!(rport->remoteport.port_role &
	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
		ret = -EBADR;
		goto out_fail;
	}

	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl) {
		ret = -ENOMEM;
		goto out_fail;
	}

	idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_free_ctrl;
	}

	ctrl->ctrl.opts = opts;
	INIT_LIST_HEAD(&ctrl->ctrl_list);
	ctrl->lport = lport;
	ctrl->rport = rport;
	ctrl->dev = lport->dev;
	ctrl->cnum = idx;

	init_waitqueue_head(&ctrl->ioabort_wait);

	get_device(ctrl->dev);
	kref_init(&ctrl->ref);

	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
	spin_lock_init(&ctrl->lock);

	/* io queue count */
	ctrl->ctrl.queue_count = min_t(unsigned int,
				opts->nr_io_queues,
				lport->ops->max_hw_queues);
	ctrl->ctrl.queue_count++;	/* +1 for admin queue */

	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
				sizeof(struct nvme_fc_queue), GFP_KERNEL);
	if (!ctrl->queues)
		goto out_free_ida;

	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
					(SG_CHUNK_SIZE *
						sizeof(struct scatterlist)) +
					ctrl->lport->ops->fcprqst_priv_sz;
	ctrl->admin_tag_set.driver_data = ctrl;
	ctrl->admin_tag_set.nr_hw_queues = 1;
	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;

	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
	if (ret)
		goto out_free_queues;
	ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;

	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
	if (IS_ERR(ctrl->ctrl.admin_q)) {
		ret = PTR_ERR(ctrl->ctrl.admin_q);
		goto out_free_admin_tag_set;
	}

	/*
	 * Would have been nice to init io queues tag set as well.
	 * However, we require interaction from the controller
	 * for max io queue count before we can do so.
	 * Defer this to the connect path.
	 */

	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
	if (ret)
		goto out_cleanup_admin_q;

	/* at this point, teardown path changes to ref counting on nvme ctrl */

	spin_lock_irqsave(&rport->lock, flags);
	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	/*
	 * It's possible that transactions used to create the association
	 * may fail. Examples: CreateAssociation LS or CreateIOConnection
	 * LS gets dropped/corrupted/fails; or a frame gets dropped or a
	 * command times out for one of the actions to init the controller
	 * (Connect, Get/Set_Property, Set_Features, etc). Many of these
	 * transport errors (frame drop, LS failure) inherently must kill
	 * the association. The transport is coded so that any command used
	 * to create the association (prior to a LIVE state transition
	 * while NEW or RECONNECTING) will fail if it completes in error or
	 * times out.
	 *
	 * As such: as the connect request was most likely due to a
	 * udev event that discovered the remote port, meaning there is
	 * not an admin or script there to restart if the connect
	 * request fails, retry the initial connection creation up to
	 * three times before giving up and declaring failure.
	 */
	for (retry = 0; retry < 3; retry++) {
		ret = nvme_fc_create_association(ctrl);
		if (!ret)
			break;
	}

	if (ret) {
		/* couldn't schedule retry - fail out */
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);

		ctrl->ctrl.opts = NULL;

		/* initiate nvme ctrl ref counting teardown */
		nvme_uninit_ctrl(&ctrl->ctrl);
		nvme_put_ctrl(&ctrl->ctrl);

		/* Remove core ctrl ref. */
		nvme_put_ctrl(&ctrl->ctrl);

		/* as we're past the point where we transition to the ref
		 * counting teardown path, if we return a bad pointer here,
		 * the calling routine, thinking it's prior to the
		 * transition, will do an rport put. Since the teardown
		 * path also does a rport put, we do an extra get here so
		 * proper order/teardown happens.
		 */
		nvme_fc_rport_get(rport);

		if (ret > 0)
			ret = -EIO;
		return ERR_PTR(ret);
	}

	kref_get(&ctrl->ctrl.kref);

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);

	return &ctrl->ctrl;

out_cleanup_admin_q:
	blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_admin_tag_set:
	blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_free_queues:
	kfree(ctrl->queues);
out_free_ida:
	put_device(ctrl->dev);
	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
out_free_ctrl:
	kfree(ctrl);
out_fail:
	/* exit via here doesn't follow ctlr ref points */
	return ERR_PTR(ret);
}

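/* helpers to parse the nn-.../pn-... transport address into WWNs */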
struct nvmet_fc_traddr {
	u64	nn;
	u64	pn;
};

static int
__nvme_fc_parse_u64(substring_t *sstr, u64 *val)
{
	u64 token64;

	if (match_u64(sstr, &token64))
		return -EINVAL;
	*val = token64;

	return 0;
}

/*
 * This routine validates and extracts the WWN's from the TRADDR string.
 * As kernel parsers need the 0x to determine number base, universally
 * build string to parse with 0x prefix before parsing name strings.
 */
static int
nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
{
	char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1];
	substring_t wwn = { name, &name[sizeof(name)-1] };
	int nnoffset, pnoffset;

	/* validate that the string is one of the 2 allowed formats */
	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
				"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
		nnoffset = NVME_FC_TRADDR_OXNNLEN;
		pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
						NVME_FC_TRADDR_OXNNLEN;
	} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
			!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
				"pn-", NVME_FC_TRADDR_NNLEN))) {
		nnoffset = NVME_FC_TRADDR_NNLEN;
		pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
	} else
		goto out_einval;

	name[0] = '0';
	name[1] = 'x';
	name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0;

	memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
	if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
		goto out_einval;

	memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
	if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
		goto out_einval;

	return 0;

out_einval:
	pr_warn("%s: bad traddr string\n", __func__);
	return -EINVAL;
}

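/*
 * Fabrics create_ctrl entry point: parse the local and remote transport
 * addresses, find the matching lport/rport pair under nvme_fc_lock,
 * take a remoteport reference, and initialize the controller.
 */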
static struct nvme_ctrl *
nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
{
	struct nvme_fc_lport *lport;
	struct nvme_fc_rport *rport;
	struct nvme_ctrl *ctrl;
	struct nvmet_fc_traddr laddr = { 0L, 0L };
	struct nvmet_fc_traddr raddr = { 0L, 0L };
	unsigned long flags;
	int ret;

	ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE);
	if (ret || !raddr.nn || !raddr.pn)
		return ERR_PTR(-EINVAL);

	ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE);
	if (ret || !laddr.nn || !laddr.pn)
		return ERR_PTR(-EINVAL);

	/* find the host and remote ports to connect together */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		if (lport->localport.node_name != laddr.nn ||
		    lport->localport.port_name != laddr.pn)
			continue;

		list_for_each_entry(rport, &lport->endp_list, endp_list) {
			if (rport->remoteport.node_name != raddr.nn ||
			    rport->remoteport.port_name != raddr.pn)
				continue;

			/* if we fail to get a reference, fall through; will error below */
			if (!nvme_fc_rport_get(rport))
				break;

			spin_unlock_irqrestore(&nvme_fc_lock, flags);

			ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
			if (IS_ERR(ctrl))
				nvme_fc_rport_put(rport);
			return ctrl;
		}
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return ERR_PTR(-ENOENT);
}

static struct nvmf_transport_ops nvme_fc_transport = {
	.name		= "fc",
	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
	.create_ctrl	= nvme_fc_create_ctrl,
};

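/* module init/exit: register the "fc" transport with the fabrics layer */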
static int __init nvme_fc_init_module(void)
{
	return nvmf_register_transport(&nvme_fc_transport);
}

static void __exit nvme_fc_exit_module(void)
{
	/* sanity check - all lports should be removed */
	if (!list_empty(&nvme_fc_lport_list))
		pr_warn("%s: localport list not empty\n", __func__);

	nvmf_unregister_transport(&nvme_fc_transport);

	ida_destroy(&nvme_fc_local_port_cnt);
	ida_destroy(&nvme_fc_ctrl_cnt);
}

module_init(nvme_fc_init_module);
module_exit(nvme_fc_exit_module);

MODULE_LICENSE("GPL v2");