drivers/infiniband/ulp/srp/ib_srp.c
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <rdma/ib_cache.h>
45
46 #include <linux/atomic.h>
47
48 #include <scsi/scsi.h>
49 #include <scsi/scsi_device.h>
50 #include <scsi/scsi_dbg.h>
51 #include <scsi/scsi_tcq.h>
52 #include <scsi/srp.h>
53 #include <scsi/scsi_transport_srp.h>
54
55 #include "ib_srp.h"
56
57 #define DRV_NAME "ib_srp"
58 #define PFX DRV_NAME ": "
59 #define DRV_VERSION "2.0"
60 #define DRV_RELDATE "July 26, 2015"
61
62 MODULE_AUTHOR("Roland Dreier");
63 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
64 MODULE_LICENSE("Dual BSD/GPL");
65 MODULE_VERSION(DRV_VERSION);
66 MODULE_INFO(release_date, DRV_RELDATE);
67
68 #if !defined(CONFIG_DYNAMIC_DEBUG)
69 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
70 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
71 #endif
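
/*
 * Note: when CONFIG_DYNAMIC_DEBUG is disabled, these stubs make
 * DYNAMIC_DEBUG_BRANCH() evaluate to false, so the memory mapping
 * consistency check in srp_map_data() compiles away entirely.
 */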
72
73 static unsigned int srp_sg_tablesize;
74 static unsigned int cmd_sg_entries;
75 static unsigned int indirect_sg_entries;
76 static bool allow_ext_sg;
77 static bool prefer_fr = true;
78 static bool register_always = true;
79 static bool never_register;
80 static int topspin_workarounds = 1;
81
82 module_param(srp_sg_tablesize, uint, 0444);
83 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
84
85 module_param(cmd_sg_entries, uint, 0444);
86 MODULE_PARM_DESC(cmd_sg_entries,
87 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
88
89 module_param(indirect_sg_entries, uint, 0444);
90 MODULE_PARM_DESC(indirect_sg_entries,
91 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
92
93 module_param(allow_ext_sg, bool, 0444);
94 MODULE_PARM_DESC(allow_ext_sg,
95 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
96
97 module_param(topspin_workarounds, int, 0444);
98 MODULE_PARM_DESC(topspin_workarounds,
99 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
100
101 module_param(prefer_fr, bool, 0444);
102 MODULE_PARM_DESC(prefer_fr,
103 "Whether to use fast registration if both FMR and fast registration are supported");
104
105 module_param(register_always, bool, 0444);
106 MODULE_PARM_DESC(register_always,
107 "Use memory registration even for contiguous memory regions");
108
109 module_param(never_register, bool, 0444);
110 MODULE_PARM_DESC(never_register, "Never register memory");
111
112 static const struct kernel_param_ops srp_tmo_ops;
113
114 static int srp_reconnect_delay = 10;
115 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
116 S_IRUGO | S_IWUSR);
117 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
118
119 static int srp_fast_io_fail_tmo = 15;
120 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
121 S_IRUGO | S_IWUSR);
122 MODULE_PARM_DESC(fast_io_fail_tmo,
123 "Number of seconds between the observation of a transport"
124 " layer error and failing all I/O. \"off\" means that this"
125 " functionality is disabled.");
126
127 static int srp_dev_loss_tmo = 600;
128 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
129 S_IRUGO | S_IWUSR);
130 MODULE_PARM_DESC(dev_loss_tmo,
131 "Maximum number of seconds that the SRP transport should"
132 " insulate transport layer errors. After this time has been"
133 " exceeded the SCSI host is removed. Should be"
134 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
135 " if fast_io_fail_tmo has not been set. \"off\" means that"
136 " this functionality is disabled.");
137
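/*
 * Usage sketch (illustrative; assumes the usual sysfs location for module
 * parameters): fast_io_fail_tmo and dev_loss_tmo accept either a number of
 * seconds or the string "off", e.g.
 *
 *   echo 15  > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 *   echo off > /sys/module/ib_srp/parameters/dev_loss_tmo
 *
 * srp_tmo_set() checks every write against the other two values via
 * srp_tmo_valid() and rejects inconsistent combinations.
 */
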
138 static unsigned ch_count;
139 module_param(ch_count, uint, 0444);
140 MODULE_PARM_DESC(ch_count,
141 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
142
143 static void srp_add_one(struct ib_device *device);
144 static void srp_remove_one(struct ib_device *device, void *client_data);
145 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
146 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
147 const char *opname);
148 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
149
150 static struct scsi_transport_template *ib_srp_transport_template;
151 static struct workqueue_struct *srp_remove_wq;
152
153 static struct ib_client srp_client = {
154 .name = "srp",
155 .add = srp_add_one,
156 .remove = srp_remove_one
157 };
158
159 static struct ib_sa_client srp_sa_client;
160
161 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
162 {
163 int tmo = *(int *)kp->arg;
164
165 if (tmo >= 0)
166 return sprintf(buffer, "%d", tmo);
167 else
168 return sprintf(buffer, "off");
169 }
170
171 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
172 {
173 int tmo, res;
174
175 res = srp_parse_tmo(&tmo, val);
176 if (res)
177 goto out;
178
179 if (kp->arg == &srp_reconnect_delay)
180 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
181 srp_dev_loss_tmo);
182 else if (kp->arg == &srp_fast_io_fail_tmo)
183 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
184 else
185 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
186 tmo);
187 if (res)
188 goto out;
189 *(int *)kp->arg = tmo;
190
191 out:
192 return res;
193 }
194
195 static const struct kernel_param_ops srp_tmo_ops = {
196 .get = srp_tmo_get,
197 .set = srp_tmo_set,
198 };
199
200 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
201 {
202 return (struct srp_target_port *) host->hostdata;
203 }
204
205 static const char *srp_target_info(struct Scsi_Host *host)
206 {
207 return host_to_target(host)->target_name;
208 }
209
210 static int srp_target_is_topspin(struct srp_target_port *target)
211 {
212 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
213 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
214
215 return topspin_workarounds &&
216 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
217 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
218 }
219
220 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
221 gfp_t gfp_mask,
222 enum dma_data_direction direction)
223 {
224 struct srp_iu *iu;
225
226 iu = kmalloc(sizeof *iu, gfp_mask);
227 if (!iu)
228 goto out;
229
230 iu->buf = kzalloc(size, gfp_mask);
231 if (!iu->buf)
232 goto out_free_iu;
233
234 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
235 direction);
236 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
237 goto out_free_buf;
238
239 iu->size = size;
240 iu->direction = direction;
241
242 return iu;
243
244 out_free_buf:
245 kfree(iu->buf);
246 out_free_iu:
247 kfree(iu);
248 out:
249 return NULL;
250 }
251
252 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
253 {
254 if (!iu)
255 return;
256
257 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
258 iu->direction);
259 kfree(iu->buf);
260 kfree(iu);
261 }
262
263 static void srp_qp_event(struct ib_event *event, void *context)
264 {
265 pr_debug("QP event %s (%d)\n",
266 ib_event_msg(event->event), event->event);
267 }
268
269 static int srp_init_qp(struct srp_target_port *target,
270 struct ib_qp *qp)
271 {
272 struct ib_qp_attr *attr;
273 int ret;
274
275 attr = kmalloc(sizeof *attr, GFP_KERNEL);
276 if (!attr)
277 return -ENOMEM;
278
279 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
280 target->srp_host->port,
281 be16_to_cpu(target->pkey),
282 &attr->pkey_index);
283 if (ret)
284 goto out;
285
286 attr->qp_state = IB_QPS_INIT;
287 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
288 IB_ACCESS_REMOTE_WRITE);
289 attr->port_num = target->srp_host->port;
290
291 ret = ib_modify_qp(qp, attr,
292 IB_QP_STATE |
293 IB_QP_PKEY_INDEX |
294 IB_QP_ACCESS_FLAGS |
295 IB_QP_PORT);
296
297 out:
298 kfree(attr);
299 return ret;
300 }
301
302 static int srp_new_cm_id(struct srp_rdma_ch *ch)
303 {
304 struct srp_target_port *target = ch->target;
305 struct ib_cm_id *new_cm_id;
306
307 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
308 srp_cm_handler, ch);
309 if (IS_ERR(new_cm_id))
310 return PTR_ERR(new_cm_id);
311
312 if (ch->cm_id)
313 ib_destroy_cm_id(ch->cm_id);
314 ch->cm_id = new_cm_id;
315 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
316 target->srp_host->port))
317 ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
318 else
319 ch->path.rec_type = SA_PATH_REC_TYPE_IB;
320 ch->path.sgid = target->sgid;
321 ch->path.dgid = target->orig_dgid;
322 ch->path.pkey = target->pkey;
323 ch->path.service_id = target->service_id;
324
325 return 0;
326 }
327
328 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
329 {
330 struct srp_device *dev = target->srp_host->srp_dev;
331 struct ib_fmr_pool_param fmr_param;
332
333 memset(&fmr_param, 0, sizeof(fmr_param));
334 fmr_param.pool_size = target->mr_pool_size;
335 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
336 fmr_param.cache = 1;
337 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
338 fmr_param.page_shift = ilog2(dev->mr_page_size);
339 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
340 IB_ACCESS_REMOTE_WRITE |
341 IB_ACCESS_REMOTE_READ);
342
343 return ib_create_fmr_pool(dev->pd, &fmr_param);
344 }
345
346 /**
347 * srp_destroy_fr_pool() - free the resources owned by a pool
348 * @pool: Fast registration pool to be destroyed.
349 */
350 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
351 {
352 int i;
353 struct srp_fr_desc *d;
354
355 if (!pool)
356 return;
357
358 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
359 if (d->mr)
360 ib_dereg_mr(d->mr);
361 }
362 kfree(pool);
363 }
364
365 /**
366 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
367 * @device: IB device to allocate fast registration descriptors for.
368 * @pd: Protection domain associated with the FR descriptors.
369 * @pool_size: Number of descriptors to allocate.
370 * @max_page_list_len: Maximum fast registration work request page list length.
371 */
372 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
373 struct ib_pd *pd, int pool_size,
374 int max_page_list_len)
375 {
376 struct srp_fr_pool *pool;
377 struct srp_fr_desc *d;
378 struct ib_mr *mr;
379 int i, ret = -EINVAL;
380
381 if (pool_size <= 0)
382 goto err;
383 ret = -ENOMEM;
384 pool = kzalloc(sizeof(struct srp_fr_pool) +
385 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
386 if (!pool)
387 goto err;
388 pool->size = pool_size;
389 pool->max_page_list_len = max_page_list_len;
390 spin_lock_init(&pool->lock);
391 INIT_LIST_HEAD(&pool->free_list);
392
393 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
394 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
395 max_page_list_len);
396 if (IS_ERR(mr)) {
397 ret = PTR_ERR(mr);
398 if (ret == -ENOMEM)
399 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
400 dev_name(&device->dev));
401 goto destroy_pool;
402 }
403 d->mr = mr;
404 list_add_tail(&d->entry, &pool->free_list);
405 }
406
407 out:
408 return pool;
409
410 destroy_pool:
411 srp_destroy_fr_pool(pool);
412
413 err:
414 pool = ERR_PTR(ret);
415 goto out;
416 }
417
418 /**
419 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
420 * @pool: Pool to obtain descriptor from.
421 */
422 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
423 {
424 struct srp_fr_desc *d = NULL;
425 unsigned long flags;
426
427 spin_lock_irqsave(&pool->lock, flags);
428 if (!list_empty(&pool->free_list)) {
429 d = list_first_entry(&pool->free_list, typeof(*d), entry);
430 list_del(&d->entry);
431 }
432 spin_unlock_irqrestore(&pool->lock, flags);
433
434 return d;
435 }
436
437 /**
438 * srp_fr_pool_put() - put an FR descriptor back in the free list
439 * @pool: Pool the descriptor was allocated from.
440 * @desc: Pointer to an array of fast registration descriptor pointers.
441 * @n: Number of descriptors to put back.
442 *
443 * Note: The caller must already have queued an invalidation request for
444 * desc->mr->rkey before calling this function.
445 */
446 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
447 int n)
448 {
449 unsigned long flags;
450 int i;
451
452 spin_lock_irqsave(&pool->lock, flags);
453 for (i = 0; i < n; i++)
454 list_add(&desc[i]->entry, &pool->free_list);
455 spin_unlock_irqrestore(&pool->lock, flags);
456 }
457
458 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
459 {
460 struct srp_device *dev = target->srp_host->srp_dev;
461
462 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
463 dev->max_pages_per_mr);
464 }
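
/*
 * Lifecycle sketch of the fast registration pool as used in this file:
 * srp_create_ch_ib() allocates one pool per RDMA channel through
 * srp_alloc_fr_pool(), srp_map_finish_fr() takes descriptors with
 * srp_fr_pool_get() while mapping a command, and srp_unmap_data() returns
 * them with srp_fr_pool_put() once their rkeys have been invalidated.
 */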
465
466 /**
467 * srp_destroy_qp() - destroy an RDMA queue pair
468 * @qp: RDMA queue pair.
469 *
470 * Drain the qp before destroying it. This prevents the receive
471 * completion handler from accessing the queue pair while it is
472 * being destroyed.
473 */
474 static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
475 {
476 spin_lock_irq(&ch->lock);
477 ib_process_cq_direct(ch->send_cq, -1);
478 spin_unlock_irq(&ch->lock);
479
480 ib_drain_qp(qp);
481 ib_destroy_qp(qp);
482 }
483
484 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
485 {
486 struct srp_target_port *target = ch->target;
487 struct srp_device *dev = target->srp_host->srp_dev;
488 struct ib_qp_init_attr *init_attr;
489 struct ib_cq *recv_cq, *send_cq;
490 struct ib_qp *qp;
491 struct ib_fmr_pool *fmr_pool = NULL;
492 struct srp_fr_pool *fr_pool = NULL;
493 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
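/*
 * The factor m sizes the send queue for one SRP send work request per
 * command plus, when fast registration is used, one IB_WR_REG_MR and one
 * IB_WR_LOCAL_INV work request per memory region (see srp_map_finish_fr()
 * and srp_inv_rkey()).
 */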
494 int ret;
495
496 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
497 if (!init_attr)
498 return -ENOMEM;
499
500 /* queue_size + 1 for ib_drain_rq() */
501 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
502 ch->comp_vector, IB_POLL_SOFTIRQ);
503 if (IS_ERR(recv_cq)) {
504 ret = PTR_ERR(recv_cq);
505 goto err;
506 }
507
508 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
509 ch->comp_vector, IB_POLL_DIRECT);
510 if (IS_ERR(send_cq)) {
511 ret = PTR_ERR(send_cq);
512 goto err_recv_cq;
513 }
514
515 init_attr->event_handler = srp_qp_event;
516 init_attr->cap.max_send_wr = m * target->queue_size;
517 init_attr->cap.max_recv_wr = target->queue_size + 1;
518 init_attr->cap.max_recv_sge = 1;
519 init_attr->cap.max_send_sge = 1;
520 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
521 init_attr->qp_type = IB_QPT_RC;
522 init_attr->send_cq = send_cq;
523 init_attr->recv_cq = recv_cq;
524
525 qp = ib_create_qp(dev->pd, init_attr);
526 if (IS_ERR(qp)) {
527 ret = PTR_ERR(qp);
528 goto err_send_cq;
529 }
530
531 ret = srp_init_qp(target, qp);
532 if (ret)
533 goto err_qp;
534
535 if (dev->use_fast_reg) {
536 fr_pool = srp_alloc_fr_pool(target);
537 if (IS_ERR(fr_pool)) {
538 ret = PTR_ERR(fr_pool);
539 shost_printk(KERN_WARNING, target->scsi_host, PFX
540 "FR pool allocation failed (%d)\n", ret);
541 goto err_qp;
542 }
543 } else if (dev->use_fmr) {
544 fmr_pool = srp_alloc_fmr_pool(target);
545 if (IS_ERR(fmr_pool)) {
546 ret = PTR_ERR(fmr_pool);
547 shost_printk(KERN_WARNING, target->scsi_host, PFX
548 "FMR pool allocation failed (%d)\n", ret);
549 goto err_qp;
550 }
551 }
552
553 if (ch->qp)
554 srp_destroy_qp(ch, ch->qp);
555 if (ch->recv_cq)
556 ib_free_cq(ch->recv_cq);
557 if (ch->send_cq)
558 ib_free_cq(ch->send_cq);
559
560 ch->qp = qp;
561 ch->recv_cq = recv_cq;
562 ch->send_cq = send_cq;
563
564 if (dev->use_fast_reg) {
565 if (ch->fr_pool)
566 srp_destroy_fr_pool(ch->fr_pool);
567 ch->fr_pool = fr_pool;
568 } else if (dev->use_fmr) {
569 if (ch->fmr_pool)
570 ib_destroy_fmr_pool(ch->fmr_pool);
571 ch->fmr_pool = fmr_pool;
572 }
573
574 kfree(init_attr);
575 return 0;
576
577 err_qp:
578 ib_destroy_qp(qp);
579
580 err_send_cq:
581 ib_free_cq(send_cq);
582
583 err_recv_cq:
584 ib_free_cq(recv_cq);
585
586 err:
587 kfree(init_attr);
588 return ret;
589 }
590
591 /*
592 * Note: this function may be called without srp_alloc_iu_bufs() having been
593 * invoked. Hence the ch->[rt]x_ring checks.
594 */
595 static void srp_free_ch_ib(struct srp_target_port *target,
596 struct srp_rdma_ch *ch)
597 {
598 struct srp_device *dev = target->srp_host->srp_dev;
599 int i;
600
601 if (!ch->target)
602 return;
603
604 if (ch->cm_id) {
605 ib_destroy_cm_id(ch->cm_id);
606 ch->cm_id = NULL;
607 }
608
609 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
610 if (!ch->qp)
611 return;
612
613 if (dev->use_fast_reg) {
614 if (ch->fr_pool)
615 srp_destroy_fr_pool(ch->fr_pool);
616 } else if (dev->use_fmr) {
617 if (ch->fmr_pool)
618 ib_destroy_fmr_pool(ch->fmr_pool);
619 }
620
621 srp_destroy_qp(ch, ch->qp);
622 ib_free_cq(ch->send_cq);
623 ib_free_cq(ch->recv_cq);
624
625 /*
626 * Prevent the SCSI error handler from using this channel after it
627 * has been freed: the error handler may keep trying to perform
628 * recovery actions even after scsi_remove_host() has
629 * returned.
630 */
631 ch->target = NULL;
632
633 ch->qp = NULL;
634 ch->send_cq = ch->recv_cq = NULL;
635
636 if (ch->rx_ring) {
637 for (i = 0; i < target->queue_size; ++i)
638 srp_free_iu(target->srp_host, ch->rx_ring[i]);
639 kfree(ch->rx_ring);
640 ch->rx_ring = NULL;
641 }
642 if (ch->tx_ring) {
643 for (i = 0; i < target->queue_size; ++i)
644 srp_free_iu(target->srp_host, ch->tx_ring[i]);
645 kfree(ch->tx_ring);
646 ch->tx_ring = NULL;
647 }
648 }
649
650 static void srp_path_rec_completion(int status,
651 struct sa_path_rec *pathrec,
652 void *ch_ptr)
653 {
654 struct srp_rdma_ch *ch = ch_ptr;
655 struct srp_target_port *target = ch->target;
656
657 ch->status = status;
658 if (status)
659 shost_printk(KERN_ERR, target->scsi_host,
660 PFX "Got failed path rec status %d\n", status);
661 else
662 ch->path = *pathrec;
663 complete(&ch->done);
664 }
665
666 static int srp_lookup_path(struct srp_rdma_ch *ch)
667 {
668 struct srp_target_port *target = ch->target;
669 int ret;
670
671 ch->path.numb_path = 1;
672
673 init_completion(&ch->done);
674
675 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
676 target->srp_host->srp_dev->dev,
677 target->srp_host->port,
678 &ch->path,
679 IB_SA_PATH_REC_SERVICE_ID |
680 IB_SA_PATH_REC_DGID |
681 IB_SA_PATH_REC_SGID |
682 IB_SA_PATH_REC_NUMB_PATH |
683 IB_SA_PATH_REC_PKEY,
684 SRP_PATH_REC_TIMEOUT_MS,
685 GFP_KERNEL,
686 srp_path_rec_completion,
687 ch, &ch->path_query);
688 if (ch->path_query_id < 0)
689 return ch->path_query_id;
690
691 ret = wait_for_completion_interruptible(&ch->done);
692 if (ret < 0)
693 return ret;
694
695 if (ch->status < 0)
696 shost_printk(KERN_WARNING, target->scsi_host,
697 PFX "Path record query failed\n");
698
699 return ch->status;
700 }
701
702 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
703 {
704 struct srp_target_port *target = ch->target;
705 struct {
706 struct ib_cm_req_param param;
707 struct srp_login_req priv;
708 } *req = NULL;
709 int status;
710
711 req = kzalloc(sizeof *req, GFP_KERNEL);
712 if (!req)
713 return -ENOMEM;
714
715 req->param.primary_path = &ch->path;
716 req->param.alternate_path = NULL;
717 req->param.service_id = target->service_id;
718 req->param.qp_num = ch->qp->qp_num;
719 req->param.qp_type = ch->qp->qp_type;
720 req->param.private_data = &req->priv;
721 req->param.private_data_len = sizeof req->priv;
722 req->param.flow_control = 1;
723
724 get_random_bytes(&req->param.starting_psn, 4);
725 req->param.starting_psn &= 0xffffff;
726
727 /*
728 * Pick some arbitrary defaults here; we could make these
729 * module parameters if anyone cared about setting them.
730 */
731 req->param.responder_resources = 4;
732 req->param.remote_cm_response_timeout = 20;
733 req->param.local_cm_response_timeout = 20;
734 req->param.retry_count = target->tl_retry_count;
735 req->param.rnr_retry_count = 7;
736 req->param.max_cm_retries = 15;
737
738 req->priv.opcode = SRP_LOGIN_REQ;
739 req->priv.tag = 0;
740 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
741 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
742 SRP_BUF_FORMAT_INDIRECT);
743 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
744 SRP_MULTICHAN_SINGLE);
745 /*
746 * In the published SRP specification (draft rev. 16a), the
747 * port identifier format is 8 bytes of ID extension followed
748 * by 8 bytes of GUID. Older drafts put the two halves in the
749 * opposite order, so that the GUID comes first.
750 *
751 * Targets conforming to these obsolete drafts can be
752 * recognized by the I/O Class they report.
753 */
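/*
 * Illustration (hypothetical values): with id_ext 0x1 and ioc_guid
 * 0x0002c90300a0b0c0, the target port ID sent to a spec-conformant target
 * is the eight id_ext bytes followed by the eight ioc_guid bytes; an
 * SRP_REV10_IB_IO_CLASS target expects those two halves in the opposite
 * order, which is what the branches below implement.
 */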
754 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
755 memcpy(req->priv.initiator_port_id,
756 &target->sgid.global.interface_id, 8);
757 memcpy(req->priv.initiator_port_id + 8,
758 &target->initiator_ext, 8);
759 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
760 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
761 } else {
762 memcpy(req->priv.initiator_port_id,
763 &target->initiator_ext, 8);
764 memcpy(req->priv.initiator_port_id + 8,
765 &target->sgid.global.interface_id, 8);
766 memcpy(req->priv.target_port_id, &target->id_ext, 8);
767 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
768 }
769
770 /*
771 * Topspin/Cisco SRP targets will reject our login unless we
772 * zero out the first 8 bytes of our initiator port ID and set
773 * the second 8 bytes to the local node GUID.
774 */
775 if (srp_target_is_topspin(target)) {
776 shost_printk(KERN_DEBUG, target->scsi_host,
777 PFX "Topspin/Cisco initiator port ID workaround "
778 "activated for target GUID %016llx\n",
779 be64_to_cpu(target->ioc_guid));
780 memset(req->priv.initiator_port_id, 0, 8);
781 memcpy(req->priv.initiator_port_id + 8,
782 &target->srp_host->srp_dev->dev->node_guid, 8);
783 }
784
785 status = ib_send_cm_req(ch->cm_id, &req->param);
786
787 kfree(req);
788
789 return status;
790 }
791
792 static bool srp_queue_remove_work(struct srp_target_port *target)
793 {
794 bool changed = false;
795
796 spin_lock_irq(&target->lock);
797 if (target->state != SRP_TARGET_REMOVED) {
798 target->state = SRP_TARGET_REMOVED;
799 changed = true;
800 }
801 spin_unlock_irq(&target->lock);
802
803 if (changed)
804 queue_work(srp_remove_wq, &target->remove_work);
805
806 return changed;
807 }
808
809 static void srp_disconnect_target(struct srp_target_port *target)
810 {
811 struct srp_rdma_ch *ch;
812 int i;
813
814 /* XXX should send SRP_I_LOGOUT request */
815
816 for (i = 0; i < target->ch_count; i++) {
817 ch = &target->ch[i];
818 ch->connected = false;
819 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
820 shost_printk(KERN_DEBUG, target->scsi_host,
821 PFX "Sending CM DREQ failed\n");
822 }
823 }
824 }
825
826 static void srp_free_req_data(struct srp_target_port *target,
827 struct srp_rdma_ch *ch)
828 {
829 struct srp_device *dev = target->srp_host->srp_dev;
830 struct ib_device *ibdev = dev->dev;
831 struct srp_request *req;
832 int i;
833
834 if (!ch->req_ring)
835 return;
836
837 for (i = 0; i < target->req_ring_size; ++i) {
838 req = &ch->req_ring[i];
839 if (dev->use_fast_reg) {
840 kfree(req->fr_list);
841 } else {
842 kfree(req->fmr_list);
843 kfree(req->map_page);
844 }
845 if (req->indirect_dma_addr) {
846 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
847 target->indirect_size,
848 DMA_TO_DEVICE);
849 }
850 kfree(req->indirect_desc);
851 }
852
853 kfree(ch->req_ring);
854 ch->req_ring = NULL;
855 }
856
857 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
858 {
859 struct srp_target_port *target = ch->target;
860 struct srp_device *srp_dev = target->srp_host->srp_dev;
861 struct ib_device *ibdev = srp_dev->dev;
862 struct srp_request *req;
863 void *mr_list;
864 dma_addr_t dma_addr;
865 int i, ret = -ENOMEM;
866
867 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
868 GFP_KERNEL);
869 if (!ch->req_ring)
870 goto out;
871
872 for (i = 0; i < target->req_ring_size; ++i) {
873 req = &ch->req_ring[i];
874 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
875 GFP_KERNEL);
876 if (!mr_list)
877 goto out;
878 if (srp_dev->use_fast_reg) {
879 req->fr_list = mr_list;
880 } else {
881 req->fmr_list = mr_list;
882 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
883 sizeof(void *), GFP_KERNEL);
884 if (!req->map_page)
885 goto out;
886 }
887 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
888 if (!req->indirect_desc)
889 goto out;
890
891 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
892 target->indirect_size,
893 DMA_TO_DEVICE);
894 if (ib_dma_mapping_error(ibdev, dma_addr))
895 goto out;
896
897 req->indirect_dma_addr = dma_addr;
898 }
899 ret = 0;
900
901 out:
902 return ret;
903 }
904
905 /**
906 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
907 * @shost: SCSI host whose attributes to remove from sysfs.
908 *
909 * Note: Any attributes defined in the host template that did not exist
910 * before this function was invoked will be ignored.
911 */
912 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
913 {
914 struct device_attribute **attr;
915
916 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
917 device_remove_file(&shost->shost_dev, *attr);
918 }
919
920 static void srp_remove_target(struct srp_target_port *target)
921 {
922 struct srp_rdma_ch *ch;
923 int i;
924
925 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
926
927 srp_del_scsi_host_attr(target->scsi_host);
928 srp_rport_get(target->rport);
929 srp_remove_host(target->scsi_host);
930 scsi_remove_host(target->scsi_host);
931 srp_stop_rport_timers(target->rport);
932 srp_disconnect_target(target);
933 for (i = 0; i < target->ch_count; i++) {
934 ch = &target->ch[i];
935 srp_free_ch_ib(target, ch);
936 }
937 cancel_work_sync(&target->tl_err_work);
938 srp_rport_put(target->rport);
939 for (i = 0; i < target->ch_count; i++) {
940 ch = &target->ch[i];
941 srp_free_req_data(target, ch);
942 }
943 kfree(target->ch);
944 target->ch = NULL;
945
946 spin_lock(&target->srp_host->target_lock);
947 list_del(&target->list);
948 spin_unlock(&target->srp_host->target_lock);
949
950 scsi_host_put(target->scsi_host);
951 }
952
953 static void srp_remove_work(struct work_struct *work)
954 {
955 struct srp_target_port *target =
956 container_of(work, struct srp_target_port, remove_work);
957
958 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
959
960 srp_remove_target(target);
961 }
962
963 static void srp_rport_delete(struct srp_rport *rport)
964 {
965 struct srp_target_port *target = rport->lld_data;
966
967 srp_queue_remove_work(target);
968 }
969
970 /**
971 * srp_connected_ch() - number of connected channels
972 * @target: SRP target port.
973 */
974 static int srp_connected_ch(struct srp_target_port *target)
975 {
976 int i, c = 0;
977
978 for (i = 0; i < target->ch_count; i++)
979 c += target->ch[i].connected;
980
981 return c;
982 }
983
984 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
985 {
986 struct srp_target_port *target = ch->target;
987 int ret;
988
989 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
990
991 ret = srp_lookup_path(ch);
992 if (ret)
993 goto out;
994
995 while (1) {
996 init_completion(&ch->done);
997 ret = srp_send_req(ch, multich);
998 if (ret)
999 goto out;
1000 ret = wait_for_completion_interruptible(&ch->done);
1001 if (ret < 0)
1002 goto out;
1003
1004 /*
1005 * The CM event handling code will set status to
1006 * SRP_PORT_REDIRECT if we get a port redirect REJ
1007 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1008 * redirect REJ back.
1009 */
1010 ret = ch->status;
1011 switch (ret) {
1012 case 0:
1013 ch->connected = true;
1014 goto out;
1015
1016 case SRP_PORT_REDIRECT:
1017 ret = srp_lookup_path(ch);
1018 if (ret)
1019 goto out;
1020 break;
1021
1022 case SRP_DLID_REDIRECT:
1023 break;
1024
1025 case SRP_STALE_CONN:
1026 shost_printk(KERN_ERR, target->scsi_host, PFX
1027 "giving up on stale connection\n");
1028 ret = -ECONNRESET;
1029 goto out;
1030
1031 default:
1032 goto out;
1033 }
1034 }
1035
1036 out:
1037 return ret <= 0 ? ret : -ENODEV;
1038 }
1039
1040 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1041 {
1042 srp_handle_qp_err(cq, wc, "INV RKEY");
1043 }
1044
1045 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1046 u32 rkey)
1047 {
1048 struct ib_send_wr *bad_wr;
1049 struct ib_send_wr wr = {
1050 .opcode = IB_WR_LOCAL_INV,
1051 .next = NULL,
1052 .num_sge = 0,
1053 .send_flags = 0,
1054 .ex.invalidate_rkey = rkey,
1055 };
1056
1057 wr.wr_cqe = &req->reg_cqe;
1058 req->reg_cqe.done = srp_inv_rkey_err_done;
1059 return ib_post_send(ch->qp, &wr, &bad_wr);
1060 }
1061
1062 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1063 struct srp_rdma_ch *ch,
1064 struct srp_request *req)
1065 {
1066 struct srp_target_port *target = ch->target;
1067 struct srp_device *dev = target->srp_host->srp_dev;
1068 struct ib_device *ibdev = dev->dev;
1069 int i, res;
1070
1071 if (!scsi_sglist(scmnd) ||
1072 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1073 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1074 return;
1075
1076 if (dev->use_fast_reg) {
1077 struct srp_fr_desc **pfr;
1078
1079 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1080 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1081 if (res < 0) {
1082 shost_printk(KERN_ERR, target->scsi_host, PFX
1083 "Queueing INV WR for rkey %#x failed (%d)\n",
1084 (*pfr)->mr->rkey, res);
1085 queue_work(system_long_wq,
1086 &target->tl_err_work);
1087 }
1088 }
1089 if (req->nmdesc)
1090 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1091 req->nmdesc);
1092 } else if (dev->use_fmr) {
1093 struct ib_pool_fmr **pfmr;
1094
1095 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1096 ib_fmr_pool_unmap(*pfmr);
1097 }
1098
1099 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1100 scmnd->sc_data_direction);
1101 }
1102
1103 /**
1104 * srp_claim_req - Take ownership of the scmnd associated with a request.
1105 * @ch: SRP RDMA channel.
1106 * @req: SRP request.
1107 * @sdev: If not NULL, only take ownership for this SCSI device.
1108 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1109 * ownership of @req->scmnd if it equals @scmnd.
1110 *
1111 * Return value:
1112 * Either NULL or a pointer to the SCSI command the caller became owner of.
1113 */
1114 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1115 struct srp_request *req,
1116 struct scsi_device *sdev,
1117 struct scsi_cmnd *scmnd)
1118 {
1119 unsigned long flags;
1120
1121 spin_lock_irqsave(&ch->lock, flags);
1122 if (req->scmnd &&
1123 (!sdev || req->scmnd->device == sdev) &&
1124 (!scmnd || req->scmnd == scmnd)) {
1125 scmnd = req->scmnd;
1126 req->scmnd = NULL;
1127 } else {
1128 scmnd = NULL;
1129 }
1130 spin_unlock_irqrestore(&ch->lock, flags);
1131
1132 return scmnd;
1133 }
1134
1135 /**
1136 * srp_free_req() - Unmap data and adjust ch->req_lim.
1137 * @ch: SRP RDMA channel.
1138 * @req: Request to be freed.
1139 * @scmnd: SCSI command associated with @req.
1140 * @req_lim_delta: Amount to be added to @target->req_lim.
1141 */
1142 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1143 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1144 {
1145 unsigned long flags;
1146
1147 srp_unmap_data(scmnd, ch, req);
1148
1149 spin_lock_irqsave(&ch->lock, flags);
1150 ch->req_lim += req_lim_delta;
1151 spin_unlock_irqrestore(&ch->lock, flags);
1152 }
1153
1154 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1155 struct scsi_device *sdev, int result)
1156 {
1157 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1158
1159 if (scmnd) {
1160 srp_free_req(ch, req, scmnd, 0);
1161 scmnd->result = result;
1162 scmnd->scsi_done(scmnd);
1163 }
1164 }
1165
1166 static void srp_terminate_io(struct srp_rport *rport)
1167 {
1168 struct srp_target_port *target = rport->lld_data;
1169 struct srp_rdma_ch *ch;
1170 struct Scsi_Host *shost = target->scsi_host;
1171 struct scsi_device *sdev;
1172 int i, j;
1173
1174 /*
1175 * Invoking srp_terminate_io() while srp_queuecommand() is running
1176 * is not safe. Hence the warning statement below.
1177 */
1178 shost_for_each_device(sdev, shost)
1179 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1180
1181 for (i = 0; i < target->ch_count; i++) {
1182 ch = &target->ch[i];
1183
1184 for (j = 0; j < target->req_ring_size; ++j) {
1185 struct srp_request *req = &ch->req_ring[j];
1186
1187 srp_finish_req(ch, req, NULL,
1188 DID_TRANSPORT_FAILFAST << 16);
1189 }
1190 }
1191 }
1192
1193 /*
1194 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1195 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1196 * srp_reset_device() or srp_reset_host() calls will occur while this function
1197 * is in progress. One way to achieve that is not to call this function
1198 * directly but to call srp_reconnect_rport() instead, since that function
1199 * serializes calls of this function via rport->mutex and also blocks
1200 * srp_queuecommand() calls before invoking this function.
1201 */
1202 static int srp_rport_reconnect(struct srp_rport *rport)
1203 {
1204 struct srp_target_port *target = rport->lld_data;
1205 struct srp_rdma_ch *ch;
1206 int i, j, ret = 0;
1207 bool multich = false;
1208
1209 srp_disconnect_target(target);
1210
1211 if (target->state == SRP_TARGET_SCANNING)
1212 return -ENODEV;
1213
1214 /*
1215 * Now get a new local CM ID so that we avoid confusing the target in
1216 * case things are really fouled up. Doing so also ensures that all CM
1217 * callbacks will have finished before a new QP is allocated.
1218 */
1219 for (i = 0; i < target->ch_count; i++) {
1220 ch = &target->ch[i];
1221 ret += srp_new_cm_id(ch);
1222 }
1223 for (i = 0; i < target->ch_count; i++) {
1224 ch = &target->ch[i];
1225 for (j = 0; j < target->req_ring_size; ++j) {
1226 struct srp_request *req = &ch->req_ring[j];
1227
1228 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1229 }
1230 }
1231 for (i = 0; i < target->ch_count; i++) {
1232 ch = &target->ch[i];
1233 /*
1234 * Whether or not creating a new CM ID succeeded, create a new
1235 * QP. This guarantees that all completion callback function
1236 * invocations have finished before request resetting starts.
1237 */
1238 ret += srp_create_ch_ib(ch);
1239
1240 INIT_LIST_HEAD(&ch->free_tx);
1241 for (j = 0; j < target->queue_size; ++j)
1242 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1243 }
1244
1245 target->qp_in_error = false;
1246
1247 for (i = 0; i < target->ch_count; i++) {
1248 ch = &target->ch[i];
1249 if (ret)
1250 break;
1251 ret = srp_connect_ch(ch, multich);
1252 multich = true;
1253 }
1254
1255 if (ret == 0)
1256 shost_printk(KERN_INFO, target->scsi_host,
1257 PFX "reconnect succeeded\n");
1258
1259 return ret;
1260 }
1261
1262 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1263 unsigned int dma_len, u32 rkey)
1264 {
1265 struct srp_direct_buf *desc = state->desc;
1266
1267 WARN_ON_ONCE(!dma_len);
1268
1269 desc->va = cpu_to_be64(dma_addr);
1270 desc->key = cpu_to_be32(rkey);
1271 desc->len = cpu_to_be32(dma_len);
1272
1273 state->total_len += dma_len;
1274 state->desc++;
1275 state->ndesc++;
1276 }
1277
1278 static int srp_map_finish_fmr(struct srp_map_state *state,
1279 struct srp_rdma_ch *ch)
1280 {
1281 struct srp_target_port *target = ch->target;
1282 struct srp_device *dev = target->srp_host->srp_dev;
1283 struct ib_pd *pd = target->pd;
1284 struct ib_pool_fmr *fmr;
1285 u64 io_addr = 0;
1286
1287 if (state->fmr.next >= state->fmr.end) {
1288 shost_printk(KERN_ERR, ch->target->scsi_host,
1289 PFX "Out of MRs (mr_per_cmd = %d)\n",
1290 ch->target->mr_per_cmd);
1291 return -ENOMEM;
1292 }
1293
1294 WARN_ON_ONCE(!dev->use_fmr);
1295
1296 if (state->npages == 0)
1297 return 0;
1298
1299 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1300 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1301 pd->unsafe_global_rkey);
1302 goto reset_state;
1303 }
1304
1305 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1306 state->npages, io_addr);
1307 if (IS_ERR(fmr))
1308 return PTR_ERR(fmr);
1309
1310 *state->fmr.next++ = fmr;
1311 state->nmdesc++;
1312
1313 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1314 state->dma_len, fmr->fmr->rkey);
1315
1316 reset_state:
1317 state->npages = 0;
1318 state->dma_len = 0;
1319
1320 return 0;
1321 }
1322
1323 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1324 {
1325 srp_handle_qp_err(cq, wc, "FAST REG");
1326 }
1327
1328 /*
1329 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1330 * where to start in the first element. If sg_offset_p != NULL then
1331 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1332 * byte that has not yet been mapped.
1333 */
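/*
 * The return value is the number of scatterlist elements that were mapped;
 * srp_map_sg_fr() relies on this by subtracting it from the remaining
 * element count and advancing state->sg by the same amount until the whole
 * list has been registered.
 */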
1334 static int srp_map_finish_fr(struct srp_map_state *state,
1335 struct srp_request *req,
1336 struct srp_rdma_ch *ch, int sg_nents,
1337 unsigned int *sg_offset_p)
1338 {
1339 struct srp_target_port *target = ch->target;
1340 struct srp_device *dev = target->srp_host->srp_dev;
1341 struct ib_pd *pd = target->pd;
1342 struct ib_send_wr *bad_wr;
1343 struct ib_reg_wr wr;
1344 struct srp_fr_desc *desc;
1345 u32 rkey;
1346 int n, err;
1347
1348 if (state->fr.next >= state->fr.end) {
1349 shost_printk(KERN_ERR, ch->target->scsi_host,
1350 PFX "Out of MRs (mr_per_cmd = %d)\n",
1351 ch->target->mr_per_cmd);
1352 return -ENOMEM;
1353 }
1354
1355 WARN_ON_ONCE(!dev->use_fast_reg);
1356
1357 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1358 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1359
1360 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1361 sg_dma_len(state->sg) - sg_offset,
1362 pd->unsafe_global_rkey);
1363 if (sg_offset_p)
1364 *sg_offset_p = 0;
1365 return 1;
1366 }
1367
1368 desc = srp_fr_pool_get(ch->fr_pool);
1369 if (!desc)
1370 return -ENOMEM;
1371
1372 rkey = ib_inc_rkey(desc->mr->rkey);
1373 ib_update_fast_reg_key(desc->mr, rkey);
1374
1375 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1376 dev->mr_page_size);
1377 if (unlikely(n < 0)) {
1378 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1379 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1380 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1381 sg_offset_p ? *sg_offset_p : -1, n);
1382 return n;
1383 }
1384
1385 WARN_ON_ONCE(desc->mr->length == 0);
1386
1387 req->reg_cqe.done = srp_reg_mr_err_done;
1388
1389 wr.wr.next = NULL;
1390 wr.wr.opcode = IB_WR_REG_MR;
1391 wr.wr.wr_cqe = &req->reg_cqe;
1392 wr.wr.num_sge = 0;
1393 wr.wr.send_flags = 0;
1394 wr.mr = desc->mr;
1395 wr.key = desc->mr->rkey;
1396 wr.access = (IB_ACCESS_LOCAL_WRITE |
1397 IB_ACCESS_REMOTE_READ |
1398 IB_ACCESS_REMOTE_WRITE);
1399
1400 *state->fr.next++ = desc;
1401 state->nmdesc++;
1402
1403 srp_map_desc(state, desc->mr->iova,
1404 desc->mr->length, desc->mr->rkey);
1405
1406 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1407 if (unlikely(err)) {
1408 WARN_ON_ONCE(err == -ENOMEM);
1409 return err;
1410 }
1411
1412 return n;
1413 }
1414
1415 static int srp_map_sg_entry(struct srp_map_state *state,
1416 struct srp_rdma_ch *ch,
1417 struct scatterlist *sg)
1418 {
1419 struct srp_target_port *target = ch->target;
1420 struct srp_device *dev = target->srp_host->srp_dev;
1421 struct ib_device *ibdev = dev->dev;
1422 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1423 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1424 unsigned int len = 0;
1425 int ret;
1426
1427 WARN_ON_ONCE(!dma_len);
1428
1429 while (dma_len) {
1430 unsigned offset = dma_addr & ~dev->mr_page_mask;
1431
1432 if (state->npages == dev->max_pages_per_mr ||
1433 (state->npages > 0 && offset != 0)) {
1434 ret = srp_map_finish_fmr(state, ch);
1435 if (ret)
1436 return ret;
1437 }
1438
1439 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1440
1441 if (!state->npages)
1442 state->base_dma_addr = dma_addr;
1443 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1444 state->dma_len += len;
1445 dma_addr += len;
1446 dma_len -= len;
1447 }
1448
1449 /*
1450 * If the end of the MR is not on a page boundary then we need to
1451 * close it out and start a new one -- we can only merge at page
1452 * boundaries.
1453 */
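/*
 * Worked example, assuming a 4 KiB mr_page_size: if the element mapped
 * above ends at DMA address 0x12345800, then dma_addr & ~mr_page_mask is
 * 0x800, so the current MR is closed here and the next scatterlist element
 * starts a new one.
 */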
1454 ret = 0;
1455 if ((dma_addr & ~dev->mr_page_mask) != 0)
1456 ret = srp_map_finish_fmr(state, ch);
1457 return ret;
1458 }
1459
1460 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1461 struct srp_request *req, struct scatterlist *scat,
1462 int count)
1463 {
1464 struct scatterlist *sg;
1465 int i, ret;
1466
1467 state->pages = req->map_page;
1468 state->fmr.next = req->fmr_list;
1469 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1470
1471 for_each_sg(scat, sg, count, i) {
1472 ret = srp_map_sg_entry(state, ch, sg);
1473 if (ret)
1474 return ret;
1475 }
1476
1477 ret = srp_map_finish_fmr(state, ch);
1478 if (ret)
1479 return ret;
1480
1481 return 0;
1482 }
1483
1484 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1485 struct srp_request *req, struct scatterlist *scat,
1486 int count)
1487 {
1488 unsigned int sg_offset = 0;
1489
1490 state->fr.next = req->fr_list;
1491 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1492 state->sg = scat;
1493
1494 if (count == 0)
1495 return 0;
1496
1497 while (count) {
1498 int i, n;
1499
1500 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1501 if (unlikely(n < 0))
1502 return n;
1503
1504 count -= n;
1505 for (i = 0; i < n; i++)
1506 state->sg = sg_next(state->sg);
1507 }
1508
1509 return 0;
1510 }
1511
1512 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1513 struct srp_request *req, struct scatterlist *scat,
1514 int count)
1515 {
1516 struct srp_target_port *target = ch->target;
1517 struct srp_device *dev = target->srp_host->srp_dev;
1518 struct scatterlist *sg;
1519 int i;
1520
1521 for_each_sg(scat, sg, count, i) {
1522 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1523 ib_sg_dma_len(dev->dev, sg),
1524 target->pd->unsafe_global_rkey);
1525 }
1526
1527 return 0;
1528 }
1529
1530 /*
1531 * Register the indirect data buffer descriptor with the HCA.
1532 *
1533 * Note: since the indirect data buffer descriptor has been allocated with
1534 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1535 * memory buffer.
1536 */
1537 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1538 void **next_mr, void **end_mr, u32 idb_len,
1539 __be32 *idb_rkey)
1540 {
1541 struct srp_target_port *target = ch->target;
1542 struct srp_device *dev = target->srp_host->srp_dev;
1543 struct srp_map_state state;
1544 struct srp_direct_buf idb_desc;
1545 u64 idb_pages[1];
1546 struct scatterlist idb_sg[1];
1547 int ret;
1548
1549 memset(&state, 0, sizeof(state));
1550 memset(&idb_desc, 0, sizeof(idb_desc));
1551 state.gen.next = next_mr;
1552 state.gen.end = end_mr;
1553 state.desc = &idb_desc;
1554 state.base_dma_addr = req->indirect_dma_addr;
1555 state.dma_len = idb_len;
1556
1557 if (dev->use_fast_reg) {
1558 state.sg = idb_sg;
1559 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1560 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1561 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1562 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1563 #endif
1564 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1565 if (ret < 0)
1566 return ret;
1567 WARN_ON_ONCE(ret < 1);
1568 } else if (dev->use_fmr) {
1569 state.pages = idb_pages;
1570 state.pages[0] = (req->indirect_dma_addr &
1571 dev->mr_page_mask);
1572 state.npages = 1;
1573 ret = srp_map_finish_fmr(&state, ch);
1574 if (ret < 0)
1575 return ret;
1576 } else {
1577 return -EINVAL;
1578 }
1579
1580 *idb_rkey = idb_desc.key;
1581
1582 return 0;
1583 }
1584
1585 static void srp_check_mapping(struct srp_map_state *state,
1586 struct srp_rdma_ch *ch, struct srp_request *req,
1587 struct scatterlist *scat, int count)
1588 {
1589 struct srp_device *dev = ch->target->srp_host->srp_dev;
1590 struct srp_fr_desc **pfr;
1591 u64 desc_len = 0, mr_len = 0;
1592 int i;
1593
1594 for (i = 0; i < state->ndesc; i++)
1595 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1596 if (dev->use_fast_reg)
1597 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1598 mr_len += (*pfr)->mr->length;
1599 else if (dev->use_fmr)
1600 for (i = 0; i < state->nmdesc; i++)
1601 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1602 if (desc_len != scsi_bufflen(req->scmnd) ||
1603 mr_len > scsi_bufflen(req->scmnd))
1604 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1605 scsi_bufflen(req->scmnd), desc_len, mr_len,
1606 state->ndesc, state->nmdesc);
1607 }
1608
1609 /**
1610 * srp_map_data() - map SCSI data buffer onto an SRP request
1611 * @scmnd: SCSI command to map
1612 * @ch: SRP RDMA channel
1613 * @req: SRP request
1614 *
1615 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1616 * mapping failed.
1617 */
1618 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1619 struct srp_request *req)
1620 {
1621 struct srp_target_port *target = ch->target;
1622 struct ib_pd *pd = target->pd;
1623 struct scatterlist *scat;
1624 struct srp_cmd *cmd = req->cmd->buf;
1625 int len, nents, count, ret;
1626 struct srp_device *dev;
1627 struct ib_device *ibdev;
1628 struct srp_map_state state;
1629 struct srp_indirect_buf *indirect_hdr;
1630 u32 idb_len, table_len;
1631 __be32 idb_rkey;
1632 u8 fmt;
1633
1634 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1635 return sizeof (struct srp_cmd);
1636
1637 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1638 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1639 shost_printk(KERN_WARNING, target->scsi_host,
1640 PFX "Unhandled data direction %d\n",
1641 scmnd->sc_data_direction);
1642 return -EINVAL;
1643 }
1644
1645 nents = scsi_sg_count(scmnd);
1646 scat = scsi_sglist(scmnd);
1647
1648 dev = target->srp_host->srp_dev;
1649 ibdev = dev->dev;
1650
1651 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1652 if (unlikely(count == 0))
1653 return -EIO;
1654
1655 fmt = SRP_DATA_DESC_DIRECT;
1656 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1657
1658 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1659 /*
1660 * The midlayer only generated a single gather/scatter
1661 * entry, or DMA mapping coalesced everything to a
1662 * single entry. So a direct descriptor along with
1663 * the DMA MR suffices.
1664 */
1665 struct srp_direct_buf *buf = (void *) cmd->add_data;
1666
1667 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1668 buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1669 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1670
1671 req->nmdesc = 0;
1672 goto map_complete;
1673 }
1674
1675 /*
1676 * We have more than one scatter/gather entry, so build our indirect
1677 * descriptor table, trying to merge as many entries as we can.
1678 */
1679 indirect_hdr = (void *) cmd->add_data;
1680
1681 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1682 target->indirect_size, DMA_TO_DEVICE);
1683
1684 memset(&state, 0, sizeof(state));
1685 state.desc = req->indirect_desc;
1686 if (dev->use_fast_reg)
1687 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1688 else if (dev->use_fmr)
1689 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1690 else
1691 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1692 req->nmdesc = state.nmdesc;
1693 if (ret < 0)
1694 goto unmap;
1695
1696 {
1697 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1698 "Memory mapping consistency check");
1699 if (DYNAMIC_DEBUG_BRANCH(ddm))
1700 srp_check_mapping(&state, ch, req, scat, count);
1701 }
1702
1703 /* We've mapped the request, now pull as much of the indirect
1704 * descriptor table as we can into the command buffer. If this
1705 * target is not using an external indirect table, we are
1706 * guaranteed to fit into the command, as the SCSI layer won't
1707 * give us more S/G entries than we allow.
1708 */
1709 if (state.ndesc == 1) {
1710 /*
1711 * Memory registration collapsed the sg-list into one entry,
1712 * so use a direct descriptor.
1713 */
1714 struct srp_direct_buf *buf = (void *) cmd->add_data;
1715
1716 *buf = req->indirect_desc[0];
1717 goto map_complete;
1718 }
1719
1720 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1721 !target->allow_ext_sg)) {
1722 shost_printk(KERN_ERR, target->scsi_host,
1723 "Could not fit S/G list into SRP_CMD\n");
1724 ret = -EIO;
1725 goto unmap;
1726 }
1727
1728 count = min(state.ndesc, target->cmd_sg_cnt);
1729 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1730 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1731
1732 fmt = SRP_DATA_DESC_INDIRECT;
1733 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1734 len += count * sizeof (struct srp_direct_buf);
1735
1736 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1737 count * sizeof (struct srp_direct_buf));
1738
1739 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1740 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1741 idb_len, &idb_rkey);
1742 if (ret < 0)
1743 goto unmap;
1744 req->nmdesc++;
1745 } else {
1746 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1747 }
1748
1749 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1750 indirect_hdr->table_desc.key = idb_rkey;
1751 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1752 indirect_hdr->len = cpu_to_be32(state.total_len);
1753
1754 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1755 cmd->data_out_desc_cnt = count;
1756 else
1757 cmd->data_in_desc_cnt = count;
1758
1759 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1760 DMA_TO_DEVICE);
1761
1762 map_complete:
1763 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1764 cmd->buf_fmt = fmt << 4;
1765 else
1766 cmd->buf_fmt = fmt;
1767
1768 return len;
1769
1770 unmap:
1771 srp_unmap_data(scmnd, ch, req);
1772 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1773 ret = -E2BIG;
1774 return ret;
1775 }
1776
1777 /*
1778 * Return an IU and possible credit to the free pool
1779 */
1780 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1781 enum srp_iu_type iu_type)
1782 {
1783 unsigned long flags;
1784
1785 spin_lock_irqsave(&ch->lock, flags);
1786 list_add(&iu->list, &ch->free_tx);
1787 if (iu_type != SRP_IU_RSP)
1788 ++ch->req_lim;
1789 spin_unlock_irqrestore(&ch->lock, flags);
1790 }
1791
1792 /*
1793 * Must be called with ch->lock held to protect req_lim and free_tx.
1794 * If IU is not sent, it must be returned using srp_put_tx_iu().
1795 *
1796 * Note:
1797 * An upper limit for the number of allocated information units for each
1798 * request type is:
1799 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1800 * more than Scsi_Host.can_queue requests.
1801 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1802 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1803 * one unanswered SRP request to an initiator.
1804 */
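/*
 * Reservation sketch: for every request type except SRP_IU_TSK_MGMT the
 * reserve rsv equals SRP_TSK_MGMT_SQ_SIZE, so an SRP_IU_CMD allocation is
 * refused once req_lim drops to that reserve. This keeps a few credits
 * available for task management even when the SCSI mid-layer keeps the
 * command queue full.
 */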
1805 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1806 enum srp_iu_type iu_type)
1807 {
1808 struct srp_target_port *target = ch->target;
1809 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1810 struct srp_iu *iu;
1811
1812 lockdep_assert_held(&ch->lock);
1813
1814 ib_process_cq_direct(ch->send_cq, -1);
1815
1816 if (list_empty(&ch->free_tx))
1817 return NULL;
1818
1819 /* Initiator responses to target requests do not consume credits */
1820 if (iu_type != SRP_IU_RSP) {
1821 if (ch->req_lim <= rsv) {
1822 ++target->zero_req_lim;
1823 return NULL;
1824 }
1825
1826 --ch->req_lim;
1827 }
1828
1829 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1830 list_del(&iu->list);
1831 return iu;
1832 }
1833
1834 /*
1835 * Note: if this function is called from inside ib_drain_sq() then it will
1836 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1837 * with status IB_WC_SUCCESS then that's a bug.
1838 */
1839 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1840 {
1841 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1842 struct srp_rdma_ch *ch = cq->cq_context;
1843
1844 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1845 srp_handle_qp_err(cq, wc, "SEND");
1846 return;
1847 }
1848
1849 lockdep_assert_held(&ch->lock);
1850
1851 list_add(&iu->list, &ch->free_tx);
1852 }
1853
1854 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1855 {
1856 struct srp_target_port *target = ch->target;
1857 struct ib_sge list;
1858 struct ib_send_wr wr, *bad_wr;
1859
1860 list.addr = iu->dma;
1861 list.length = len;
1862 list.lkey = target->lkey;
1863
1864 iu->cqe.done = srp_send_done;
1865
1866 wr.next = NULL;
1867 wr.wr_cqe = &iu->cqe;
1868 wr.sg_list = &list;
1869 wr.num_sge = 1;
1870 wr.opcode = IB_WR_SEND;
1871 wr.send_flags = IB_SEND_SIGNALED;
1872
1873 return ib_post_send(ch->qp, &wr, &bad_wr);
1874 }
1875
1876 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1877 {
1878 struct srp_target_port *target = ch->target;
1879 struct ib_recv_wr wr, *bad_wr;
1880 struct ib_sge list;
1881
1882 list.addr = iu->dma;
1883 list.length = iu->size;
1884 list.lkey = target->lkey;
1885
1886 iu->cqe.done = srp_recv_done;
1887
1888 wr.next = NULL;
1889 wr.wr_cqe = &iu->cqe;
1890 wr.sg_list = &list;
1891 wr.num_sge = 1;
1892
1893 return ib_post_recv(ch->qp, &wr, &bad_wr);
1894 }
1895
1896 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1897 {
1898 struct srp_target_port *target = ch->target;
1899 struct srp_request *req;
1900 struct scsi_cmnd *scmnd;
1901 unsigned long flags;
1902
1903 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1904 spin_lock_irqsave(&ch->lock, flags);
1905 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1906 if (rsp->tag == ch->tsk_mgmt_tag) {
1907 ch->tsk_mgmt_status = -1;
1908 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1909 ch->tsk_mgmt_status = rsp->data[3];
1910 complete(&ch->tsk_mgmt_done);
1911 } else {
1912 shost_printk(KERN_ERR, target->scsi_host,
1913 "Received tsk mgmt response too late for tag %#llx\n",
1914 rsp->tag);
1915 }
1916 spin_unlock_irqrestore(&ch->lock, flags);
1917 } else {
1918 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1919 if (scmnd && scmnd->host_scribble) {
1920 req = (void *)scmnd->host_scribble;
1921 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1922 } else {
1923 scmnd = NULL;
1924 }
1925 if (!scmnd) {
1926 shost_printk(KERN_ERR, target->scsi_host,
1927 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1928 rsp->tag, ch - target->ch, ch->qp->qp_num);
1929
1930 spin_lock_irqsave(&ch->lock, flags);
1931 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1932 spin_unlock_irqrestore(&ch->lock, flags);
1933
1934 return;
1935 }
1936 scmnd->result = rsp->status;
1937
1938 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1939 memcpy(scmnd->sense_buffer, rsp->data +
1940 be32_to_cpu(rsp->resp_data_len),
1941 min_t(int, be32_to_cpu(rsp->sense_data_len),
1942 SCSI_SENSE_BUFFERSIZE));
1943 }
1944
1945 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1946 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1947 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1948 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1949 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1950 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1951 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1952 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1953
1954 srp_free_req(ch, req, scmnd,
1955 be32_to_cpu(rsp->req_lim_delta));
1956
1957 scmnd->host_scribble = NULL;
1958 scmnd->scsi_done(scmnd);
1959 }
1960 }
1961
1962 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1963 void *rsp, int len)
1964 {
1965 struct srp_target_port *target = ch->target;
1966 struct ib_device *dev = target->srp_host->srp_dev->dev;
1967 unsigned long flags;
1968 struct srp_iu *iu;
1969 int err;
1970
1971 spin_lock_irqsave(&ch->lock, flags);
1972 ch->req_lim += req_delta;
1973 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1974 spin_unlock_irqrestore(&ch->lock, flags);
1975
1976 if (!iu) {
1977 shost_printk(KERN_ERR, target->scsi_host, PFX
1978 "no IU available to send response\n");
1979 return 1;
1980 }
1981
1982 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1983 memcpy(iu->buf, rsp, len);
1984 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1985
1986 err = srp_post_send(ch, iu, len);
1987 if (err) {
1988 shost_printk(KERN_ERR, target->scsi_host, PFX
1989 "unable to post response: %d\n", err);
1990 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1991 }
1992
1993 return err;
1994 }
1995
1996 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1997 struct srp_cred_req *req)
1998 {
1999 struct srp_cred_rsp rsp = {
2000 .opcode = SRP_CRED_RSP,
2001 .tag = req->tag,
2002 };
2003 s32 delta = be32_to_cpu(req->req_lim_delta);
2004
2005 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2006 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2007 "problems processing SRP_CRED_REQ\n");
2008 }
2009
2010 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2011 struct srp_aer_req *req)
2012 {
2013 struct srp_target_port *target = ch->target;
2014 struct srp_aer_rsp rsp = {
2015 .opcode = SRP_AER_RSP,
2016 .tag = req->tag,
2017 };
2018 s32 delta = be32_to_cpu(req->req_lim_delta);
2019
2020 shost_printk(KERN_ERR, target->scsi_host, PFX
2021 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2022
2023 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2024 shost_printk(KERN_ERR, target->scsi_host, PFX
2025 "problems processing SRP_AER_REQ\n");
2026 }
2027
2028 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2029 {
2030 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2031 struct srp_rdma_ch *ch = cq->cq_context;
2032 struct srp_target_port *target = ch->target;
2033 struct ib_device *dev = target->srp_host->srp_dev->dev;
2034 int res;
2035 u8 opcode;
2036
2037 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2038 srp_handle_qp_err(cq, wc, "RECV");
2039 return;
2040 }
2041
2042 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2043 DMA_FROM_DEVICE);
2044
2045 opcode = *(u8 *) iu->buf;
2046
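/*
 * Compile-time disabled debugging aid: changing "if (0)" into "if (1)"
 * below prints the opcode and hex-dumps every received information unit.
 */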
2047 if (0) {
2048 shost_printk(KERN_ERR, target->scsi_host,
2049 PFX "recv completion, opcode 0x%02x\n", opcode);
2050 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2051 iu->buf, wc->byte_len, true);
2052 }
2053
2054 switch (opcode) {
2055 case SRP_RSP:
2056 srp_process_rsp(ch, iu->buf);
2057 break;
2058
2059 case SRP_CRED_REQ:
2060 srp_process_cred_req(ch, iu->buf);
2061 break;
2062
2063 case SRP_AER_REQ:
2064 srp_process_aer_req(ch, iu->buf);
2065 break;
2066
2067 case SRP_T_LOGOUT:
2068 /* XXX Handle target logout */
2069 shost_printk(KERN_WARNING, target->scsi_host,
2070 PFX "Got target logout request\n");
2071 break;
2072
2073 default:
2074 shost_printk(KERN_WARNING, target->scsi_host,
2075 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2076 break;
2077 }
2078
2079 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2080 DMA_FROM_DEVICE);
2081
2082 res = srp_post_recv(ch, iu);
2083 if (res != 0)
2084 shost_printk(KERN_ERR, target->scsi_host,
2085 PFX "Recv failed with error code %d\n", res);
2086 }
2087
2088 /**
2089 * srp_tl_err_work() - handle a transport layer error
2090 * @work: Work structure embedded in an SRP target port.
2091 *
2092 * Note: This function may get invoked before the rport has been created,
2093 * hence the target->rport test.
2094 */
2095 static void srp_tl_err_work(struct work_struct *work)
2096 {
2097 struct srp_target_port *target;
2098
2099 target = container_of(work, struct srp_target_port, tl_err_work);
2100 if (target->rport)
2101 srp_start_tl_fail_timers(target->rport);
2102 }
2103
2104 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2105 const char *opname)
2106 {
2107 struct srp_rdma_ch *ch = cq->cq_context;
2108 struct srp_target_port *target = ch->target;
2109
2110 if (ch->connected && !target->qp_in_error) {
2111 shost_printk(KERN_ERR, target->scsi_host,
2112 PFX "failed %s status %s (%d) for CQE %p\n",
2113 opname, ib_wc_status_msg(wc->status), wc->status,
2114 wc->wr_cqe);
2115 queue_work(system_long_wq, &target->tl_err_work);
2116 }
2117 target->qp_in_error = true;
2118 }
2119
2120 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2121 {
2122 struct srp_target_port *target = host_to_target(shost);
2123 struct srp_rport *rport = target->rport;
2124 struct srp_rdma_ch *ch;
2125 struct srp_request *req;
2126 struct srp_iu *iu;
2127 struct srp_cmd *cmd;
2128 struct ib_device *dev;
2129 unsigned long flags;
2130 u32 tag;
2131 u16 idx;
2132 int len, ret;
2133 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2134
2135 /*
2136 * The SCSI EH thread is the only context from which srp_queuecommand()
2137 * can get invoked for blocked devices (SDEV_BLOCK /
2138 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2139 * locking the rport mutex if invoked from inside the SCSI EH.
2140 */
2141 if (in_scsi_eh)
2142 mutex_lock(&rport->mutex);
2143
2144 scmnd->result = srp_chkready(target->rport);
2145 if (unlikely(scmnd->result))
2146 goto err;
2147
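/*
 * The block layer unique tag encodes both the hardware queue index, which
 * selects the RDMA channel, and the per-queue tag, which is used as an
 * index into that channel's request ring.
 */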
2148 WARN_ON_ONCE(scmnd->request->tag < 0);
2149 tag = blk_mq_unique_tag(scmnd->request);
2150 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2151 idx = blk_mq_unique_tag_to_tag(tag);
2152 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2153 dev_name(&shost->shost_gendev), tag, idx,
2154 target->req_ring_size);
2155
2156 spin_lock_irqsave(&ch->lock, flags);
2157 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2158 spin_unlock_irqrestore(&ch->lock, flags);
2159
2160 if (!iu)
2161 goto err;
2162
2163 req = &ch->req_ring[idx];
2164 dev = target->srp_host->srp_dev->dev;
2165 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2166 DMA_TO_DEVICE);
2167
2168 scmnd->host_scribble = (void *) req;
2169
2170 cmd = iu->buf;
2171 memset(cmd, 0, sizeof *cmd);
2172
2173 cmd->opcode = SRP_CMD;
2174 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2175 cmd->tag = tag;
2176 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2177
2178 req->scmnd = scmnd;
2179 req->cmd = iu;
2180
2181 len = srp_map_data(scmnd, ch, req);
2182 if (len < 0) {
2183 shost_printk(KERN_ERR, target->scsi_host,
2184 PFX "Failed to map data (%d)\n", len);
2185 /*
2186 * If we ran out of memory descriptors (-ENOMEM) because an
2187 * application is queuing many requests with more than
2188 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2189 * to reduce queue depth temporarily.
2190 */
2191 scmnd->result = len == -ENOMEM ?
2192 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2193 goto err_iu;
2194 }
2195
2196 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2197 DMA_TO_DEVICE);
2198
2199 if (srp_post_send(ch, iu, len)) {
2200 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2201 goto err_unmap;
2202 }
2203
2204 ret = 0;
2205
2206 unlock_rport:
2207 if (in_scsi_eh)
2208 mutex_unlock(&rport->mutex);
2209
2210 return ret;
2211
2212 err_unmap:
2213 srp_unmap_data(scmnd, ch, req);
2214
2215 err_iu:
2216 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2217
2218 /*
2219 * Prevent the loops that iterate over the request ring from
2220 * encountering a dangling SCSI command pointer.
2221 */
2222 req->scmnd = NULL;
2223
2224 err:
2225 if (scmnd->result) {
2226 scmnd->scsi_done(scmnd);
2227 ret = 0;
2228 } else {
2229 ret = SCSI_MLQUEUE_HOST_BUSY;
2230 }
2231
2232 goto unlock_rport;
2233 }
2234
2235 /*
2236 * Note: the resources allocated in this function are freed in
2237 * srp_free_ch_ib().
2238 */
2239 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2240 {
2241 struct srp_target_port *target = ch->target;
2242 int i;
2243
2244 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2245 GFP_KERNEL);
2246 if (!ch->rx_ring)
2247 goto err_no_ring;
2248 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2249 GFP_KERNEL);
2250 if (!ch->tx_ring)
2251 goto err_no_ring;
2252
2253 for (i = 0; i < target->queue_size; ++i) {
2254 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2255 ch->max_ti_iu_len,
2256 GFP_KERNEL, DMA_FROM_DEVICE);
2257 if (!ch->rx_ring[i])
2258 goto err;
2259 }
2260
2261 for (i = 0; i < target->queue_size; ++i) {
2262 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2263 target->max_iu_len,
2264 GFP_KERNEL, DMA_TO_DEVICE);
2265 if (!ch->tx_ring[i])
2266 goto err;
2267
2268 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2269 }
2270
2271 return 0;
2272
2273 err:
2274 for (i = 0; i < target->queue_size; ++i) {
2275 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2276 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2277 }
2278
2279
2280 err_no_ring:
2281 kfree(ch->tx_ring);
2282 ch->tx_ring = NULL;
2283 kfree(ch->rx_ring);
2284 ch->rx_ring = NULL;
2285
2286 return -ENOMEM;
2287 }
2288
2289 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2290 {
2291 uint64_t T_tr_ns, max_compl_time_ms;
2292 uint32_t rq_tmo_jiffies;
2293
2294 /*
2295 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2296 * table 91), both the QP timeout and the retry count have to be set
2297 * for RC QP's during the RTR to RTS transition.
2298 */
2299 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2300 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2301
2302 /*
2303 * Set target->rq_tmo_jiffies to one second more than the largest time
2304 * it can take before an error completion is generated. See also
2305 * C9-140..142 in the IBTA spec for more information about how to
2306 * convert the QP Local ACK Timeout value to nanoseconds.
2307 */
2308 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2309 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2310 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2311 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2312
2313 return rq_tmo_jiffies;
2314 }
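/*
 * Worked example with illustrative values (not taken from any particular
 * HCA): for qp_attr->timeout = 19 and qp_attr->retry_cnt = 7,
 * T_tr = 4096 ns * 2^19 ~= 2.15 s, the largest time before an error
 * completion is generated is about 7 * 4 * 2.15 s ~= 60 s, and
 * srp_compute_rq_tmo() returns roughly 61 seconds worth of jiffies.
 */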
2315
2316 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2317 const struct srp_login_rsp *lrsp,
2318 struct srp_rdma_ch *ch)
2319 {
2320 struct srp_target_port *target = ch->target;
2321 struct ib_qp_attr *qp_attr = NULL;
2322 int attr_mask = 0;
2323 int ret;
2324 int i;
2325
2326 if (lrsp->opcode == SRP_LOGIN_RSP) {
2327 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2328 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2329
2330 /*
2331 * Reserve credits for task management so we don't
2332 * bounce requests back to the SCSI mid-layer.
2333 */
2334 target->scsi_host->can_queue
2335 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2336 target->scsi_host->can_queue);
2337 target->scsi_host->cmd_per_lun
2338 = min_t(int, target->scsi_host->can_queue,
2339 target->scsi_host->cmd_per_lun);
2340 } else {
2341 shost_printk(KERN_WARNING, target->scsi_host,
2342 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2343 ret = -ECONNRESET;
2344 goto error;
2345 }
2346
2347 if (!ch->rx_ring) {
2348 ret = srp_alloc_iu_bufs(ch);
2349 if (ret)
2350 goto error;
2351 }
2352
2353 ret = -ENOMEM;
2354 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2355 if (!qp_attr)
2356 goto error;
2357
2358 qp_attr->qp_state = IB_QPS_RTR;
2359 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2360 if (ret)
2361 goto error_free;
2362
2363 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2364 if (ret)
2365 goto error_free;
2366
2367 for (i = 0; i < target->queue_size; i++) {
2368 struct srp_iu *iu = ch->rx_ring[i];
2369
2370 ret = srp_post_recv(ch, iu);
2371 if (ret)
2372 goto error_free;
2373 }
2374
2375 qp_attr->qp_state = IB_QPS_RTS;
2376 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2377 if (ret)
2378 goto error_free;
2379
2380 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2381
2382 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2383 if (ret)
2384 goto error_free;
2385
2386 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2387
2388 error_free:
2389 kfree(qp_attr);
2390
2391 error:
2392 ch->status = ret;
2393 }
2394
2395 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2396 struct ib_cm_event *event,
2397 struct srp_rdma_ch *ch)
2398 {
2399 struct srp_target_port *target = ch->target;
2400 struct Scsi_Host *shost = target->scsi_host;
2401 struct ib_class_port_info *cpi;
2402 int opcode;
2403
2404 switch (event->param.rej_rcvd.reason) {
2405 case IB_CM_REJ_PORT_CM_REDIRECT:
2406 cpi = event->param.rej_rcvd.ari;
2407 sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
2408 ch->path.pkey = cpi->redirect_pkey;
2409 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2410 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2411
2412 ch->status = sa_path_get_dlid(&ch->path) ?
2413 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2414 break;
2415
2416 case IB_CM_REJ_PORT_REDIRECT:
2417 if (srp_target_is_topspin(target)) {
2418 /*
2419 * Topspin/Cisco SRP gateways incorrectly send
2420 * reject reason code 25 when they mean 24
2421 * (port redirect).
2422 */
2423 memcpy(ch->path.dgid.raw,
2424 event->param.rej_rcvd.ari, 16);
2425
2426 shost_printk(KERN_DEBUG, shost,
2427 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2428 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2429 be64_to_cpu(ch->path.dgid.global.interface_id));
2430
2431 ch->status = SRP_PORT_REDIRECT;
2432 } else {
2433 shost_printk(KERN_WARNING, shost,
2434 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2435 ch->status = -ECONNRESET;
2436 }
2437 break;
2438
2439 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2440 shost_printk(KERN_WARNING, shost,
2441 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2442 ch->status = -ECONNRESET;
2443 break;
2444
2445 case IB_CM_REJ_CONSUMER_DEFINED:
2446 opcode = *(u8 *) event->private_data;
2447 if (opcode == SRP_LOGIN_REJ) {
2448 struct srp_login_rej *rej = event->private_data;
2449 u32 reason = be32_to_cpu(rej->reason);
2450
2451 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2452 shost_printk(KERN_WARNING, shost,
2453 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2454 else
2455 shost_printk(KERN_WARNING, shost, PFX
2456 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2457 target->sgid.raw,
2458 target->orig_dgid.raw, reason);
2459 } else
2460 shost_printk(KERN_WARNING, shost,
2461 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2462 " opcode 0x%02x\n", opcode);
2463 ch->status = -ECONNRESET;
2464 break;
2465
2466 case IB_CM_REJ_STALE_CONN:
2467 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2468 ch->status = SRP_STALE_CONN;
2469 break;
2470
2471 default:
2472 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2473 event->param.rej_rcvd.reason);
2474 ch->status = -ECONNRESET;
2475 }
2476 }
2477
2478 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2479 {
2480 struct srp_rdma_ch *ch = cm_id->context;
2481 struct srp_target_port *target = ch->target;
2482 int comp = 0;
2483
2484 switch (event->event) {
2485 case IB_CM_REQ_ERROR:
2486 shost_printk(KERN_DEBUG, target->scsi_host,
2487 PFX "Sending CM REQ failed\n");
2488 comp = 1;
2489 ch->status = -ECONNRESET;
2490 break;
2491
2492 case IB_CM_REP_RECEIVED:
2493 comp = 1;
2494 srp_cm_rep_handler(cm_id, event->private_data, ch);
2495 break;
2496
2497 case IB_CM_REJ_RECEIVED:
2498 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2499 comp = 1;
2500
2501 srp_cm_rej_handler(cm_id, event, ch);
2502 break;
2503
2504 case IB_CM_DREQ_RECEIVED:
2505 shost_printk(KERN_WARNING, target->scsi_host,
2506 PFX "DREQ received - connection closed\n");
2507 ch->connected = false;
2508 if (ib_send_cm_drep(cm_id, NULL, 0))
2509 shost_printk(KERN_ERR, target->scsi_host,
2510 PFX "Sending CM DREP failed\n");
2511 queue_work(system_long_wq, &target->tl_err_work);
2512 break;
2513
2514 case IB_CM_TIMEWAIT_EXIT:
2515 shost_printk(KERN_ERR, target->scsi_host,
2516 PFX "connection closed\n");
2517 comp = 1;
2518
2519 ch->status = 0;
2520 break;
2521
2522 case IB_CM_MRA_RECEIVED:
2523 case IB_CM_DREQ_ERROR:
2524 case IB_CM_DREP_RECEIVED:
2525 break;
2526
2527 default:
2528 shost_printk(KERN_WARNING, target->scsi_host,
2529 PFX "Unhandled CM event %d\n", event->event);
2530 break;
2531 }
2532
2533 if (comp)
2534 complete(&ch->done);
2535
2536 return 0;
2537 }
2538
2539 /**
2540 * srp_change_queue_depth - set the device queue depth
2541 * @sdev: scsi device struct
2542 * @qdepth: requested queue depth
2543 *
2544 * Returns queue depth.
2545 */
2546 static int
2547 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2548 {
2549 if (!sdev->tagged_supported)
2550 qdepth = 1;
2551 return scsi_change_queue_depth(sdev, qdepth);
2552 }
2553
2554 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2555 u8 func, u8 *status)
2556 {
2557 struct srp_target_port *target = ch->target;
2558 struct srp_rport *rport = target->rport;
2559 struct ib_device *dev = target->srp_host->srp_dev->dev;
2560 struct srp_iu *iu;
2561 struct srp_tsk_mgmt *tsk_mgmt;
2562 int res;
2563
2564 if (!ch->connected || target->qp_in_error)
2565 return -1;
2566
2567 /*
2568 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2569 * invoked while a task management function is being sent.
2570 */
2571 mutex_lock(&rport->mutex);
2572 spin_lock_irq(&ch->lock);
2573 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2574 spin_unlock_irq(&ch->lock);
2575
2576 if (!iu) {
2577 mutex_unlock(&rport->mutex);
2578
2579 return -1;
2580 }
2581
2582 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2583 DMA_TO_DEVICE);
2584 tsk_mgmt = iu->buf;
2585 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2586
2587 tsk_mgmt->opcode = SRP_TSK_MGMT;
2588 int_to_scsilun(lun, &tsk_mgmt->lun);
2589 tsk_mgmt->tsk_mgmt_func = func;
2590 tsk_mgmt->task_tag = req_tag;
2591
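/*
 * Task management tags have the SRP_TAG_TSK_MGMT bit set so that
 * srp_process_rsp() can distinguish their responses from responses to
 * regular SCSI commands.
 */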
2592 spin_lock_irq(&ch->lock);
2593 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2594 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2595 spin_unlock_irq(&ch->lock);
2596
2597 init_completion(&ch->tsk_mgmt_done);
2598
2599 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2600 DMA_TO_DEVICE);
2601 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2602 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2603 mutex_unlock(&rport->mutex);
2604
2605 return -1;
2606 }
2607 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2608 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2609 if (res > 0 && status)
2610 *status = ch->tsk_mgmt_status;
2611 mutex_unlock(&rport->mutex);
2612
2613 WARN_ON_ONCE(res < 0);
2614
2615 return res > 0 ? 0 : -1;
2616 }
2617
2618 static int srp_abort(struct scsi_cmnd *scmnd)
2619 {
2620 struct srp_target_port *target = host_to_target(scmnd->device->host);
2621 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2622 u32 tag;
2623 u16 ch_idx;
2624 struct srp_rdma_ch *ch;
2625 int ret;
2626
2627 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2628
2629 if (!req)
2630 return SUCCESS;
2631 tag = blk_mq_unique_tag(scmnd->request);
2632 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2633 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2634 return SUCCESS;
2635 ch = &target->ch[ch_idx];
2636 if (!srp_claim_req(ch, req, NULL, scmnd))
2637 return SUCCESS;
2638 shost_printk(KERN_ERR, target->scsi_host,
2639 "Sending SRP abort for tag %#x\n", tag);
2640 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2641 SRP_TSK_ABORT_TASK, NULL) == 0)
2642 ret = SUCCESS;
2643 else if (target->rport->state == SRP_RPORT_LOST)
2644 ret = FAST_IO_FAIL;
2645 else
2646 ret = FAILED;
2647 srp_free_req(ch, req, scmnd, 0);
2648 scmnd->result = DID_ABORT << 16;
2649 scmnd->scsi_done(scmnd);
2650
2651 return ret;
2652 }
2653
2654 static int srp_reset_device(struct scsi_cmnd *scmnd)
2655 {
2656 struct srp_target_port *target = host_to_target(scmnd->device->host);
2657 struct srp_rdma_ch *ch;
2658 int i, j;
2659 u8 status;
2660
2661 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2662
2663 ch = &target->ch[0];
2664 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2665 SRP_TSK_LUN_RESET, &status))
2666 return FAILED;
2667 if (status)
2668 return FAILED;
2669
2670 for (i = 0; i < target->ch_count; i++) {
2671 ch = &target->ch[i];
2672 for (j = 0; j < target->req_ring_size; ++j) {
2673 struct srp_request *req = &ch->req_ring[j];
2674
2675 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2676 }
2677 }
2678
2679 return SUCCESS;
2680 }
2681
2682 static int srp_reset_host(struct scsi_cmnd *scmnd)
2683 {
2684 struct srp_target_port *target = host_to_target(scmnd->device->host);
2685
2686 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2687
2688 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2689 }
2690
2691 static int srp_slave_alloc(struct scsi_device *sdev)
2692 {
2693 struct Scsi_Host *shost = sdev->host;
2694 struct srp_target_port *target = host_to_target(shost);
2695 struct srp_device *srp_dev = target->srp_host->srp_dev;
2696
2697 if (true)
2698 blk_queue_virt_boundary(sdev->request_queue,
2699 ~srp_dev->mr_page_mask);
2700
2701 return 0;
2702 }
2703
2704 static int srp_slave_configure(struct scsi_device *sdev)
2705 {
2706 struct Scsi_Host *shost = sdev->host;
2707 struct srp_target_port *target = host_to_target(shost);
2708 struct request_queue *q = sdev->request_queue;
2709 unsigned long timeout;
2710
2711 if (sdev->type == TYPE_DISK) {
2712 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2713 blk_queue_rq_timeout(q, timeout);
2714 }
2715
2716 return 0;
2717 }
2718
2719 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2720 char *buf)
2721 {
2722 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2723
2724 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2725 }
2726
2727 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2728 char *buf)
2729 {
2730 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2731
2732 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2733 }
2734
2735 static ssize_t show_service_id(struct device *dev,
2736 struct device_attribute *attr, char *buf)
2737 {
2738 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2739
2740 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2741 }
2742
2743 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2744 char *buf)
2745 {
2746 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2747
2748 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2749 }
2750
2751 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2752 char *buf)
2753 {
2754 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2755
2756 return sprintf(buf, "%pI6\n", target->sgid.raw);
2757 }
2758
2759 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2760 char *buf)
2761 {
2762 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2763 struct srp_rdma_ch *ch = &target->ch[0];
2764
2765 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2766 }
2767
2768 static ssize_t show_orig_dgid(struct device *dev,
2769 struct device_attribute *attr, char *buf)
2770 {
2771 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2772
2773 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2774 }
2775
2776 static ssize_t show_req_lim(struct device *dev,
2777 struct device_attribute *attr, char *buf)
2778 {
2779 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2780 struct srp_rdma_ch *ch;
2781 int i, req_lim = INT_MAX;
2782
2783 for (i = 0; i < target->ch_count; i++) {
2784 ch = &target->ch[i];
2785 req_lim = min(req_lim, ch->req_lim);
2786 }
2787 return sprintf(buf, "%d\n", req_lim);
2788 }
2789
2790 static ssize_t show_zero_req_lim(struct device *dev,
2791 struct device_attribute *attr, char *buf)
2792 {
2793 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2794
2795 return sprintf(buf, "%d\n", target->zero_req_lim);
2796 }
2797
2798 static ssize_t show_local_ib_port(struct device *dev,
2799 struct device_attribute *attr, char *buf)
2800 {
2801 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2802
2803 return sprintf(buf, "%d\n", target->srp_host->port);
2804 }
2805
2806 static ssize_t show_local_ib_device(struct device *dev,
2807 struct device_attribute *attr, char *buf)
2808 {
2809 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2810
2811 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2812 }
2813
2814 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2815 char *buf)
2816 {
2817 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2818
2819 return sprintf(buf, "%d\n", target->ch_count);
2820 }
2821
2822 static ssize_t show_comp_vector(struct device *dev,
2823 struct device_attribute *attr, char *buf)
2824 {
2825 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2826
2827 return sprintf(buf, "%d\n", target->comp_vector);
2828 }
2829
2830 static ssize_t show_tl_retry_count(struct device *dev,
2831 struct device_attribute *attr, char *buf)
2832 {
2833 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2834
2835 return sprintf(buf, "%d\n", target->tl_retry_count);
2836 }
2837
2838 static ssize_t show_cmd_sg_entries(struct device *dev,
2839 struct device_attribute *attr, char *buf)
2840 {
2841 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2842
2843 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2844 }
2845
2846 static ssize_t show_allow_ext_sg(struct device *dev,
2847 struct device_attribute *attr, char *buf)
2848 {
2849 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2850
2851 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2852 }
2853
2854 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2855 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2856 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2857 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2858 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2859 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2860 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2861 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2862 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2863 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2864 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2865 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2866 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2867 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2868 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2869 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2870
2871 static struct device_attribute *srp_host_attrs[] = {
2872 &dev_attr_id_ext,
2873 &dev_attr_ioc_guid,
2874 &dev_attr_service_id,
2875 &dev_attr_pkey,
2876 &dev_attr_sgid,
2877 &dev_attr_dgid,
2878 &dev_attr_orig_dgid,
2879 &dev_attr_req_lim,
2880 &dev_attr_zero_req_lim,
2881 &dev_attr_local_ib_port,
2882 &dev_attr_local_ib_device,
2883 &dev_attr_ch_count,
2884 &dev_attr_comp_vector,
2885 &dev_attr_tl_retry_count,
2886 &dev_attr_cmd_sg_entries,
2887 &dev_attr_allow_ext_sg,
2888 NULL
2889 };
2890
2891 static struct scsi_host_template srp_template = {
2892 .module = THIS_MODULE,
2893 .name = "InfiniBand SRP initiator",
2894 .proc_name = DRV_NAME,
2895 .slave_alloc = srp_slave_alloc,
2896 .slave_configure = srp_slave_configure,
2897 .info = srp_target_info,
2898 .queuecommand = srp_queuecommand,
2899 .change_queue_depth = srp_change_queue_depth,
2900 .eh_timed_out = srp_timed_out,
2901 .eh_abort_handler = srp_abort,
2902 .eh_device_reset_handler = srp_reset_device,
2903 .eh_host_reset_handler = srp_reset_host,
2904 .skip_settle_delay = true,
2905 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2906 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2907 .this_id = -1,
2908 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2909 .use_clustering = ENABLE_CLUSTERING,
2910 .shost_attrs = srp_host_attrs,
2911 .track_queue_depth = 1,
2912 };
2913
2914 static int srp_sdev_count(struct Scsi_Host *host)
2915 {
2916 struct scsi_device *sdev;
2917 int c = 0;
2918
2919 shost_for_each_device(sdev, host)
2920 c++;
2921
2922 return c;
2923 }
2924
2925 /*
2926 * Return values:
2927 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2928 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2929 * removal has been scheduled.
2930 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2931 */
2932 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2933 {
2934 struct srp_rport_identifiers ids;
2935 struct srp_rport *rport;
2936
2937 target->state = SRP_TARGET_SCANNING;
2938 sprintf(target->target_name, "SRP.T10:%016llX",
2939 be64_to_cpu(target->id_ext));
2940
2941 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
2942 return -ENODEV;
2943
2944 memcpy(ids.port_id, &target->id_ext, 8);
2945 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2946 ids.roles = SRP_RPORT_ROLE_TARGET;
2947 rport = srp_rport_add(target->scsi_host, &ids);
2948 if (IS_ERR(rport)) {
2949 scsi_remove_host(target->scsi_host);
2950 return PTR_ERR(rport);
2951 }
2952
2953 rport->lld_data = target;
2954 target->rport = rport;
2955
2956 spin_lock(&host->target_lock);
2957 list_add_tail(&target->list, &host->target_list);
2958 spin_unlock(&host->target_lock);
2959
2960 scsi_scan_target(&target->scsi_host->shost_gendev,
2961 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2962
2963 if (srp_connected_ch(target) < target->ch_count ||
2964 target->qp_in_error) {
2965 shost_printk(KERN_INFO, target->scsi_host,
2966 PFX "SCSI scan failed - removing SCSI host\n");
2967 srp_queue_remove_work(target);
2968 goto out;
2969 }
2970
2971 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2972 dev_name(&target->scsi_host->shost_gendev),
2973 srp_sdev_count(target->scsi_host));
2974
2975 spin_lock_irq(&target->lock);
2976 if (target->state == SRP_TARGET_SCANNING)
2977 target->state = SRP_TARGET_LIVE;
2978 spin_unlock_irq(&target->lock);
2979
2980 out:
2981 return 0;
2982 }
2983
2984 static void srp_release_dev(struct device *dev)
2985 {
2986 struct srp_host *host =
2987 container_of(dev, struct srp_host, dev);
2988
2989 complete(&host->released);
2990 }
2991
2992 static struct class srp_class = {
2993 .name = "infiniband_srp",
2994 .dev_release = srp_release_dev
2995 };
2996
2997 /**
2998 * srp_conn_unique() - check whether the connection to a target is unique
2999 * @host: SRP host.
3000 * @target: SRP target port.
3001 */
3002 static bool srp_conn_unique(struct srp_host *host,
3003 struct srp_target_port *target)
3004 {
3005 struct srp_target_port *t;
3006 bool ret = false;
3007
3008 if (target->state == SRP_TARGET_REMOVED)
3009 goto out;
3010
3011 ret = true;
3012
3013 spin_lock(&host->target_lock);
3014 list_for_each_entry(t, &host->target_list, list) {
3015 if (t != target &&
3016 target->id_ext == t->id_ext &&
3017 target->ioc_guid == t->ioc_guid &&
3018 target->initiator_ext == t->initiator_ext) {
3019 ret = false;
3020 break;
3021 }
3022 }
3023 spin_unlock(&host->target_lock);
3024
3025 out:
3026 return ret;
3027 }
3028
3029 /*
3030 * Target ports are added by writing
3031 *
3032 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3033 * pkey=<P_Key>,service_id=<service ID>
3034 *
3035 * to the add_target sysfs attribute.
3036 */
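/*
 * Illustrative example only -- the parameter values and the "srp-mlx5_0-1"
 * host directory name below are hypothetical; the actual directory is named
 * srp-<device>-<port> under /sys/class/infiniband_srp:
 *
 *   echo id_ext=200100110001,ioc_guid=200100110001,pkey=ffff,\
 *        dgid=fe800000000000000002c90300a0b0c1,\
 *        service_id=200100110001 \
 *        > /sys/class/infiniband_srp/srp-mlx5_0-1/add_target
 */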
3037 enum {
3038 SRP_OPT_ERR = 0,
3039 SRP_OPT_ID_EXT = 1 << 0,
3040 SRP_OPT_IOC_GUID = 1 << 1,
3041 SRP_OPT_DGID = 1 << 2,
3042 SRP_OPT_PKEY = 1 << 3,
3043 SRP_OPT_SERVICE_ID = 1 << 4,
3044 SRP_OPT_MAX_SECT = 1 << 5,
3045 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3046 SRP_OPT_IO_CLASS = 1 << 7,
3047 SRP_OPT_INITIATOR_EXT = 1 << 8,
3048 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3049 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3050 SRP_OPT_SG_TABLESIZE = 1 << 11,
3051 SRP_OPT_COMP_VECTOR = 1 << 12,
3052 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3053 SRP_OPT_QUEUE_SIZE = 1 << 14,
3054 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
3055 SRP_OPT_IOC_GUID |
3056 SRP_OPT_DGID |
3057 SRP_OPT_PKEY |
3058 SRP_OPT_SERVICE_ID),
3059 };
3060
3061 static const match_table_t srp_opt_tokens = {
3062 { SRP_OPT_ID_EXT, "id_ext=%s" },
3063 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3064 { SRP_OPT_DGID, "dgid=%s" },
3065 { SRP_OPT_PKEY, "pkey=%x" },
3066 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3067 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3068 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3069 { SRP_OPT_IO_CLASS, "io_class=%x" },
3070 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3071 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3072 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3073 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3074 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3075 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3076 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3077 { SRP_OPT_ERR, NULL }
3078 };
3079
3080 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3081 {
3082 char *options, *sep_opt;
3083 char *p;
3084 char dgid[3];
3085 substring_t args[MAX_OPT_ARGS];
3086 int opt_mask = 0;
3087 int token;
3088 int ret = -EINVAL;
3089 int i;
3090
3091 options = kstrdup(buf, GFP_KERNEL);
3092 if (!options)
3093 return -ENOMEM;
3094
3095 sep_opt = options;
3096 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3097 if (!*p)
3098 continue;
3099
3100 token = match_token(p, srp_opt_tokens, args);
3101 opt_mask |= token;
3102
3103 switch (token) {
3104 case SRP_OPT_ID_EXT:
3105 p = match_strdup(args);
3106 if (!p) {
3107 ret = -ENOMEM;
3108 goto out;
3109 }
3110 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3111 kfree(p);
3112 break;
3113
3114 case SRP_OPT_IOC_GUID:
3115 p = match_strdup(args);
3116 if (!p) {
3117 ret = -ENOMEM;
3118 goto out;
3119 }
3120 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3121 kfree(p);
3122 break;
3123
3124 case SRP_OPT_DGID:
3125 p = match_strdup(args);
3126 if (!p) {
3127 ret = -ENOMEM;
3128 goto out;
3129 }
3130 if (strlen(p) != 32) {
3131 pr_warn("bad dest GID parameter '%s'\n", p);
3132 kfree(p);
3133 goto out;
3134 }
3135
3136 for (i = 0; i < 16; ++i) {
3137 strlcpy(dgid, p + i * 2, sizeof(dgid));
3138 if (sscanf(dgid, "%hhx",
3139 &target->orig_dgid.raw[i]) < 1) {
3140 ret = -EINVAL;
3141 kfree(p);
3142 goto out;
3143 }
3144 }
3145 kfree(p);
3146 break;
3147
3148 case SRP_OPT_PKEY:
3149 if (match_hex(args, &token)) {
3150 pr_warn("bad P_Key parameter '%s'\n", p);
3151 goto out;
3152 }
3153 target->pkey = cpu_to_be16(token);
3154 break;
3155
3156 case SRP_OPT_SERVICE_ID:
3157 p = match_strdup(args);
3158 if (!p) {
3159 ret = -ENOMEM;
3160 goto out;
3161 }
3162 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3163 kfree(p);
3164 break;
3165
3166 case SRP_OPT_MAX_SECT:
3167 if (match_int(args, &token)) {
3168 pr_warn("bad max sect parameter '%s'\n", p);
3169 goto out;
3170 }
3171 target->scsi_host->max_sectors = token;
3172 break;
3173
3174 case SRP_OPT_QUEUE_SIZE:
3175 if (match_int(args, &token) || token < 1) {
3176 pr_warn("bad queue_size parameter '%s'\n", p);
3177 goto out;
3178 }
3179 target->scsi_host->can_queue = token;
3180 target->queue_size = token + SRP_RSP_SQ_SIZE +
3181 SRP_TSK_MGMT_SQ_SIZE;
3182 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3183 target->scsi_host->cmd_per_lun = token;
3184 break;
3185
3186 case SRP_OPT_MAX_CMD_PER_LUN:
3187 if (match_int(args, &token) || token < 1) {
3188 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3189 p);
3190 goto out;
3191 }
3192 target->scsi_host->cmd_per_lun = token;
3193 break;
3194
3195 case SRP_OPT_IO_CLASS:
3196 if (match_hex(args, &token)) {
3197 pr_warn("bad IO class parameter '%s'\n", p);
3198 goto out;
3199 }
3200 if (token != SRP_REV10_IB_IO_CLASS &&
3201 token != SRP_REV16A_IB_IO_CLASS) {
3202 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3203 token, SRP_REV10_IB_IO_CLASS,
3204 SRP_REV16A_IB_IO_CLASS);
3205 goto out;
3206 }
3207 target->io_class = token;
3208 break;
3209
3210 case SRP_OPT_INITIATOR_EXT:
3211 p = match_strdup(args);
3212 if (!p) {
3213 ret = -ENOMEM;
3214 goto out;
3215 }
3216 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3217 kfree(p);
3218 break;
3219
3220 case SRP_OPT_CMD_SG_ENTRIES:
3221 if (match_int(args, &token) || token < 1 || token > 255) {
3222 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3223 p);
3224 goto out;
3225 }
3226 target->cmd_sg_cnt = token;
3227 break;
3228
3229 case SRP_OPT_ALLOW_EXT_SG:
3230 if (match_int(args, &token)) {
3231 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3232 goto out;
3233 }
3234 target->allow_ext_sg = !!token;
3235 break;
3236
3237 case SRP_OPT_SG_TABLESIZE:
3238 if (match_int(args, &token) || token < 1 ||
3239 token > SG_MAX_SEGMENTS) {
3240 pr_warn("bad max sg_tablesize parameter '%s'\n",
3241 p);
3242 goto out;
3243 }
3244 target->sg_tablesize = token;
3245 break;
3246
3247 case SRP_OPT_COMP_VECTOR:
3248 if (match_int(args, &token) || token < 0) {
3249 pr_warn("bad comp_vector parameter '%s'\n", p);
3250 goto out;
3251 }
3252 target->comp_vector = token;
3253 break;
3254
3255 case SRP_OPT_TL_RETRY_COUNT:
3256 if (match_int(args, &token) || token < 2 || token > 7) {
3257 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3258 p);
3259 goto out;
3260 }
3261 target->tl_retry_count = token;
3262 break;
3263
3264 default:
3265 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3266 p);
3267 goto out;
3268 }
3269 }
3270
3271 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3272 ret = 0;
3273 else
3274 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3275 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3276 !(srp_opt_tokens[i].token & opt_mask))
3277 pr_warn("target creation request is missing parameter '%s'\n",
3278 srp_opt_tokens[i].pattern);
3279
3280 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3281 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3282 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3283 target->scsi_host->cmd_per_lun,
3284 target->scsi_host->can_queue);
3285
3286 out:
3287 kfree(options);
3288 return ret;
3289 }
3290
3291 static ssize_t srp_create_target(struct device *dev,
3292 struct device_attribute *attr,
3293 const char *buf, size_t count)
3294 {
3295 struct srp_host *host =
3296 container_of(dev, struct srp_host, dev);
3297 struct Scsi_Host *target_host;
3298 struct srp_target_port *target;
3299 struct srp_rdma_ch *ch;
3300 struct srp_device *srp_dev = host->srp_dev;
3301 struct ib_device *ibdev = srp_dev->dev;
3302 int ret, node_idx, node, cpu, i;
3303 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3304 bool multich = false;
3305
3306 target_host = scsi_host_alloc(&srp_template,
3307 sizeof (struct srp_target_port));
3308 if (!target_host)
3309 return -ENOMEM;
3310
3311 target_host->transportt = ib_srp_transport_template;
3312 target_host->max_channel = 0;
3313 target_host->max_id = 1;
3314 target_host->max_lun = -1LL;
3315 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3316
3317 target = host_to_target(target_host);
3318
3319 target->io_class = SRP_REV16A_IB_IO_CLASS;
3320 target->scsi_host = target_host;
3321 target->srp_host = host;
3322 target->pd = host->srp_dev->pd;
3323 target->lkey = host->srp_dev->pd->local_dma_lkey;
3324 target->cmd_sg_cnt = cmd_sg_entries;
3325 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3326 target->allow_ext_sg = allow_ext_sg;
3327 target->tl_retry_count = 7;
3328 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3329
3330 /*
3331 * Prevent the SCSI host from being removed by srp_remove_target()
3332 * before this function returns.
3333 */
3334 scsi_host_get(target->scsi_host);
3335
3336 ret = mutex_lock_interruptible(&host->add_target_mutex);
3337 if (ret < 0)
3338 goto put;
3339
3340 ret = srp_parse_options(buf, target);
3341 if (ret)
3342 goto out;
3343
3344 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3345
3346 if (!srp_conn_unique(target->srp_host, target)) {
3347 shost_printk(KERN_INFO, target->scsi_host,
3348 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3349 be64_to_cpu(target->id_ext),
3350 be64_to_cpu(target->ioc_guid),
3351 be64_to_cpu(target->initiator_ext));
3352 ret = -EEXIST;
3353 goto out;
3354 }
3355
3356 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3357 target->cmd_sg_cnt < target->sg_tablesize) {
3358 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3359 target->sg_tablesize = target->cmd_sg_cnt;
3360 }
3361
3362 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3363 /*
3364 * FR and FMR can only map one HCA page per entry. If the
3365 * start address is not aligned on a HCA page boundary two
3366 * entries will be used for the head and the tail although
3367 * these two entries combined contain at most one HCA page of
3368 * data. Hence the "+ 1" in the calculation below.
3369 *
3370 * The indirect data buffer descriptor is contiguous so the
3371 * memory for that buffer will only be registered if
3372 * register_always is true. Hence add one to mr_per_cmd if
3373 * register_always has been set.
3374 */
3375 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3376 (ilog2(srp_dev->mr_page_size) - 9);
3377 mr_per_cmd = register_always +
3378 (target->scsi_host->max_sectors + 1 +
3379 max_sectors_per_mr - 1) / max_sectors_per_mr;
3380 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3381 target->scsi_host->max_sectors,
3382 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3383 max_sectors_per_mr, mr_per_cmd);
3384 }
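/*
 * Numerical example with hypothetical device limits: for mr_page_size =
 * 4096, max_pages_per_mr = 512 and max_sectors = 1024, a single MR covers
 * 512 << (12 - 9) = 4096 sectors, so mr_per_cmd = register_always +
 * (1024 + 1 + 4095) / 4096 = register_always + 1.
 */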
3385
3386 target_host->sg_tablesize = target->sg_tablesize;
3387 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3388 target->mr_per_cmd = mr_per_cmd;
3389 target->indirect_size = target->sg_tablesize *
3390 sizeof (struct srp_direct_buf);
3391 target->max_iu_len = sizeof (struct srp_cmd) +
3392 sizeof (struct srp_indirect_buf) +
3393 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3394
3395 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3396 INIT_WORK(&target->remove_work, srp_remove_work);
3397 spin_lock_init(&target->lock);
3398 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3399 if (ret)
3400 goto out;
3401
3402 ret = -ENOMEM;
3403 target->ch_count = max_t(unsigned, num_online_nodes(),
3404 min(ch_count ? :
3405 min(4 * num_online_nodes(),
3406 ibdev->num_comp_vectors),
3407 num_online_cpus()));
3408 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3409 GFP_KERNEL);
3410 if (!target->ch)
3411 goto out;
3412
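/*
 * Spread the RDMA channels evenly over the online NUMA nodes and give each
 * channel a completion vector from the range associated with its node.
 */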
3413 node_idx = 0;
3414 for_each_online_node(node) {
3415 const int ch_start = (node_idx * target->ch_count /
3416 num_online_nodes());
3417 const int ch_end = ((node_idx + 1) * target->ch_count /
3418 num_online_nodes());
3419 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3420 num_online_nodes() + target->comp_vector)
3421 % ibdev->num_comp_vectors;
3422 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3423 num_online_nodes() + target->comp_vector)
3424 % ibdev->num_comp_vectors;
3425 int cpu_idx = 0;
3426
3427 for_each_online_cpu(cpu) {
3428 if (cpu_to_node(cpu) != node)
3429 continue;
3430 if (ch_start + cpu_idx >= ch_end)
3431 continue;
3432 ch = &target->ch[ch_start + cpu_idx];
3433 ch->target = target;
3434 ch->comp_vector = cv_start == cv_end ? cv_start :
3435 cv_start + cpu_idx % (cv_end - cv_start);
3436 spin_lock_init(&ch->lock);
3437 INIT_LIST_HEAD(&ch->free_tx);
3438 ret = srp_new_cm_id(ch);
3439 if (ret)
3440 goto err_disconnect;
3441
3442 ret = srp_create_ch_ib(ch);
3443 if (ret)
3444 goto err_disconnect;
3445
3446 ret = srp_alloc_req_data(ch);
3447 if (ret)
3448 goto err_disconnect;
3449
3450 ret = srp_connect_ch(ch, multich);
3451 if (ret) {
3452 shost_printk(KERN_ERR, target->scsi_host,
3453 PFX "Connection %d/%d to %pI6 failed\n",
3454 ch_start + cpu_idx,
3455 target->ch_count,
3456 ch->target->orig_dgid.raw);
3457 if (node_idx == 0 && cpu_idx == 0) {
3458 goto free_ch;
3459 } else {
3460 srp_free_ch_ib(target, ch);
3461 srp_free_req_data(target, ch);
3462 target->ch_count = ch - target->ch;
3463 goto connected;
3464 }
3465 }
3466
3467 multich = true;
3468 cpu_idx++;
3469 }
3470 node_idx++;
3471 }
3472
3473 connected:
3474 target->scsi_host->nr_hw_queues = target->ch_count;
3475
3476 ret = srp_add_target(host, target);
3477 if (ret)
3478 goto err_disconnect;
3479
3480 if (target->state != SRP_TARGET_REMOVED) {
3481 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3482 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3483 be64_to_cpu(target->id_ext),
3484 be64_to_cpu(target->ioc_guid),
3485 be16_to_cpu(target->pkey),
3486 be64_to_cpu(target->service_id),
3487 target->sgid.raw, target->orig_dgid.raw);
3488 }
3489
3490 ret = count;
3491
3492 out:
3493 mutex_unlock(&host->add_target_mutex);
3494
3495 put:
3496 scsi_host_put(target->scsi_host);
3497 if (ret < 0)
3498 scsi_host_put(target->scsi_host);
3499
3500 return ret;
3501
3502 err_disconnect:
3503 srp_disconnect_target(target);
3504
3505 free_ch:
3506 for (i = 0; i < target->ch_count; i++) {
3507 ch = &target->ch[i];
3508 srp_free_ch_ib(target, ch);
3509 srp_free_req_data(target, ch);
3510 }
3511
3512 kfree(target->ch);
3513 goto out;
3514 }
3515
3516 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3517
3518 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3519 char *buf)
3520 {
3521 struct srp_host *host = container_of(dev, struct srp_host, dev);
3522
3523 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3524 }
3525
3526 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3527
3528 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3529 char *buf)
3530 {
3531 struct srp_host *host = container_of(dev, struct srp_host, dev);
3532
3533 return sprintf(buf, "%d\n", host->port);
3534 }
3535
3536 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3537
3538 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3539 {
3540 struct srp_host *host;
3541
3542 host = kzalloc(sizeof *host, GFP_KERNEL);
3543 if (!host)
3544 return NULL;
3545
3546 INIT_LIST_HEAD(&host->target_list);
3547 spin_lock_init(&host->target_lock);
3548 init_completion(&host->released);
3549 mutex_init(&host->add_target_mutex);
3550 host->srp_dev = device;
3551 host->port = port;
3552
3553 host->dev.class = &srp_class;
3554 host->dev.parent = device->dev->dev.parent;
3555 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3556
3557 if (device_register(&host->dev))
3558 goto free_host;
3559 if (device_create_file(&host->dev, &dev_attr_add_target))
3560 goto err_class;
3561 if (device_create_file(&host->dev, &dev_attr_ibdev))
3562 goto err_class;
3563 if (device_create_file(&host->dev, &dev_attr_port))
3564 goto err_class;
3565
3566 return host;
3567
3568 err_class:
3569 device_unregister(&host->dev);
3570
3571 free_host:
3572 kfree(host);
3573
3574 return NULL;
3575 }
3576
3577 static void srp_add_one(struct ib_device *device)
3578 {
3579 struct srp_device *srp_dev;
3580 struct ib_device_attr *attr = &device->attrs;
3581 struct srp_host *host;
3582 int mr_page_shift, p;
3583 u64 max_pages_per_mr;
3584 unsigned int flags = 0;
3585
3586 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3587 if (!srp_dev)
3588 return;
3589
3590 /*
3591 * Use the smallest page size supported by the HCA, down to a
3592 * minimum of 4096 bytes. We're unlikely to build large sglists
3593 * out of smaller entries.
3594 */
3595 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
3596 srp_dev->mr_page_size = 1 << mr_page_shift;
3597 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3598 max_pages_per_mr = attr->max_mr_size;
3599 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3600 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3601 attr->max_mr_size, srp_dev->mr_page_size,
3602 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3603 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3604 max_pages_per_mr);
3605
3606 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3607 device->map_phys_fmr && device->unmap_fmr);
3608 srp_dev->has_fr = (attr->device_cap_flags &
3609 IB_DEVICE_MEM_MGT_EXTENSIONS);
3610 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3611 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3612 } else if (!never_register &&
3613 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3614 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3615 (!srp_dev->has_fmr || prefer_fr));
3616 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3617 }
3618
3619 if (never_register || !register_always ||
3620 (!srp_dev->has_fmr && !srp_dev->has_fr))
3621 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3622
3623 if (srp_dev->use_fast_reg) {
3624 srp_dev->max_pages_per_mr =
3625 min_t(u32, srp_dev->max_pages_per_mr,
3626 attr->max_fast_reg_page_list_len);
3627 }
3628 srp_dev->mr_max_size = srp_dev->mr_page_size *
3629 srp_dev->max_pages_per_mr;
3630 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3631 device->name, mr_page_shift, attr->max_mr_size,
3632 attr->max_fast_reg_page_list_len,
3633 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3634
3635 INIT_LIST_HEAD(&srp_dev->dev_list);
3636
3637 srp_dev->dev = device;
3638 srp_dev->pd = ib_alloc_pd(device, flags);
3639 if (IS_ERR(srp_dev->pd))
3640 goto free_dev;
3641
3642
3643 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3644 host = srp_add_port(srp_dev, p);
3645 if (host)
3646 list_add_tail(&host->list, &srp_dev->dev_list);
3647 }
3648
3649 ib_set_client_data(device, &srp_client, srp_dev);
3650 return;
3651
3652 free_dev:
3653 kfree(srp_dev);
3654 }
3655
3656 static void srp_remove_one(struct ib_device *device, void *client_data)
3657 {
3658 struct srp_device *srp_dev;
3659 struct srp_host *host, *tmp_host;
3660 struct srp_target_port *target;
3661
3662 srp_dev = client_data;
3663 if (!srp_dev)
3664 return;
3665
3666 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3667 device_unregister(&host->dev);
3668 /*
3669 * Wait for the sysfs entry to go away, so that no new
3670 * target ports can be created.
3671 */
3672 wait_for_completion(&host->released);
3673
3674 /*
3675 * Remove all target ports.
3676 */
3677 spin_lock(&host->target_lock);
3678 list_for_each_entry(target, &host->target_list, list)
3679 srp_queue_remove_work(target);
3680 spin_unlock(&host->target_lock);
3681
3682 /*
3683 * Wait for tl_err and target port removal tasks.
3684 */
3685 flush_workqueue(system_long_wq);
3686 flush_workqueue(srp_remove_wq);
3687
3688 kfree(host);
3689 }
3690
3691 ib_dealloc_pd(srp_dev->pd);
3692
3693 kfree(srp_dev);
3694 }
3695
3696 static struct srp_function_template ib_srp_transport_functions = {
3697 .has_rport_state = true,
3698 .reset_timer_if_blocked = true,
3699 .reconnect_delay = &srp_reconnect_delay,
3700 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3701 .dev_loss_tmo = &srp_dev_loss_tmo,
3702 .reconnect = srp_rport_reconnect,
3703 .rport_delete = srp_rport_delete,
3704 .terminate_rport_io = srp_terminate_io,
3705 };
3706
3707 static int __init srp_init_module(void)
3708 {
3709 int ret;
3710
3711 if (srp_sg_tablesize) {
3712 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3713 if (!cmd_sg_entries)
3714 cmd_sg_entries = srp_sg_tablesize;
3715 }
3716
3717 if (!cmd_sg_entries)
3718 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3719
3720 if (cmd_sg_entries > 255) {
3721 pr_warn("Clamping cmd_sg_entries to 255\n");
3722 cmd_sg_entries = 255;
3723 }
3724
3725 if (!indirect_sg_entries)
3726 indirect_sg_entries = cmd_sg_entries;
3727 else if (indirect_sg_entries < cmd_sg_entries) {
3728 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3729 cmd_sg_entries);
3730 indirect_sg_entries = cmd_sg_entries;
3731 }
3732
3733 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3734 pr_warn("Clamping indirect_sg_entries to %u\n",
3735 SG_MAX_SEGMENTS);
3736 indirect_sg_entries = SG_MAX_SEGMENTS;
3737 }
3738
3739 srp_remove_wq = create_workqueue("srp_remove");
3740 if (!srp_remove_wq) {
3741 ret = -ENOMEM;
3742 goto out;
3743 }
3744
3745 ret = -ENOMEM;
3746 ib_srp_transport_template =
3747 srp_attach_transport(&ib_srp_transport_functions);
3748 if (!ib_srp_transport_template)
3749 goto destroy_wq;
3750
3751 ret = class_register(&srp_class);
3752 if (ret) {
3753 pr_err("couldn't register class infiniband_srp\n");
3754 goto release_tr;
3755 }
3756
3757 ib_sa_register_client(&srp_sa_client);
3758
3759 ret = ib_register_client(&srp_client);
3760 if (ret) {
3761 pr_err("couldn't register IB client\n");
3762 goto unreg_sa;
3763 }
3764
3765 out:
3766 return ret;
3767
3768 unreg_sa:
3769 ib_sa_unregister_client(&srp_sa_client);
3770 class_unregister(&srp_class);
3771
3772 release_tr:
3773 srp_release_transport(ib_srp_transport_template);
3774
3775 destroy_wq:
3776 destroy_workqueue(srp_remove_wq);
3777 goto out;
3778 }
3779
3780 static void __exit srp_cleanup_module(void)
3781 {
3782 ib_unregister_client(&srp_client);
3783 ib_sa_unregister_client(&srp_sa_client);
3784 class_unregister(&srp_class);
3785 srp_release_transport(ib_srp_transport_template);
3786 destroy_workqueue(srp_remove_wq);
3787 }
3788
3789 module_init(srp_init_module);
3790 module_exit(srp_cleanup_module);