scsi: qla2xxx: Enable Target Multi Queue
author Quinn Tran <quinn.tran@cavium.com>
Wed, 14 Jun 2017 03:47:18 +0000 (20:47 -0700)
committer Martin K. Petersen <martin.petersen@oracle.com>
Wed, 28 Jun 2017 01:21:40 +0000 (21:21 -0400)
Enable Multi Queue for Target mode. At Initiator LUN scan time, each LUN
is assigned to a QPair. Each QPair is affinitized to a certain CPU. When a
new cmd arrives from the wire, the lunid is used to search for the qpair.
The qpair's affinitized cpuid will be used to queue up the work element.

Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_inline.h
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/qla_target.h

index 64109134e2763447dc737dbf5be424304796351a..005ca2de3795d08f84bc202a90196072d95b9ac2 100644 (file)
@@ -3245,7 +3245,7 @@ struct req_que {
 struct qla_qpair {
        spinlock_t qp_lock;
        atomic_t ref_count;
-
+       uint32_t lun_cnt;
        /*
         * For qpair 0, qp_lock_ptr will point at hardware_lock due to
         * legacy code. For other Qpair(s), it will point at qp_lock.
@@ -3275,6 +3275,7 @@ struct qla_qpair {
        struct qla_hw_data *hw;
        struct work_struct q_work;
        struct list_head qp_list_elem; /* vha->qp_list */
+       struct list_head hints_list;
        uint16_t cpuid;
 };
 
index 878b552be2632020faf457e62bb17a2b91b64525..4366b12b0e6d74826b93f9792beaac1decd3cfff 100644 (file)
@@ -7623,6 +7623,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
                ha->queue_pair_map[qpair_id] = qpair;
                qpair->id = qpair_id;
                qpair->vp_idx = vp_idx;
+               INIT_LIST_HEAD(&qpair->hints_list);
 
                for (i = 0; i < ha->msix_count; i++) {
                        msix = &ha->msix_entries[i];
@@ -7666,6 +7667,8 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
                qpair->req = ha->req_q_map[req_id];
                qpair->rsp->req = qpair->req;
                qpair->rsp->qpair = qpair;
+               /* init qpair to this cpu. Will adjust at run time. */
+               qla_cpu_update(qpair, smp_processor_id());
 
                if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
                        if (ha->fw_attributes & BIT_4)
index 99028d48c6644707cb68295ab6fd223c660db98e..bd8cb796f64e0f4ae02866e8b82dc66a15f085e5 100644 (file)
@@ -324,3 +324,31 @@ qla_is_exch_offld_enabled(struct scsi_qla_host *vha)
        else
                return false;
 }
+/*
+ * qla_cpu_update() - record the CPU number associated with a queue pair.
+ * @qpair: queue pair to update
+ * @cpuid: CPU number to store in @qpair
+ *
+ * Stores @cpuid in qpair->cpuid and then copies that value into the cpuid
+ * field of every qla_qpair_hint linked on qpair->hints_list, so each hint
+ * carries the same cpuid as its qpair.
+ *
+ * NOTE(review): hints_list is walked here without taking a lock in this
+ * function - confirm what the callers hold.
+ */
+static inline void
+qla_cpu_update(struct qla_qpair *qpair, uint16_t cpuid)
+{
+       qpair->cpuid = cpuid;
+
+       if (!list_empty(&qpair->hints_list)) {
+               struct qla_qpair_hint *h;
+
+               list_for_each_entry(h, &qpair->hints_list, hint_elem)
+                       h->cpuid = qpair->cpuid;
+       }
+}
+/*
+ * qla_qpair_to_hint() - find the hint entry that belongs to a queue pair.
+ * @tgt: target whose qphints array is searched
+ * @qpair: queue pair to look for
+ *
+ * Linearly scans tgt->qphints[0 .. tgt->ha->max_qpairs] (max_qpairs + 1
+ * slots) and returns the first entry whose ->qpair equals @qpair.
+ *
+ * Return: pointer to the matching hint, or NULL if no slot refers to @qpair.
+ */
+static inline struct qla_qpair_hint *
+qla_qpair_to_hint(struct qla_tgt *tgt, struct qla_qpair *qpair)
+{
+       struct qla_qpair_hint *h;
+       u16 i;
+
+       for (i = 0; i < tgt->ha->max_qpairs + 1; i++) {
+               h = &tgt->qphints[i];
+               if (h->qpair == qpair)
+                       return h;
+       }
+
+       return NULL;
+}
index 1535a29a9d9fe797a2e53ea70f1fd381a44fb7a4..9eb946cc8297dcae6801815f15fcf0e17c2032fd 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/cpu.h>
 #include <linux/t10-pi.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_bsg_fc.h>
@@ -2761,6 +2762,9 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
        if (!ha->flags.fw_started)
                return;
 
+       if (rsp->qpair->cpuid != smp_processor_id())
+               qla_cpu_update(rsp->qpair, smp_processor_id());
+
        while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
                pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
 
@@ -3196,10 +3200,10 @@ struct qla_init_msix_entry {
 };
 
 static const struct qla_init_msix_entry msix_entries[] = {
-       { "qla2xxx (default)", qla24xx_msix_default },
-       { "qla2xxx (rsp_q)", qla24xx_msix_rsp_q },
-       { "qla2xxx (atio_q)", qla83xx_msix_atio_q },
-       { "qla2xxx (qpair_multiq)", qla2xxx_msix_rsp_q },
+       { "default", qla24xx_msix_default },
+       { "rsp_q", qla24xx_msix_rsp_q },
+       { "atio_q", qla83xx_msix_atio_q },
+       { "qpair_multiq", qla2xxx_msix_rsp_q },
 };
 
 static const struct qla_init_msix_entry qla82xx_msix_entries[] = {
@@ -3279,7 +3283,7 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
                qentry->handle = rsp;
                rsp->msix = qentry;
                scnprintf(qentry->name, sizeof(qentry->name),
-                   "%s", msix_entries[i].name);
+                   "qla2xxx%lu_%s", vha->host_no, msix_entries[i].name);
                if (IS_P3P_TYPE(ha))
                        ret = request_irq(qentry->vector,
                                qla82xx_msix_entries[i].handler,
@@ -3287,7 +3291,7 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
                else
                        ret = request_irq(qentry->vector,
                                msix_entries[i].handler,
-                               0, msix_entries[i].name, rsp);
+                               0, qentry->name, rsp);
                if (ret)
                        goto msix_register_fail;
                qentry->have_irq = 1;
@@ -3303,11 +3307,12 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
                rsp->msix = qentry;
                qentry->handle = rsp;
                scnprintf(qentry->name, sizeof(qentry->name),
-                   "%s", msix_entries[QLA_ATIO_VECTOR].name);
+                   "qla2xxx%lu_%s", vha->host_no,
+                   msix_entries[QLA_ATIO_VECTOR].name);
                qentry->in_use = 1;
                ret = request_irq(qentry->vector,
                        msix_entries[QLA_ATIO_VECTOR].handler,
-                       0, msix_entries[QLA_ATIO_VECTOR].name, rsp);
+                       0, qentry->name, rsp);
                qentry->have_irq = 1;
        }
 
index 82bbb6432f77f803207e9e061c429f96de59a44e..3963602aef35051ca588ffbcf94ee888909d5650 100644 (file)
@@ -371,6 +371,23 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
                goto fail_rsp_map;
        }
 
+       ha->base_qpair = kzalloc(sizeof(struct qla_qpair), GFP_KERNEL);
+       if (ha->base_qpair == NULL) {
+               ql_log(ql_log_warn, vha, 0x00e0,
+                   "Failed to allocate base queue pair memory.\n");
+               goto fail_base_qpair;
+       }
+
+       rsp->qpair = ha->base_qpair;
+       rsp->req = req;
+       ha->base_qpair->req = req;
+       ha->base_qpair->rsp = rsp;
+       ha->base_qpair->vha = vha;
+       ha->base_qpair->qp_lock_ptr = &ha->hardware_lock;
+       ha->base_qpair->msix = &ha->msix_entries[QLA_MSIX_RSP_Q];
+       INIT_LIST_HEAD(&ha->base_qpair->hints_list);
+       qla_cpu_update(rsp->qpair, smp_processor_id());
+
        if (ql2xmqsupport && ha->max_qpairs) {
                ha->queue_pair_map = kcalloc(ha->max_qpairs, sizeof(struct qla_qpair *),
                        GFP_KERNEL);
@@ -379,23 +396,8 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
                            "Unable to allocate memory for queue pair ptrs.\n");
                        goto fail_qpair_map;
                }
-               ha->base_qpair = kzalloc(sizeof(struct qla_qpair), GFP_KERNEL);
-               if (ha->base_qpair == NULL) {
-                       ql_log(ql_log_warn, vha, 0x00e0,
-                           "Failed to allocate base queue pair memory.\n");
-                       goto fail_base_qpair;
-               }
-               ha->base_qpair->req = req;
-               ha->base_qpair->rsp = rsp;
        }
 
-       rsp->qpair = ha->base_qpair;
-       rsp->req = req;
-       ha->base_qpair->vha = vha;
-       ha->base_qpair->qp_lock_ptr = &ha->hardware_lock;
-       ha->queue_pair_map[0] = ha->base_qpair;
-       set_bit(0, ha->qpair_qid_map);
-
        /*
         * Make sure we record at least the request and response queue zero in
         * case we need to free them if part of the probe fails.
@@ -2009,7 +2011,7 @@ qla83xx_iospace_config(struct qla_hw_data *ha)
                /* Read MSIX vector size of the board */
                pci_read_config_word(ha->pdev,
                    QLA_83XX_PCI_MSIX_CONTROL, &msix);
-               ha->msix_count = msix + 1;
+               ha->msix_count = (msix & PCI_MSIX_FLAGS_QSIZE)  + 1;
                /*
                 * By default, driver uses at least two msix vectors
                 * (default & rspq)
@@ -3125,12 +3127,26 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
            host->can_queue, base_vha->req,
            base_vha->mgmt_svr_loop_id, host->sg_tablesize);
 
-       if (ha->mqenable && qla_ini_mode_enabled(base_vha)) {
+       if (ha->mqenable) {
+               bool mq = false;
+               bool startit = false;
                ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
-               /* Create start of day qpairs for Block MQ */
-               if (shost_use_blk_mq(host)) {
+
+               if (QLA_TGT_MODE_ENABLED()) {
+                       mq = true;
+                       startit = false;
+               }
+
+               if ((ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) &&
+                   shost_use_blk_mq(host)) {
+                       mq = true;
+                       startit = true;
+               }
+
+               if (mq) {
+                       /* Create start of day qpairs for Block MQ */
                        for (i = 0; i < ha->max_qpairs; i++)
-                               qla2xxx_create_qpair(base_vha, 5,  0, true);
+                               qla2xxx_create_qpair(base_vha, 5, 0, startit);
                }
        }
 
index 22f9bb59a98d5585badd68128bf6f4cf460bf9cc..92e41055e6f86b7e30706a361e0b12b8a8ff0a2e 100644 (file)
@@ -1515,6 +1515,10 @@ EXPORT_SYMBOL(qlt_stop_phase2);
 static void qlt_release(struct qla_tgt *tgt)
 {
        scsi_qla_host_t *vha = tgt->vha;
+       void *node;
+       u64 key = 0;
+       u16 i;
+       struct qla_qpair_hint *h;
 
        if ((vha->vha_tgt.qla_tgt != NULL) && !tgt->tgt_stop &&
            !tgt->tgt_stopped)
@@ -1523,6 +1527,24 @@ static void qlt_release(struct qla_tgt *tgt)
        if ((vha->vha_tgt.qla_tgt != NULL) && !tgt->tgt_stopped)
                qlt_stop_phase2(tgt);
 
+       for (i = 0; i < vha->hw->max_qpairs + 1; i++) {
+               unsigned long flags;
+
+               h = &tgt->qphints[i];
+               if (h->qpair) {
+                       spin_lock_irqsave(h->qpair->qp_lock_ptr, flags);
+                       list_del(&h->hint_elem);
+                       spin_unlock_irqrestore(h->qpair->qp_lock_ptr, flags);
+                       h->qpair = NULL;
+               }
+       }
+       kfree(tgt->qphints);
+
+       btree_for_each_safe64(&tgt->lun_qpair_map, key, node)
+               btree_remove64(&tgt->lun_qpair_map, key);
+
+       btree_destroy64(&tgt->lun_qpair_map);
+
        vha->vha_tgt.qla_tgt = NULL;
 
        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00d,
@@ -2354,9 +2376,8 @@ static int qlt_24xx_build_ctio_pkt(struct qla_qpair *qpair,
                 * the session and, so, the command.
                 */
                return -EAGAIN;
-       } else {
-               vha->req->outstanding_cmds[h] = (srb_t *)prm->cmd;
-       }
+       } else
+               qpair->req->outstanding_cmds[h] = (srb_t *)prm->cmd;
 
        pkt->handle = MAKE_HANDLE(qpair->req->id, h);
        pkt->handle |= CTIO_COMPLETION_HANDLE_MARK;
@@ -3976,8 +3997,6 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd)
        spin_lock_init(&cmd->cmd_lock);
        cdb = &atio->u.isp24.fcp_cmnd.cdb[0];
        cmd->se_cmd.tag = atio->u.isp24.exchange_addr;
-       cmd->unpacked_lun = scsilun_to_int(
-           (struct scsi_lun *)&atio->u.isp24.fcp_cmnd.lun);
 
        if (atio->u.isp24.fcp_cmnd.rddata &&
            atio->u.isp24.fcp_cmnd.wrdata) {
@@ -4040,6 +4059,85 @@ static void qlt_do_work(struct work_struct *work)
        __qlt_do_work(cmd);
 }
 
+static void qlt_assign_qpair(struct scsi_qla_host *vha,
+       struct qla_tgt_cmd *cmd)
+{
+       struct qla_qpair *qpair, *qp;
+       struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
+       struct qla_qpair_hint *h;
+       /*
+        * Pick the qpair (and the cpuid stored in its hint) for @cmd based
+        * on cmd->unpacked_lun.  With qpairs_available set, the LUN is looked
+        * up in tgt->lun_qpair_map; on a miss a qpair is chosen by lun_cnt
+        * and the LUN -> hint mapping is inserted.  Otherwise qphints[0] is
+        * used.  The result is written to cmd->qpair and cmd->se_cmd.cpuid.
+        *
+        * NOTE(review): lun_cnt and lun_qpair_map are modified here without
+        * a lock visible in this function - confirm the caller serializes.
+        */
+       if (vha->flags.qpairs_available) {
+               h = btree_lookup64(&tgt->lun_qpair_map, cmd->unpacked_lun);
+               if (unlikely(!h)) {     /* no mapping for this LUN yet */
+                       /* spread lun to qpair ratio evenly */
+                       int lcnt = 0, rc;
+                       struct scsi_qla_host *base_vha =
+                               pci_get_drvdata(vha->hw->pdev);
+                       /* Start with the base qpair; take it if it has no LUNs. */
+                       qpair = vha->hw->base_qpair;
+                       if (qpair->lun_cnt == 0) {
+                               qpair->lun_cnt++;
+                               h = qla_qpair_to_hint(tgt, qpair);
+                               BUG_ON(!h);
+                               rc = btree_insert64(&tgt->lun_qpair_map,
+                                       cmd->unpacked_lun, h, GFP_ATOMIC);
+                               if (rc) {       /* mapping not stored; h is still used at out: */
+                                       qpair->lun_cnt--;
+                                       ql_log(ql_log_info, vha, 0xd037,
+                                           "Unable to insert lun %llx into lun_qpair_map\n",
+                                           cmd->unpacked_lun);
+                               }
+                               goto out;
+                       } else {
+                               lcnt = qpair->lun_cnt;
+                       }
+                       /*
+                        * Otherwise take the first qpair on qp_list that has
+                        * no LUNs, tracking the lowest lun_cnt seen so far as
+                        * the fallback choice.
+                        */
+                       h = NULL;
+                       list_for_each_entry(qp, &base_vha->qp_list,
+                           qp_list_elem) {
+                               if (qp->lun_cnt == 0) {
+                                       qp->lun_cnt++;
+                                       h = qla_qpair_to_hint(tgt, qp);
+                                       BUG_ON(!h);
+                                       rc = btree_insert64(&tgt->lun_qpair_map,
+                                           cmd->unpacked_lun, h, GFP_ATOMIC);
+                                       if (rc) {       /* mapping not stored; h is still used at out: */
+                                               qp->lun_cnt--;
+                                               ql_log(ql_log_info, vha, 0xd038,
+                                                       "Unable to insert lun %llx into lun_qpair_map\n",
+                                                       cmd->unpacked_lun);
+                                       }
+                                       qpair = qp;
+                                       goto out;
+                               } else {
+                                       if (qp->lun_cnt < lcnt) {       /* strict <: earlier candidate wins ties */
+                                               lcnt = qp->lun_cnt;
+                                               qpair = qp;
+                                               continue;
+                                       }
+                               }
+                       }
+                       BUG_ON(!qpair); /* qpair is the lowest-lun_cnt candidate found above */
+                       qpair->lun_cnt++;
+                       h = qla_qpair_to_hint(tgt, qpair);
+                       BUG_ON(!h);
+                       rc = btree_insert64(&tgt->lun_qpair_map,
+                               cmd->unpacked_lun, h, GFP_ATOMIC);
+                       if (rc) {       /* mapping not stored; h is still used at out: */
+                               qpair->lun_cnt--;
+                               ql_log(ql_log_info, vha, 0xd039,
+                                  "Unable to insert lun %llx into lun_qpair_map\n",
+                                  cmd->unpacked_lun);
+                       }
+               }
+       } else {
+               h = &tgt->qphints[0];   /* slot 0 is set to the base qpair in qlt_add_target() */
+       }
+out:
+       cmd->qpair = h->qpair;
+       cmd->se_cmd.cpuid = h->cpuid;
+}
+
 static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
                                       struct fc_port *sess,
                                       struct atio_from_isp *atio)
@@ -4069,8 +4167,9 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
        cmd->jiffies_at_alloc = get_jiffies_64();
 
        cmd->reset_count = vha->hw->chip_reset;
-       cmd->qpair = vha->hw->base_qpair;
-       cmd->se_cmd.cpuid = cmd->qpair->cpuid;
+       cmd->unpacked_lun = scsilun_to_int(
+           (struct scsi_lun *)&atio->u.isp24.fcp_cmnd.lun);
+       qlt_assign_qpair(vha, cmd);
 
        return cmd;
 }
@@ -4218,7 +4317,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha,
        spin_unlock_irqrestore(&vha->cmd_list_lock, flags);
 
        INIT_WORK(&cmd->work, qlt_do_work);
-       if (ha->msix_count) {
+       if (vha->flags.qpairs_available) {
+               queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &cmd->work);
+       } else if (ha->msix_count) {
                if (cmd->atio.u.isp24.fcp_cmnd.rddata)
                        queue_work_on(smp_processor_id(), qla_tgt_wq,
                            &cmd->work);
@@ -5944,6 +6045,8 @@ static void qlt_sess_work_fn(struct work_struct *work)
 int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
 {
        struct qla_tgt *tgt;
+       int rc, i;
+       struct qla_qpair_hint *h;
 
        if (!QLA_TGT_MODE_ENABLED())
                return 0;
@@ -5966,9 +6069,47 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha)
                return -ENOMEM;
        }
 
+       tgt->qphints = kzalloc((ha->max_qpairs + 1) *
+           sizeof(struct qla_qpair_hint), GFP_KERNEL);
+       if (!tgt->qphints) {
+               kfree(tgt);
+               ql_log(ql_log_warn, base_vha, 0x0197,
+                   "Unable to allocate qpair hints.\n");
+               return -ENOMEM;
+       }
+
        if (!(base_vha->host->hostt->supported_mode & MODE_TARGET))
                base_vha->host->hostt->supported_mode |= MODE_TARGET;
 
+       rc = btree_init64(&tgt->lun_qpair_map);
+       if (rc) {
+               kfree(tgt->qphints);
+               kfree(tgt);
+               ql_log(ql_log_info, base_vha, 0x0198,
+                       "Unable to initialize lun_qpair_map btree\n");
+               return -EIO;
+       }
+       h = &tgt->qphints[0];
+       h->qpair = ha->base_qpair;
+       INIT_LIST_HEAD(&h->hint_elem);
+       h->cpuid = ha->base_qpair->cpuid;
+       list_add_tail(&h->hint_elem, &ha->base_qpair->hints_list);
+
+       for (i = 0; i < ha->max_qpairs; i++) {
+               unsigned long flags;
+
+               struct qla_qpair *qpair = ha->queue_pair_map[i];
+               h = &tgt->qphints[i + 1];
+               INIT_LIST_HEAD(&h->hint_elem);
+               if (qpair) {
+                       h->qpair = qpair;
+                       spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+                       list_add_tail(&h->hint_elem, &qpair->hints_list);
+                       spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+                       h->cpuid = qpair->cpuid;
+               }
+       }
+
        tgt->ha = ha;
        tgt->vha = base_vha;
        init_waitqueue_head(&tgt->waitQ);
index 9519eeca1997a1f64c3bb759527953f24d6693b5..22c783e3e38fa6f9cc654fe3735820c994b340a6 100644 (file)
@@ -787,10 +787,18 @@ struct qla_port_24xx_data {
        uint16_t reserved;
 };
 
+struct qla_qpair_hint {        /* per-target entry pairing a qpair with a cpuid */
+       struct list_head hint_elem;     /* links this hint on qla_qpair->hints_list */
+       struct qla_qpair *qpair;        /* qpair this hint refers to; NULL when the slot is unused */
+       u16 cpuid;      /* copy of qpair->cpuid, refreshed by qla_cpu_update() */
+       uint8_t cmd_cnt;        /* NOTE(review): not referenced in the hunks shown here */
+};
+
 struct qla_tgt {
        struct scsi_qla_host *vha;
        struct qla_hw_data *ha;
-
+       struct btree_head64 lun_qpair_map;
+       struct qla_qpair_hint *qphints;
        /*
         * To sync between IRQ handlers and qlt_target_release(). Needed,
         * because req_pkt() can drop/reaquire HW lock inside. Protected by