IB/mlx5: Collect signature error completion
authorSagi Grimberg <sagig@mellanox.com>
Sun, 23 Feb 2014 12:19:12 +0000 (14:19 +0200)
committerRoland Dreier <roland@purestorage.com>
Fri, 7 Mar 2014 19:40:04 +0000 (11:40 -0800)
This commit takes care of the generated signature error CQE generated
by the HW (if happened).  The underlying mlx5 driver will handle
signature error completions and will mark the relevant memory region
as dirty.

Once the consumer gets the completion for the transaction, it must
check for signature errors on signature memory region using a new
lightweight verb ib_check_mr_status().

In case the user doesn't check for signature error (i.e. doesn't call
ib_check_mr_status() with status check IB_MR_CHECK_SIG_STATUS), the
memory region cannot be used for another signature operation
(REG_SIG_MR work request will fail).

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
include/linux/mlx5/cq.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/qp.h

index b1705ce6eb88bcf351201495a7f9f0d2ffa275d7..62bb6b49dc1d56debdeb6ecea2d8b431fd11c2bb 100644 (file)
@@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
        mlx5_buf_free(&dev->mdev, &buf->buf);
 }
 
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+                            struct ib_sig_err *item)
+{
+       u16 syndrome = be16_to_cpu(cqe->syndrome);
+
+#define GUARD_ERR   (1 << 13)
+#define APPTAG_ERR  (1 << 12)
+#define REFTAG_ERR  (1 << 11)
+
+       if (syndrome & GUARD_ERR) {
+               item->err_type = IB_SIG_BAD_GUARD;
+               item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+               item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+       } else
+       if (syndrome & REFTAG_ERR) {
+               item->err_type = IB_SIG_BAD_REFTAG;
+               item->expected = be32_to_cpu(cqe->expected_reftag);
+               item->actual = be32_to_cpu(cqe->actual_reftag);
+       } else
+       if (syndrome & APPTAG_ERR) {
+               item->err_type = IB_SIG_BAD_APPTAG;
+               item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
+               item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
+       } else {
+               pr_err("Got signature completion error with bad syndrome %04x\n",
+                      syndrome);
+       }
+
+       item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+       item->key = be32_to_cpu(cqe->mkey);
+}
+
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
                         struct mlx5_ib_qp **cur_qp,
                         struct ib_wc *wc)
@@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
        struct mlx5_cqe64 *cqe64;
        struct mlx5_core_qp *mqp;
        struct mlx5_ib_wq *wq;
+       struct mlx5_sig_err_cqe *sig_err_cqe;
+       struct mlx5_core_mr *mmr;
+       struct mlx5_ib_mr *mr;
        uint8_t opcode;
        uint32_t qpn;
        u16 wqe_ctr;
@@ -475,6 +510,33 @@ repoll:
                        }
                }
                break;
+       case MLX5_CQE_SIG_ERR:
+               sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+
+               read_lock(&dev->mdev.priv.mr_table.lock);
+               mmr = __mlx5_mr_lookup(&dev->mdev,
+                                      mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+               if (unlikely(!mmr)) {
+                       read_unlock(&dev->mdev.priv.mr_table.lock);
+                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
+                                    cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
+                       return -EINVAL;
+               }
+
+               mr = to_mibmr(mmr);
+               get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
+               mr->sig->sig_err_exists = true;
+               mr->sig->sigerr_count++;
+
+               mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
+                            cq->mcq.cqn, mr->sig->err_item.key,
+                            mr->sig->err_item.err_type,
+                            mr->sig->err_item.sig_err_offset,
+                            mr->sig->err_item.expected,
+                            mr->sig->err_item.actual);
+
+               read_unlock(&dev->mdev.priv.mr_table.lock);
+               goto repoll;
        }
 
        return 0;
index 7260a299c6dbb7acbd8d935b7b94fe936e75b0d0..ba3ecec7fa651f037383625c32b417d25d4f2c14 100644 (file)
@@ -1431,6 +1431,7 @@ static int init_one(struct pci_dev *pdev,
        dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
        dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
        dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
+       dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
 
        if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
                dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
index e438f08899ae16a8b42eb99689e6e454307dccd7..50541586e0a6171df4ef6fe2a1d5e8d20a223f11 100644 (file)
@@ -400,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
        return container_of(mqp, struct mlx5_ib_qp, mqp);
 }
 
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+{
+       return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
 {
        return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -537,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+                           struct ib_mr_status *mr_status);
 
 static inline void init_query_mad(struct ib_smp *mad)
 {
index 032445c47608b2969deb1f7d12fe88fcd0fcf37f..81392b26d078abfd50b819d9a6dd3589a21fa522 100644 (file)
@@ -1038,6 +1038,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
                access_mode = MLX5_ACCESS_MODE_KLM;
                mr->sig->psv_memory.psv_idx = psv_index[0];
                mr->sig->psv_wire.psv_idx = psv_index[1];
+
+               mr->sig->sig_status_checked = true;
+               mr->sig->sig_err_exists = false;
+               /* Next UMR, Arm SIGERR */
+               ++mr->sig->sigerr_count;
        }
 
        in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
@@ -1188,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
 }
+
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+                           struct ib_mr_status *mr_status)
+{
+       struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+       int ret = 0;
+
+       if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
+               pr_err("Invalid status check mask\n");
+               ret = -EINVAL;
+               goto done;
+       }
+
+       mr_status->fail_status = 0;
+       if (check_mask & IB_MR_CHECK_SIG_STATUS) {
+               if (!mmr->sig) {
+                       ret = -EINVAL;
+                       pr_err("signature status check requested on a non-signature enabled MR\n");
+                       goto done;
+               }
+
+               mmr->sig->sig_status_checked = true;
+               if (!mmr->sig->sig_err_exists)
+                       goto done;
+
+               if (ibmr->lkey == mmr->sig->err_item.key)
+                       memcpy(&mr_status->sig_err, &mmr->sig->err_item,
+                              sizeof(mr_status->sig_err));
+               else {
+                       mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
+                       mr_status->sig_err.sig_err_offset = 0;
+                       mr_status->sig_err.key = mmr->sig->err_item.key;
+               }
+
+               mmr->sig->sig_err_exists = false;
+               mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
+       }
+
+done:
+       return ret;
+}
index 67e79989b181b042fa857cb95580f45166f974a1..ae788d27b93f51de73e869b728f7ce0ba16d586f 100644 (file)
@@ -1784,6 +1784,7 @@ static __be64 sig_mkey_mask(void)
        result = MLX5_MKEY_MASK_LEN             |
                MLX5_MKEY_MASK_PAGE_SIZE        |
                MLX5_MKEY_MASK_START_ADDR       |
+               MLX5_MKEY_MASK_EN_SIGERR        |
                MLX5_MKEY_MASK_EN_RINVAL        |
                MLX5_MKEY_MASK_KEY              |
                MLX5_MKEY_MASK_LR               |
@@ -2219,13 +2220,14 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
 {
        struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
        u32 sig_key = sig_mr->rkey;
+       u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
 
        memset(seg, 0, sizeof(*seg));
 
        seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
                                   MLX5_ACCESS_MODE_KLM;
        seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
-       seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL |
+       seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
                                    MLX5_MKEY_BSF_EN | pdn);
        seg->len = cpu_to_be64(length);
        seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
@@ -2255,7 +2257,8 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
        if (unlikely(wr->num_sge != 1) ||
            unlikely(wr->wr.sig_handover.access_flags &
                     IB_ACCESS_REMOTE_ATOMIC) ||
-           unlikely(!sig_mr->sig) || unlikely(!qp->signature_en))
+           unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+           unlikely(!sig_mr->sig->sig_status_checked))
                return -EINVAL;
 
        /* length of the protected region, data + protection */
@@ -2286,6 +2289,7 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
        if (ret)
                return ret;
 
+       sig_mr->sig->sig_status_checked = false;
        return 0;
 }
 
index 2202c7f72b75a703a4713ce2d7d79b81db8f7a81..f6b17ac601bda06533afb7bc4c7d80a692ff7bc4 100644 (file)
@@ -80,6 +80,7 @@ enum {
        MLX5_CQE_RESP_SEND_IMM  = 3,
        MLX5_CQE_RESP_SEND_INV  = 4,
        MLX5_CQE_RESIZE_CQ      = 5,
+       MLX5_CQE_SIG_ERR        = 12,
        MLX5_CQE_REQ_ERR        = 13,
        MLX5_CQE_RESP_ERR       = 14,
        MLX5_CQE_INVALID        = 15,
index f714fc427765aa65dc39fbbd8b4ece07a5c124bb..407bdb67fd4f809ac1cc53d5db0de47b901d2424 100644 (file)
@@ -118,6 +118,7 @@ enum {
        MLX5_MKEY_MASK_START_ADDR       = 1ull << 6,
        MLX5_MKEY_MASK_PD               = 1ull << 7,
        MLX5_MKEY_MASK_EN_RINVAL        = 1ull << 8,
+       MLX5_MKEY_MASK_EN_SIGERR        = 1ull << 9,
        MLX5_MKEY_MASK_BSF_EN           = 1ull << 12,
        MLX5_MKEY_MASK_KEY              = 1ull << 13,
        MLX5_MKEY_MASK_QPN              = 1ull << 14,
@@ -557,6 +558,23 @@ struct mlx5_cqe64 {
        u8              op_own;
 };
 
+struct mlx5_sig_err_cqe {
+       u8              rsvd0[16];
+       __be32          expected_trans_sig;
+       __be32          actual_trans_sig;
+       __be32          expected_reftag;
+       __be32          actual_reftag;
+       __be16          syndrome;
+       u8              rsvd22[2];
+       __be32          mkey;
+       __be64          err_offset;
+       u8              rsvd30[8];
+       __be32          qpn;
+       u8              rsvd38[2];
+       u8              signature;
+       u8              op_own;
+};
+
 struct mlx5_wqe_srq_next_seg {
        u8                      rsvd0[2];
        __be16                  next_wqe_index;
index e562e01e59c7f26045cf0fda96fd3b946511626a..93cef6313e72d2a63e2eb1c8e5c6347ae5664cad 100644 (file)
@@ -416,6 +416,10 @@ struct mlx5_core_psv {
 struct mlx5_core_sig_ctx {
        struct mlx5_core_psv    psv_memory;
        struct mlx5_core_psv    psv_wire;
+       struct ib_sig_err       err_item;
+       bool                    sig_status_checked;
+       bool                    sig_err_exists;
+       u32                     sigerr_count;
 };
 
 struct mlx5_core_mr {
index 49af74f90ef9b8adc7c52c6b1a3f666e776b9d9a..f829ad80ff28fc38883025c4e06c4247ba908edd 100644 (file)
@@ -506,6 +506,11 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
        return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
 }
 
+static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
+{
+       return radix_tree_lookup(&dev->priv.mr_table.tree, key);
+}
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,