nvme-rdma: don't complete requests before a send work request has completed
authorSagi Grimberg <sagi@grimberg.me>
Thu, 23 Nov 2017 15:35:22 +0000 (17:35 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 3 Feb 2018 16:39:02 +0000 (17:39 +0100)
[ Upstream commit 4af7f7ff92a42b6c713293c99e7982bcfcf51a70 ]

In order to guarantee that the HCA will never get an access violation
(either from invalidated rkey or from iommu) when retrying a send
operation we must complete a request only when both send completion and
the nvme cqe has arrived. We need to set the send/recv completions flags
atomically because we might have more than a single context accessing the
request concurrently (one is cq irq-poll context and the other is
user-polling used in IOCB_HIPRI).

Only then we are safe to invalidate the rkey (if needed), unmap the host
buffers, and complete the IO.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/nvme/host/rdma.c

index 6de163e6c9ebab57630bad61aa96171165ced535..33d4431c2b4b1892ccfa05160c08f32b1d31455c 100644 (file)
@@ -67,6 +67,9 @@ struct nvme_rdma_request {
        struct nvme_request     req;
        struct ib_mr            *mr;
        struct nvme_rdma_qe     sqe;
+       union nvme_result       result;
+       __le16                  status;
+       refcount_t              ref;
        struct ib_sge           sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
        u32                     num_sge;
        int                     nents;
@@ -1177,6 +1180,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
        req->num_sge = 1;
        req->inline_data = false;
        req->mr->need_inval = false;
+       refcount_set(&req->ref, 2); /* send and recv completions */
 
        c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -1213,8 +1217,19 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-       if (unlikely(wc->status != IB_WC_SUCCESS))
+       struct nvme_rdma_qe *qe =
+               container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+       struct nvme_rdma_request *req =
+               container_of(qe, struct nvme_rdma_request, sqe);
+       struct request *rq = blk_mq_rq_from_pdu(req);
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
                nvme_rdma_wr_error(cq, wc, "SEND");
+               return;
+       }
+
+       if (refcount_dec_and_test(&req->ref))
+               nvme_end_request(rq, req->status, req->result);
 }
 
 /*
@@ -1359,14 +1374,19 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
        }
        req = blk_mq_rq_to_pdu(rq);
 
-       if (rq->tag == tag)
-               ret = 1;
+       req->status = cqe->status;
+       req->result = cqe->result;
 
        if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
            wc->ex.invalidate_rkey == req->mr->rkey)
                req->mr->need_inval = false;
 
-       nvme_end_request(rq, cqe->status, cqe->result);
+       if (refcount_dec_and_test(&req->ref)) {
+               if (rq->tag == tag)
+                       ret = 1;
+               nvme_end_request(rq, req->status, req->result);
+       }
+
        return ret;
 }