From bfbac097b6e8023e10fdadab2527d0a1a3160d7e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:22 -0800 Subject: [PATCH] IB/rdmavt: Add post send to rdmavt Add in a post_send and post_one_send to rdmavt. The ULP will provide a WQE to rdmavt which will then walk and queue each element. Rdmavt will then queue the work to be done in the driver or kick the driver's progress routine. There needs to be a follow on patch which adds in another lock for the head of the queue so that it can be added to and read from in parallel. This will touch protocol handlers and require other changes in the drivers. This will be done separately. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 183 ++++++++++++++++++++++++++++-- include/rdma/rdma_vt.h | 7 ++ include/rdma/rdmavt_qp.h | 26 +++++ 3 files changed, 204 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 43346a773ff3..bd2d91a5b19a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -53,6 +53,27 @@ #include "qp.h" #include "vt.h" +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; rvt_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ +const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = RVT_POST_RECV_OK, + [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK, + [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK | + RVT_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK, + [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, + [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, +}; +EXPORT_SYMBOL(ib_rvt_state_ops); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, /* * Return the address of the RWQ as the offset to mmap. - * See hfi1_mmap() for details. + * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { @@ -749,6 +770,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EOPNOTSUPP; } +/** + * rvt_post_one_wr - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +{ + struct rvt_swqe *wqe; + u32 next; + int i; + int j; + int acc; + struct rvt_lkey_table *rkt; + struct rvt_pd *pd; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* IB spec says that num_sge == 0 is OK. */ + if (unlikely(wr->num_sge > qp->s_max_sge)) + return -EINVAL; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. + */ + if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) + return -EINVAL; + } else if (qp->ibqp.qp_type != IB_QPT_RC) { + /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + return -EINVAL; + /* Check UD destination address PD */ + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + return -EINVAL; + } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + return -EINVAL; + } + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) + return -ENOMEM; + + rkt = &rdi->lkey_table; + pd = ibpd_to_rvtpd(qp->ibqp.pd); + wqe = rvt_get_swqe_ptr(qp, qp->s_head); + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + + wqe->length = 0; + j = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? + IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval_free; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval_free; + } else { + atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + } + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + return 0; + +bail_inval_free: + /* release mr holds */ + while (j) { + struct rvt_sge *sge = &wqe->sg_list[--j]; + + rvt_put_mr(sge->mr); + } + return -EINVAL; +} + /** * rvt_post_send - post a send on a QP * @ibqp: the QP to post the send on @@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + unsigned long flags = 0; + int call_send; + unsigned nreq = 0; + int err = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + /* - * VT-DRIVER-API: do_send() - * Driver needs to have a do_send() call which is a single entry point - * to take an already formed packet and throw it out on the wire. Once - * the packet is sent the driver needs to make an upcall to rvt so the - * completion queue can be notified and/or any other outstanding - * work/book keeping can be finished. - * - * Note that there should also be a way for rvt to protect itself - * against hangs in the driver layer. If a send doesn't actually - * complete in a timely manor rvt needs to return an error event. + * Ensure QP state is such that we can send. If not bail out early, + * there is no need to do this every time we post a send. */ + if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { + spin_unlock_irqrestore(&qp->s_lock, flags); + return -EINVAL; + } - return -EOPNOTSUPP; + /* + * If the send queue is empty, and we only have a single WR then just go + * ahead and kick the send engine into gear. Otherwise we will always + * just schedule the send to happen later. + */ + call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; + + for (; wr; wr = wr->next) { + err = rvt_post_one_wr(qp, wr); + if (unlikely(err)) { + *bad_wr = wr; + goto bail; + } + nreq++; + } +bail: + if (nreq && !call_send) + rdi->driver_f.schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + if (nreq && call_send) + rdi->driver_f.do_send(qp); + return err; } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79080e3b09f8..36e4fb4c0df3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -231,6 +231,8 @@ struct rvt_driver_provided { gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); + void (*schedule_send)(struct rvt_qp *qp); + void (*do_send)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index bce0a03a7c07..3189f195538c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -129,6 +129,17 @@ /* Number of bits to pay attention to in the opcode for checking qp type */ #define RVT_OPCODE_QP_MASK 0xE0 +/* Flags for checking QP state (see ib_rvt_state_ops[]) */ +#define RVT_POST_SEND_OK 0x01 +#define RVT_POST_RECV_OK 0x02 +#define RVT_PROCESS_RECV_OK 0x04 +#define RVT_PROCESS_SEND_OK 0x08 +#define RVT_PROCESS_NEXT_SEND_OK 0x10 +#define RVT_FLUSH_SEND 0x20 +#define RVT_FLUSH_RECV 0x40 +#define RVT_PROCESS_OR_FLUSH_SEND \ + (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -373,4 +384,19 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct hfi1_qp.s_wq. This function does the array index computation. + */ +static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, + unsigned n) +{ + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + + qp->s_max_sge * + sizeof(struct rvt_sge)) * n); +} + +extern const int ib_rvt_state_ops[]; + #endif /* DEF_RDMAVT_INCQP_H */ -- 2.20.1