net/smc: use separate memory regions for RMBs
author: Ursula Braun <ubraun@linux.vnet.ibm.com>
Fri, 28 Jul 2017 11:56:16 +0000 (13:56 +0200)
committer: David S. Miller <davem@davemloft.net>
Sat, 29 Jul 2017 18:22:58 +0000 (11:22 -0700)
SMC currently uses the unsafe_global_rkey of the protection domain,
which exposes all memory for remote reads and writes once a connection
is established. This patch introduces separate memory regions with
separate rkeys for every RMB. Now the unsafe_global_rkey of the
protection domain is no longer needed.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/smc_clc.c
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_ib.c
net/smc/smc_ib.h

index 15cb76019009f19911c08ff5b1f611963ba08f1e..3934913ab835cc791362a6a2ea816e871ce2cee1 100644 (file)
@@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
        memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
        hton24(cclc.qpn, link->roce_qp->qp_num);
        cclc.rmb_rkey =
-               htonl(link->roce_pd->unsafe_global_rkey);
+               htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
        cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
        cclc.rmbe_alert_token = htonl(conn->alert_token_local);
        cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
@@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
        memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
        hton24(aclc.qpn, link->roce_qp->qp_num);
        aclc.rmb_rkey =
-               htonl(link->roce_pd->unsafe_global_rkey);
+               htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
        aclc.conn_idx = 1;                      /* as long as 1 RMB = 1 RMBE */
        aclc.rmbe_alert_token = htonl(conn->alert_token_local);
        aclc.qp_mtu = link->path_mtu;
index bfdbda795f67179426389e21dec94bdd126becf0..f1dd4e1cd3e1ff4a9444808e4bf6bef6c9552bf3 100644 (file)
@@ -218,6 +218,7 @@ static void smc_sndbuf_unuse(struct smc_connection *conn)
 static void smc_rmb_unuse(struct smc_connection *conn)
 {
        if (conn->rmb_desc) {
+               conn->rmb_desc->reused = true;
                conn->rmb_desc->used = 0;
                conn->rmbe_size = 0;
        }
@@ -274,6 +275,8 @@ static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
                list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
                                         list) {
                        list_del(&rmb_desc->list);
+                       smc_ib_put_memory_region(
+                                       rmb_desc->mr_rx[SMC_SINGLE_LINK]);
                        smc_ib_buf_unmap_sg(lnk->smcibdev, rmb_desc,
                                            DMA_FROM_DEVICE);
                        kfree(rmb_desc->cpu_addr);
@@ -627,6 +630,21 @@ int smc_rmb_create(struct smc_sock *smc)
                        rmb_desc = NULL;
                        continue; /* if mapping failed, try smaller one */
                }
+               rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd,
+                                             IB_ACCESS_REMOTE_WRITE |
+                                             IB_ACCESS_LOCAL_WRITE,
+                                             rmb_desc);
+               if (rc) {
+                       smc_ib_buf_unmap_sg(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+                                           rmb_desc, DMA_FROM_DEVICE);
+                       sg_free_table(&rmb_desc->sgt[SMC_SINGLE_LINK]);
+                       free_pages((unsigned long)rmb_desc->cpu_addr,
+                                  rmb_desc->order);
+                       kfree(rmb_desc);
+                       rmb_desc = NULL;
+                       continue;
+               }
+
                rmb_desc->used = 1;
                write_lock_bh(&lgr->rmbs_lock);
                list_add(&rmb_desc->list, &lgr->rmbs[bufsize_short]);
index 0ee450d6990765c391820a113a43893740ff22a8..17b5fea09901db5e51ff03da4c36ce1f6198cd4b 100644 (file)
@@ -94,8 +94,13 @@ struct smc_buf_desc {
                                                /* mapped address of buffer */
        void                    *cpu_addr;      /* virtual address of buffer */
        struct sg_table         sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
+       struct ib_mr            *mr_rx[SMC_LINKS_PER_LGR_MAX];
+                                               /* for rmb only: memory region
+                                                * incl. rkey provided to peer
+                                                */
        u32                     order;          /* allocation order */
        u32                     used;           /* currently used / unused */
+       bool                    reused;         /* new created / reused */
 };
 
 struct smc_rtoken {                            /* address/key of remote RMB */
@@ -175,5 +180,4 @@ int smc_sndbuf_create(struct smc_sock *smc);
 int smc_rmb_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc);
-
 #endif
index fcfeb89b05d9768bd70e09d9e448ee43047ca206..08233492ec458603bc89df305dbe32a551c49242 100644 (file)
@@ -192,8 +192,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 {
        int rc;
 
-       lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
-                                  IB_PD_UNSAFE_GLOBAL_RKEY);
+       lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
        rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
        if (IS_ERR(lnk->roce_pd))
                lnk->roce_pd = NULL;
@@ -254,6 +253,48 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
        return rc;
 }
 
+void smc_ib_put_memory_region(struct ib_mr *mr)
+{
+       ib_dereg_mr(mr);
+}
+
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+{
+       unsigned int offset = 0;
+       int sg_num;
+
+       /* map the largest prefix of a dma mapped SG list */
+       sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
+                             buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+                             buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+                             &offset, PAGE_SIZE);
+
+       return sg_num;
+}
+
+/* Allocate a memory region and map the dma mapped SG list of buf_slot */
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+                            struct smc_buf_desc *buf_slot)
+{
+       if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+               return 0; /* already done */
+
+       buf_slot->mr_rx[SMC_SINGLE_LINK] =
+               ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
+       if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+               int rc;
+
+               rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
+               buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+               return rc;
+       }
+
+       if (smc_ib_map_mr_sg(buf_slot) != 1)
+               return -EINVAL;
+
+       return 0;
+}
+
 /* map a new TX or RX buffer to DMA */
 int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
                   struct smc_buf_desc *buf_slot,
index b30e387854b63457544191a24abec0dcdab76be1..b57d29f290428fa7731be74975cc374925146c4d 100644 (file)
@@ -71,6 +71,7 @@ int smc_ib_ready_link(struct smc_link *lnk);
 int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
-
-
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+                            struct smc_buf_desc *buf_slot);
+void smc_ib_put_memory_region(struct ib_mr *mr);
 #endif