IB/mlx4: Support the new memory registration API
author Sagi Grimberg <sagig@mellanox.com>
Tue, 13 Oct 2015 16:11:27 +0000 (19:11 +0300)
committer Doug Ledford <dledford@redhat.com>
Thu, 29 Oct 2015 02:27:17 +0000 (22:27 -0400)
Support the new memory registration API by allocating a
private page list array in mlx4_ib_mr and populating it when
mlx4_ib_map_mr_sg is invoked. Also, support IB_WR_REG_MR
by building the same WQE as for IB_WR_FAST_REG_MR, only taking
the needed information from different places:
- page_size, iova, length, access flags (ib_mr)
- page array (mlx4_ib_mr)
- key (ib_reg_wr)

The IB_WR_FAST_REG_MR handlers will be removed later, once
all the ULPs have been converted.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c

index 5fd49f9435f9dd8d2496d91394271b6270af2cbd..2ea4125b7903d3be04c09d9fe2147c859e1bc163 100644 (file)
@@ -819,6 +819,7 @@ repoll:
                        break;
                case MLX4_OPCODE_FMR:
                        wc->opcode    = IB_WC_FAST_REG_MR;
+                       /* TODO: wc->opcode    = IB_WC_REG_MR; */
                        break;
                case MLX4_OPCODE_LOCAL_INVAL:
                        wc->opcode    = IB_WC_LOCAL_INV;
index f63d5427bfc0c55153738061ebb9b1c607f7b4fd..7e87599fbdcbf2ff197acdb5371f31cd43a6b2d5 100644 (file)
@@ -2266,6 +2266,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.rereg_user_mr     = mlx4_ib_rereg_user_mr;
        ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
        ibdev->ib_dev.alloc_mr          = mlx4_ib_alloc_mr;
+       ibdev->ib_dev.map_mr_sg         = mlx4_ib_map_mr_sg;
        ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
        ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
        ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
index 28a99e08030ec97c02d3256a9e628a22a2f4404c..64c80e15a9f85aba67f0a16b14608fcd909c0900 100644 (file)
@@ -129,10 +129,17 @@ struct mlx4_ib_cq {
        struct list_head                recv_qp_list;
 };
 
+#define MLX4_MR_PAGES_ALIGN 0x40
+
 struct mlx4_ib_mr {
        struct ib_mr            ibmr;
+       __be64                  *pages;         /* aligned page list inside pages_alloc */
+       dma_addr_t              page_map;       /* DMA address of the pages array */
+       u32                     npages;         /* entries currently filled in pages */
+       u32                     max_pages;      /* capacity of pages, in entries */
        struct mlx4_mr          mmr;
        struct ib_umem         *umem;
+       void                    *pages_alloc;   /* raw kzalloc() buffer, passed to kfree() */
 };
 
 struct mlx4_ib_mw {
@@ -714,6 +721,9 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg);
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+                     struct scatterlist *sg,
+                     int sg_nents);
 struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
                                                               int page_list_len);
 void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
index 5bba176e9dfa86b8c845393856710491cadf9e39..e5a32e4520e8d5922dd134979d1b44ca870cd88b 100644 (file)
@@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
        struct mlx4_ib_mr *mr;
        int err;
 
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
@@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        int err;
        int n;
 
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
@@ -271,11 +271,59 @@ release_mpt_entry:
        return err;
 }
 
+/*
+ * Allocate and DMA-map the private page list of a fast-registration MR.
+ *
+ * @device:    IB device whose dma_device is used for the mapping
+ * @mr:        MR that receives pages_alloc, pages and page_map
+ * @max_pages: number of u64 page-list entries to make room for
+ *
+ * The buffer is over-allocated so that mr->pages can be rounded up to
+ * MLX4_MR_PAGES_ALIGN inside it; mr->pages_alloc keeps the pointer that
+ * has to be handed back to kfree().
+ *
+ * Returns 0 on success, or -ENOMEM if the allocation or the DMA mapping
+ * fails.  If the DMA mapping fails the buffer is freed and mr->pages and
+ * mr->pages_alloc are reset to NULL, so the MR never holds pointers to
+ * freed memory and a later mlx4_free_priv_pages() on it is a no-op
+ * rather than a double free.
+ */
+static int
+mlx4_alloc_priv_pages(struct ib_device *device,
+                     struct mlx4_ib_mr *mr,
+                     int max_pages)
+{
+       int size = max_pages * sizeof(u64);
+       int add_size;
+
+       /* Slack needed when kmalloc's guaranteed alignment is too small. */
+       add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+       mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
+       if (!mr->pages_alloc)
+               return -ENOMEM;
+
+       mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
+
+       mr->page_map = dma_map_single(device->dma_device, mr->pages,
+                                     size, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dma_device, mr->page_map))
+               goto err;
+
+       return 0;
+
+err:
+       kfree(mr->pages_alloc);
+       /* Do not leave pointers to the freed buffer behind in the MR. */
+       mr->pages_alloc = NULL;
+       mr->pages = NULL;
+
+       return -ENOMEM;
+}
+
+/*
+ * Undo mlx4_alloc_priv_pages(): unmap the page list from the device and
+ * free its backing buffer.  Does nothing when mr->pages is NULL, and
+ * clears mr->pages afterwards so that a repeated call is a no-op.
+ *
+ * NOTE(review): the device is read from mr->ibmr.device, so that field
+ * has to be valid on every path that gets here with mr->pages set --
+ * confirm this holds for the err_free_pl unwind in mlx4_ib_alloc_mr().
+ */
+static void
+mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
+{
+       struct ib_device *ibdev;
+       int list_bytes;
+
+       if (!mr->pages)
+               return;
+
+       ibdev = mr->ibmr.device;
+       list_bytes = mr->max_pages * sizeof(u64);
+
+       dma_unmap_single(ibdev->dma_device, mr->page_map,
+                        list_bytes, DMA_TO_DEVICE);
+       kfree(mr->pages_alloc);
+       mr->pages = NULL;
+}
+
 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 {
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int ret;
 
+       mlx4_free_priv_pages(mr);
+
        ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
        if (ret)
                return ret;
@@ -362,7 +410,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
            max_num_sg > MLX4_MAX_FAST_REG_PAGES)
                return ERR_PTR(-EINVAL);
 
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
@@ -371,18 +419,25 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
        if (err)
                goto err_free;
 
+       err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
+       if (err)
+               goto err_free_mr;
+
+       mr->max_pages = max_num_sg;
+
        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
-               goto err_mr;
+               goto err_free_pl;
 
        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;
 
        return &mr->ibmr;
 
-err_mr:
+err_free_pl:
+       mlx4_free_priv_pages(mr);
+err_free_mr:
        (void) mlx4_mr_free(dev->dev, &mr->mmr);
-
 err_free:
        kfree(mr);
        return ERR_PTR(err);
@@ -528,3 +583,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
 
        return err;
 }
+
+/*
+ * Append one page address to the MR's private page list.
+ *
+ * Passed to ib_sg_to_pages() as its per-page callback.  The address is
+ * stored big-endian with MLX4_MTT_FLAG_PRESENT or'ed in.
+ *
+ * Returns 0 on success, or -ENOMEM when the list already holds
+ * max_pages entries.
+ */
+static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
+{
+       struct mlx4_ib_mr *mmr = to_mmr(ibmr);
+       u32 slot = mmr->npages;
+
+       if (unlikely(slot == mmr->max_pages))
+               return -ENOMEM;
+
+       mmr->pages[slot] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
+       mmr->npages = slot + 1;
+
+       return 0;
+}
+
+/*
+ * Populate the MR's private page list from a scatterlist.
+ *
+ * The whole page-list mapping (max_pages entries) is synced for CPU
+ * access before ib_sg_to_pages() rewrites it through mlx4_set_page(),
+ * and synced back for the device afterwards.
+ *
+ * Returns whatever ib_sg_to_pages() returned.
+ */
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+                     struct scatterlist *sg,
+                     int sg_nents)
+{
+       struct mlx4_ib_mr *mmr = to_mmr(ibmr);
+       size_t list_bytes = sizeof(u64) * mmr->max_pages;
+       int ret;
+
+       /* Start from an empty list; mlx4_set_page() appends to it. */
+       mmr->npages = 0;
+
+       ib_dma_sync_single_for_cpu(ibmr->device, mmr->page_map,
+                                  list_bytes, DMA_TO_DEVICE);
+
+       ret = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
+
+       ib_dma_sync_single_for_device(ibmr->device, mmr->page_map,
+                                     list_bytes, DMA_TO_DEVICE);
+
+       return ret;
+}
index f2b2a61898f85accf27fc0b3d4a56b822c771d91..0067f4b4dc0923ace878bbb4e63c713278f04348 100644 (file)
@@ -112,6 +112,7 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_SEND_WITH_INV]                   = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
        [IB_WR_LOCAL_INV]                       = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
        [IB_WR_FAST_REG_MR]                     = cpu_to_be32(MLX4_OPCODE_FMR),
+       [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
        [IB_WR_BIND_MW]                         = cpu_to_be32(MLX4_OPCODE_BIND_MW),
@@ -2505,6 +2506,22 @@ static __be32 convert_access(int acc)
                cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 }
 
+/*
+ * Fill the FMR WQE segment for an IB_WR_REG_MR work request.
+ *
+ * Access flags and key are taken from the work request, the page list
+ * address from the driver MR (page_map), and iova, length and page size
+ * from the embedded ib_mr.
+ */
+static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
+                       struct ib_reg_wr *wr)
+{
+       struct mlx4_ib_mr *mmr = to_mmr(wr->mr);
+       struct ib_mr *core_mr = &mmr->ibmr;
+
+       fseg->flags             = convert_access(wr->access);
+       fseg->mem_key           = cpu_to_be32(wr->key);
+       fseg->buf_list          = cpu_to_be64(mmr->page_map);
+       fseg->start_addr        = cpu_to_be64(core_mr->iova);
+       fseg->reg_len           = cpu_to_be64(core_mr->length);
+       fseg->offset            = 0; /* XXX -- is this just for ZBVA? */
+       /* The segment carries the page size as a log2 shift. */
+       fseg->page_size         = cpu_to_be32(ilog2(core_mr->page_size));
+       fseg->reserved[0]       = 0;
+       fseg->reserved[1]       = 0;
+}
+
 static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg,
                struct ib_fast_reg_wr *wr)
 {
@@ -2866,6 +2883,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
                                break;
 
+                       case IB_WR_REG_MR:
+                               ctrl->srcrb_flags |=
+                                       cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+                               set_reg_seg(wqe, reg_wr(wr));
+                               wqe  += sizeof(struct mlx4_wqe_fmr_seg);
+                               size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
+                               break;
+
                        case IB_WR_BIND_MW:
                                ctrl->srcrb_flags |=
                                        cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);