mlx5: Support communicating arbitrary host page size to firmware
author Eli Cohen <eli@dev.mellanox.co.il>
Wed, 23 Oct 2013 06:53:19 +0000 (09:53 +0300)
committer Roland Dreier <roland@purestorage.com>
Fri, 8 Nov 2013 22:43:00 +0000 (14:43 -0800)
Connect-IB firmware requires that the pages handed to it by the driver
be described in 4K units. This patch breaks larger host pages into 4K
units to support architectures that use a larger page size, such as
PowerPC. It also fixes several places that referred to PAGE_SHIFT
instead of an explicit 12, which is the inherent page shift of
Connect-IB.
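
For example, on a PowerPC system with 64K pages (PAGE_SHIFT = 16), a
buffer backed by 64K pages was previously described to firmware with
log_pg_sz = page_shift - PAGE_SHIFT = 0, i.e. as 4K pages. With the
explicit 12 the field becomes 16 - 12 = 4, and firmware recovers the
true page size as 2^(12 + 4) = 64K. A minimal sketch of the conversion
(illustrative only; the helper name is hypothetical):

	/* log_pg_sz is expressed relative to Connect-IB's 4K page */
	static u8 fw_log_pg_sz(int page_shift)
	{
		return page_shift - 12;	/* 16 - 12 = 4 for 64K pages */
	}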

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
include/linux/mlx5/driver.h

drivers/infiniband/hw/mlx5/cq.c
index 84591865e39d60aa95d8f02cf86aeb20f30aa0aa..eecac7b52bcb1b783108badccf94a2df6b8a80a6 100644
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        }
        mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-       (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+       (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
        *index = dev->mdev.priv.uuari.uars[0].index;
 
        return 0;
drivers/infiniband/hw/mlx5/qp.c
index 8a36fd78c89f6b1cfd5fd1793608f3d6c7a3b14e..ce14008a570279c9ec0c7be7df97ed3727eb6f5b 100644
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        }
        mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
        (*in)->ctx.log_pg_sz_remote_qpn =
-               cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+               cpu_to_be32((page_shift - 12) << 24);
        (*in)->ctx.params2 = cpu_to_be32(offset << 6);
 
        (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
                goto err_buf;
        }
        (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-       (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+       (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
        /* Set "fast registration enabled" for all kernel QPs */
        (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
        (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
drivers/infiniband/hw/mlx5/srq.c
index f1e5845b42b0b5e689ed5d9f661370cf69f1f00c..dbc2c7188c5e0ad8629d7bf21f01ebf57ca193df 100644
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
                goto err_in;
        }
 
-       (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+       (*in)->ctx.log_pg_sz = page_shift - 12;
        (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
        return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        }
        srq->wq_sig = !!srq_signature;
 
-       (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+       (*in)->ctx.log_pg_sz = page_shift - 12;
 
        return 0;
 
drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 2231d93cc7ad116e55c77e0269fa27878f6c6a4d..6b4b436840bd9522a40925571b63a95914371428 100644
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
        in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
        in->ctx.intr = vecidx;
-       in->ctx.log_page_size = PAGE_SHIFT - 12;
+       in->ctx.log_page_size = eq->buf.page_shift - 12;
        in->events_mask = cpu_to_be64(mask);
 
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index a0d0da35578c38ab5c794ff6d0ec1eb6f4d0b401..013aa422adee93c8ae04f5c7e4476dcb1dad3546 100644
@@ -57,10 +57,13 @@ struct mlx5_pages_req {
 };
 
 struct fw_page {
-       struct rb_node  rb_node;
-       u64             addr;
-       struct page     *page;
-       u16             func_id;
+       struct rb_node          rb_node;
+       u64                     addr;
+       struct page            *page;
+       u16                     func_id;
+       unsigned long           bitmask;
+       struct list_head        list;
+       unsigned                free_count;
 };
 
 struct mlx5_query_pages_inbox {
@@ -94,6 +97,11 @@ enum {
        MAX_RECLAIM_TIME_MSECS  = 5000,
 };
 
+enum {
+       MLX5_MAX_RECLAIM_TIME_MILI      = 5000,
+       MLX5_NUM_4K_IN_PAGE             = PAGE_SIZE / 4096,
+};
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
        struct rb_root *root = &dev->priv.page_root;
@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
        struct rb_node *parent = NULL;
        struct fw_page *nfp;
        struct fw_page *tfp;
+       int i;
 
        while (*new) {
                parent = *new;
@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
                        return -EEXIST;
        }
 
-       nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
+       nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
        if (!nfp)
                return -ENOMEM;
 
        nfp->addr = addr;
        nfp->page = page;
        nfp->func_id = func_id;
+       nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+       for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+               set_bit(i, &nfp->bitmask);
 
        rb_link_node(&nfp->rb_node, parent, new);
        rb_insert_color(&nfp->rb_node, root);
+       list_add(&nfp->list, &dev->priv.free_list);
 
        return 0;
 }
 
-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 {
        struct rb_root *root = &dev->priv.page_root;
        struct rb_node *tmp = root->rb_node;
-       struct page *result = NULL;
+       struct fw_page *result = NULL;
        struct fw_page *tfp;
 
        while (tmp) {
@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
                } else if (tfp->addr > addr) {
                        tmp = tmp->rb_right;
                } else {
-                       rb_erase(&tfp->rb_node, root);
-                       result = tfp->page;
-                       kfree(tfp);
+                       result = tfp;
                        break;
                }
        }
@@ -176,13 +187,97 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
        return err;
 }
 
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
+       struct fw_page *fp;
+       unsigned n;
+
+       if (list_empty(&dev->priv.free_list))
+               return -ENOMEM;
+
+       fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+       n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+       if (n >= MLX5_NUM_4K_IN_PAGE) {
+               mlx5_core_warn(dev, "alloc 4k bug\n");
+               return -ENOENT;
+       }
+       clear_bit(n, &fp->bitmask);
+       fp->free_count--;
+       if (!fp->free_count)
+               list_del(&fp->list);
+
+       *addr = fp->addr + n * 4096;
+
+       return 0;
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
+       struct fw_page *fwp;
+       int n;
+
+       fwp = find_fw_page(dev, addr & PAGE_MASK);
+       if (!fwp) {
+               mlx5_core_warn(dev, "page not found\n");
+               return;
+       }
+
+       n = (addr & ~PAGE_MASK) / 4096;
+       fwp->free_count++;
+       set_bit(n, &fwp->bitmask);
+       if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+               rb_erase(&fwp->rb_node, &dev->priv.page_root);
+               if (fwp->free_count != 1)
+                       list_del(&fwp->list);
+               dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE, DMA_BIDIRECTIONAL);
+               __free_page(fwp->page);
+               kfree(fwp);
+       } else if (fwp->free_count == 1) {
+               list_add(&fwp->list, &dev->priv.free_list);
+       }
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+       struct page *page;
+       u64 addr;
+       int err;
+
+       page = alloc_page(GFP_HIGHUSER);
+       if (!page) {
+               mlx5_core_warn(dev, "failed to allocate page\n");
+               return -ENOMEM;
+       }
+       addr = dma_map_page(&dev->pdev->dev, page, 0,
+                           PAGE_SIZE, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(&dev->pdev->dev, addr)) {
+               mlx5_core_warn(dev, "failed dma mapping page\n");
+               err = -ENOMEM;
+               goto out_alloc;
+       }
+       err = insert_page(dev, addr, page, func_id);
+       if (err) {
+               mlx5_core_err(dev, "failed to track allocated page\n");
+               goto out_mapping;
+       }
+
+       return 0;
+
+out_mapping:
+       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+out_alloc:
+       __free_page(page);
+
+       return err;
+}
+
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
                      int notify_fail)
 {
        struct mlx5_manage_pages_inbox *in;
        struct mlx5_manage_pages_outbox out;
        struct mlx5_manage_pages_inbox *nin;
-       struct page *page;
        int inlen;
        u64 addr;
        int err;
@@ -197,27 +292,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
        memset(&out, 0, sizeof(out));
 
        for (i = 0; i < npages; i++) {
-               page = alloc_page(GFP_HIGHUSER);
-               if (!page) {
-                       err = -ENOMEM;
-                       mlx5_core_warn(dev, "failed to allocate page\n");
-                       goto out_alloc;
-               }
-               addr = dma_map_page(&dev->pdev->dev, page, 0,
-                                   PAGE_SIZE, DMA_BIDIRECTIONAL);
-               if (dma_mapping_error(&dev->pdev->dev, addr)) {
-                       mlx5_core_warn(dev, "failed dma mapping page\n");
-                       __free_page(page);
-                       err = -ENOMEM;
-                       goto out_alloc;
-               }
-               err = insert_page(dev, addr, page, func_id);
+retry:
+               err = alloc_4k(dev, &addr);
                if (err) {
-                       mlx5_core_err(dev, "failed to track allocated page\n");
-                       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-                       __free_page(page);
-                       err = -ENOMEM;
-                       goto out_alloc;
+                       if (err == -ENOMEM)
+                               err = alloc_system_page(dev, func_id);
+                       if (err)
+                               goto out_4k;
+
+                       goto retry;
                }
                in->pas[i] = cpu_to_be64(addr);
        }
@@ -227,7 +310,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
        in->func_id = cpu_to_be16(func_id);
        in->num_entries = cpu_to_be32(npages);
        err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-       mlx5_core_dbg(dev, "err %d\n", err);
        if (err) {
                mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
                goto out_alloc;
@@ -251,7 +333,7 @@ out_alloc:
                nin = kzalloc(sizeof(*nin), GFP_KERNEL);
                if (!nin) {
                        mlx5_core_warn(dev, "allocation failed\n");
-                       goto unmap;
+                       goto out_4k;
                }
                memset(&out, 0, sizeof(out));
                nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
@@ -261,19 +343,9 @@ out_alloc:
                kfree(nin);
        }
 
-unmap:
-       for (i--; i >= 0; i--) {
-               addr = be64_to_cpu(in->pas[i]);
-               page = remove_page(dev, addr);
-               if (!page) {
-                       mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
-                                     addr);
-                       continue;
-               }
-               dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-               __free_page(page);
-       }
-
+out_4k:
+       for (i--; i >= 0; i--)
+               free_4k(dev, be64_to_cpu(in->pas[i]));
 out_free:
        mlx5_vfree(in);
        return err;
@@ -284,7 +356,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 {
        struct mlx5_manage_pages_inbox   in;
        struct mlx5_manage_pages_outbox *out;
-       struct page *page;
        int num_claimed;
        int outlen;
        u64 addr;
@@ -323,13 +394,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 
        for (i = 0; i < num_claimed; i++) {
                addr = be64_to_cpu(out->pas[i]);
-               page = remove_page(dev, addr);
-               if (!page) {
-                       mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
-               } else {
-                       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-                       __free_page(page);
-               }
+               free_4k(dev, addr);
        }
 
 out_free:
@@ -435,6 +500,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
        dev->priv.page_root = RB_ROOT;
+       INIT_LIST_HEAD(&dev->priv.free_list);
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
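
The heart of the pagealloc.c change is the extended struct fw_page: a
host page is DMA-mapped once, and its 4K units are handed to firmware
individually, with bitmask marking which units are free and free_count
deciding when the page joins or leaves the device's free list. Below is
a self-contained user-space model of that bookkeeping, assuming a 64K
host page for illustration; all names are hypothetical, not driver
code.

	#include <stdio.h>

	#define HOST_PAGE_SIZE	65536			/* e.g. a 64K PowerPC page */
	#define NUM_4K		(HOST_PAGE_SIZE / 4096)	/* 16 units per host page */

	struct model_page {
		unsigned long	bitmask;	/* bit set => 4K unit is free */
		unsigned	free_count;
	};

	/* hand out the first free 4K unit of the page */
	static int model_alloc_4k(struct model_page *p, unsigned long base,
				  unsigned long *addr)
	{
		unsigned n;

		for (n = 0; n < NUM_4K; n++) {
			if (p->bitmask & (1UL << n)) {
				p->bitmask &= ~(1UL << n);
				p->free_count--;
				*addr = base + n * 4096;
				return 0;
			}
		}
		return -1;	/* all 4K units of this page are in use */
	}

	/* return a 4K unit to the page it was carved from */
	static void model_free_4k(struct model_page *p, unsigned long base,
				  unsigned long addr)
	{
		unsigned n = (addr - base) / 4096;	/* unit index, not remainder */

		p->bitmask |= 1UL << n;
		p->free_count++;
	}

	int main(void)
	{
		struct model_page p = { (1UL << NUM_4K) - 1, NUM_4K };
		unsigned long a, b;

		model_alloc_4k(&p, 0x10000, &a);	/* a = 0x10000 */
		model_alloc_4k(&p, 0x10000, &b);	/* b = 0x11000 */
		model_free_4k(&p, 0x10000, a);
		printf("free units: %u of %u\n", p.free_count, (unsigned)NUM_4K);
		return 0;
	}

The quotient in model_free_4k mirrors free_4k() above: the index of a
4K unit within its host page is (offset / 4096), not a remainder.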
include/linux/mlx5/driver.h
index 513619a75695d2ec3c4f0d379d5e5c8401256181..554548cd3dd4683145ce5ef0a4b1e66fb829365d 100644
@@ -483,6 +483,7 @@ struct mlx5_priv {
        struct rb_root          page_root;
        int                     fw_pages;
        int                     reg_pages;
+       struct list_head        free_list;
 
        struct mlx5_core_health health;