net/mlx4: Avoid wrong virtual mappings
author Haggai Abramovsky <hagaya@mellanox.com>
Wed, 4 May 2016 11:50:15 +0000 (14:50 +0300)
committer David S. Miller <davem@davemloft.net>
Fri, 6 May 2016 03:23:05 +0000 (23:23 -0400)
The dma_alloc_coherent() function returns a virtual address which can
be used for coherent access to the underlying memory.  On some
architectures, like arm64, undefined behavior results if this memory is
also accessed via virtual mappings that are not coherent.  Because of
their undefined nature, operations like virt_to_page() return garbage
when passed virtual addresses obtained from dma_alloc_coherent().  Any
subsequent mappings via vmap() of the garbage page values are unusable
and result in bad things like bus errors (synchronous aborts in ARM64
speak).

The mlx4 driver contains code that does the equivalent of
vmap(virt_to_page(dma_alloc_coherent)), which results in an oops when
the device is opened.

Prevent the Ethernet driver from running this problematic code by forcing
it to allocate contiguous memory. As for the InfiniBand driver, first try
to allocate contiguous memory, and on failure fall back to working with
fragmented memory.

Signed-off-by: Haggai Abramovsky <hagaya@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reported-by: David Daney <david.daney@cavium.com>
Tested-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/infiniband/hw/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx4/alloc.c
drivers/net/ethernet/mellanox/mlx4/en_cq.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_resources.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
include/linux/mlx4/device.h

index fd97534762b8dc7f83651a52e9398a3aa6e4944e..81b0e1fbec1d96ddce580e4021f2560589cc22b2 100644 (file)
@@ -419,7 +419,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 }
 
 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-                             enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
+                             enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
+                             bool shrink_wqe)
 {
        int s;
 
@@ -477,7 +478,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
         * We set WQE size to at least 64 bytes, this way stamping
         * invalidates each WQE.
         */
-       if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
+       if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
            qp->sq_signal_bits && BITS_PER_LONG == 64 &&
            type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
            !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
@@ -642,6 +643,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 {
        int qpn;
        int err;
+       struct ib_qp_cap backup_cap;
        struct mlx4_ib_sqp *sqp;
        struct mlx4_ib_qp *qp;
        enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
@@ -775,7 +777,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                                goto err;
                }
 
-               err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
+               memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
+               err = set_kernel_sq_size(dev, &init_attr->cap,
+                                        qp_type, qp, true);
                if (err)
                        goto err;
 
@@ -787,9 +791,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        *qp->db.db = 0;
                }
 
-               if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
-                       err = -ENOMEM;
-                       goto err_db;
+               if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
+                                  &qp->buf, gfp)) {
+                       memcpy(&init_attr->cap, &backup_cap,
+                              sizeof(backup_cap));
+                       err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
+                                                qp, false);
+                       if (err)
+                               goto err_db;
+
+                       if (mlx4_buf_alloc(dev->dev, qp->buf_size,
+                                          PAGE_SIZE * 2, &qp->buf, gfp)) {
+                               err = -ENOMEM;
+                               goto err_db;
+                       }
                }
 
                err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
index 0c51c69f802faf63a849300508ad4e809eeb7e3a..249a4584401ad487629c03b92a52f728d3318307 100644 (file)
@@ -576,41 +576,48 @@ out:
 
        return res;
 }
-/*
- * Handling for queue buffers -- we allocate a bunch of memory and
- * register it in a memory region at HCA virtual address 0.  If the
- * requested size is > max_direct, we split the allocation into
- * multiple pages, so we don't require too much contiguous memory.
- */
 
-int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-                  struct mlx4_buf *buf, gfp_t gfp)
+static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
+                                struct mlx4_buf *buf, gfp_t gfp)
 {
        dma_addr_t t;
 
-       if (size <= max_direct) {
-               buf->nbufs        = 1;
-               buf->npages       = 1;
-               buf->page_shift   = get_order(size) + PAGE_SHIFT;
-               buf->direct.buf   = dma_alloc_coherent(&dev->persist->pdev->dev,
-                                                      size, &t, gfp);
-               if (!buf->direct.buf)
-                       return -ENOMEM;
+       buf->nbufs        = 1;
+       buf->npages       = 1;
+       buf->page_shift   = get_order(size) + PAGE_SHIFT;
+       buf->direct.buf   =
+               dma_zalloc_coherent(&dev->persist->pdev->dev,
+                                   size, &t, gfp);
+       if (!buf->direct.buf)
+               return -ENOMEM;
 
-               buf->direct.map = t;
+       buf->direct.map = t;
 
-               while (t & ((1 << buf->page_shift) - 1)) {
-                       --buf->page_shift;
-                       buf->npages *= 2;
-               }
+       while (t & ((1 << buf->page_shift) - 1)) {
+               --buf->page_shift;
+               buf->npages *= 2;
+       }
 
-               memset(buf->direct.buf, 0, size);
+       return 0;
+}
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0. If the
+ *  requested size is > max_direct, we split the allocation into
+ *  multiple pages, so we don't require too much contiguous memory.
+ */
+int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
+                  struct mlx4_buf *buf, gfp_t gfp)
+{
+       if (size <= max_direct) {
+               return mlx4_buf_direct_alloc(dev, size, buf, gfp);
        } else {
+               dma_addr_t t;
                int i;
 
-               buf->direct.buf  = NULL;
-               buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-               buf->npages      = buf->nbufs;
+               buf->direct.buf = NULL;
+               buf->nbufs      = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+               buf->npages     = buf->nbufs;
                buf->page_shift  = PAGE_SHIFT;
                buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
                                           gfp);
@@ -619,28 +626,12 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 
                for (i = 0; i < buf->nbufs; ++i) {
                        buf->page_list[i].buf =
-                               dma_alloc_coherent(&dev->persist->pdev->dev,
-                                                  PAGE_SIZE,
-                                                  &t, gfp);
+                               dma_zalloc_coherent(&dev->persist->pdev->dev,
+                                                   PAGE_SIZE, &t, gfp);
                        if (!buf->page_list[i].buf)
                                goto err_free;
 
                        buf->page_list[i].map = t;
-
-                       memset(buf->page_list[i].buf, 0, PAGE_SIZE);
-               }
-
-               if (BITS_PER_LONG == 64) {
-                       struct page **pages;
-                       pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
-                       if (!pages)
-                               goto err_free;
-                       for (i = 0; i < buf->nbufs; ++i)
-                               pages[i] = virt_to_page(buf->page_list[i].buf);
-                       buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
-                       kfree(pages);
-                       if (!buf->direct.buf)
-                               goto err_free;
                }
        }
 
@@ -655,15 +646,11 @@ EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
 
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 {
-       int i;
-
-       if (buf->nbufs == 1)
+       if (buf->nbufs == 1) {
                dma_free_coherent(&dev->persist->pdev->dev, size,
-                                 buf->direct.buf,
-                                 buf->direct.map);
-       else {
-               if (BITS_PER_LONG == 64)
-                       vunmap(buf->direct.buf);
+                                 buf->direct.buf, buf->direct.map);
+       } else {
+               int i;
 
                for (i = 0; i < buf->nbufs; ++i)
                        if (buf->page_list[i].buf)
@@ -789,7 +776,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
 EXPORT_SYMBOL_GPL(mlx4_db_free);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
-                      int size, int max_direct)
+                      int size)
 {
        int err;
 
@@ -799,7 +786,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 
        *wqres->db.db = 0;
 
-       err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
+       err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL);
        if (err)
                goto err_db;
 
index af975a2b74c6d277c1a744cf358846d66de38abe..132cea655920d636c2e22fd2f110d163a421b8f3 100644 (file)
@@ -73,22 +73,16 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
         */
        set_dev_node(&mdev->dev->persist->pdev->dev, node);
        err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
-                               cq->buf_size, 2 * PAGE_SIZE);
+                               cq->buf_size);
        set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
        if (err)
                goto err_cq;
 
-       err = mlx4_en_map_buffer(&cq->wqres.buf);
-       if (err)
-               goto err_res;
-
        cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf;
        *pcq = cq;
 
        return 0;
 
-err_res:
-       mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
 err_cq:
        kfree(cq);
        *pcq = NULL;
@@ -177,7 +171,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_cq *cq = *pcq;
 
-       mlx4_en_unmap_buffer(&cq->wqres.buf);
        mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
        if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
            cq->is_tx == RX)
index 6f28ac58251cc8a35a3f4da4ea77894ec2fd09f4..92e0624f4cf03cef36f085c8fa4f2669c80c0ef3 100644 (file)
@@ -2928,7 +2928,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 
        /* Allocate page for receive rings */
        err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
-                               MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
+                               MLX4_EN_PAGE_SIZE);
        if (err) {
                en_err(priv, "Failed to allocate page for rx qps\n");
                goto out;
index 02e925d6f7348d774fe70c4a91b5038d5a27ba35..a6b0db0e038373348fcbe05d6490fcb35bb3c7fb 100644 (file)
@@ -107,37 +107,6 @@ int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
        return ret;
 }
 
-int mlx4_en_map_buffer(struct mlx4_buf *buf)
-{
-       struct page **pages;
-       int i;
-
-       if (BITS_PER_LONG == 64 || buf->nbufs == 1)
-               return 0;
-
-       pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
-       if (!pages)
-               return -ENOMEM;
-
-       for (i = 0; i < buf->nbufs; ++i)
-               pages[i] = virt_to_page(buf->page_list[i].buf);
-
-       buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
-       kfree(pages);
-       if (!buf->direct.buf)
-               return -ENOMEM;
-
-       return 0;
-}
-
-void mlx4_en_unmap_buffer(struct mlx4_buf *buf)
-{
-       if (BITS_PER_LONG == 64 || buf->nbufs == 1)
-               return;
-
-       vunmap(buf->direct.buf);
-}
-
 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
 {
     return;
index b723e3bcab39e83653be5367da514bd876b9d0cd..8ef6875b6cf9cf7491c4017b22dcd63af6fcdd8c 100644 (file)
@@ -394,17 +394,11 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
        /* Allocate HW buffers on provided NUMA node */
        set_dev_node(&mdev->dev->persist->pdev->dev, node);
-       err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
-                                ring->buf_size, 2 * PAGE_SIZE);
+       err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
        set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
        if (err)
                goto err_info;
 
-       err = mlx4_en_map_buffer(&ring->wqres.buf);
-       if (err) {
-               en_err(priv, "Failed to map RX buffer\n");
-               goto err_hwq;
-       }
        ring->buf = ring->wqres.buf.direct.buf;
 
        ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;
@@ -412,8 +406,6 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
        *pring = ring;
        return 0;
 
-err_hwq:
-       mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 err_info:
        vfree(ring->rx_info);
        ring->rx_info = NULL;
@@ -517,7 +509,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_rx_ring *ring = *pring;
 
-       mlx4_en_unmap_buffer(&ring->wqres.buf);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
        vfree(ring->rx_info);
        ring->rx_info = NULL;
index 0f206a95429c7904c9607ad6da02e502529be636..f6e61570cb2c2b53a13adfbfbccc2c00e084171b 100644 (file)
@@ -94,20 +94,13 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
        /* Allocate HW buffers on provided NUMA node */
        set_dev_node(&mdev->dev->persist->pdev->dev, node);
-       err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
-                                2 * PAGE_SIZE);
+       err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
        set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
        if (err) {
                en_err(priv, "Failed allocating hwq resources\n");
                goto err_bounce;
        }
 
-       err = mlx4_en_map_buffer(&ring->wqres.buf);
-       if (err) {
-               en_err(priv, "Failed to map TX buffer\n");
-               goto err_hwq_res;
-       }
-
        ring->buf = ring->wqres.buf.direct.buf;
 
        en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
@@ -118,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                                    MLX4_RESERVE_ETH_BF_QP);
        if (err) {
                en_err(priv, "failed reserving qp for TX ring\n");
-               goto err_map;
+               goto err_hwq_res;
        }
 
        err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
@@ -155,8 +148,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
 err_reserve:
        mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
-err_map:
-       mlx4_en_unmap_buffer(&ring->wqres.buf);
 err_hwq_res:
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
 err_bounce:
@@ -183,7 +174,6 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
        mlx4_qp_remove(mdev->dev, &ring->qp);
        mlx4_qp_free(mdev->dev, &ring->qp);
        mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
-       mlx4_en_unmap_buffer(&ring->wqres.buf);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
        kfree(ring->bounce_buf);
        ring->bounce_buf = NULL;
index 63b1aeae2c037cd585b1473db97134644e188e03..cc84e09f324a42aa526be4f1e5a74f1c88899791 100644 (file)
@@ -672,8 +672,6 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
                int is_tx, int rss, int qpn, int cqn, int user_prio,
                struct mlx4_qp_context *context);
 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
-int mlx4_en_map_buffer(struct mlx4_buf *buf);
-void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
 int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
                            int loopback);
 void mlx4_en_calc_rx_buf(struct net_device *dev);
index d1f904c8b2cb48ef7df75141134f0b8c2f1d4057..80dec87a94f8431024d88a6303f1f9189b2114e6 100644 (file)
@@ -1058,7 +1058,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
-       if (BITS_PER_LONG == 64 || buf->nbufs == 1)
+       if (buf->nbufs == 1)
                return buf->direct.buf + offset;
        else
                return buf->page_list[offset >> PAGE_SHIFT].buf +
@@ -1098,7 +1098,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
-                      int size, int max_direct);
+                      int size);
 void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
                       int size);