IB/mlx5: Extract page fault code
authorArtemy Kovalyov <artemyko@mellanox.com>
Wed, 5 Apr 2017 06:23:58 +0000 (09:23 +0300)
committerDoug Ledford <dledford@redhat.com>
Tue, 25 Apr 2017 19:40:28 +0000 (15:40 -0400)
To make page fault handling code more flexible
split pagefault_single_data_segment() function.
Keep MR resolution in pagefault_single_data_segment() and
move actual updates into pagefault_mr().

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx5/odp.c

index eddabd6e6596fcf228232a9cc9b1d7870021d870..842e1dbb50b85dea9092fb143b503d3b4545dbef 100644 (file)
@@ -511,81 +511,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
        wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
 }
 
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- *  page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- *  abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
-                                        u32 key, u64 io_virt, size_t bcnt,
-                                        u32 *bytes_committed,
-                                        u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+                       u64 io_virt, size_t bcnt, u32 *bytes_mapped)
 {
-       int srcu_key;
-       unsigned int current_seq = 0;
-       u64 start_idx, page_mask;
-       int npages = 0, ret = 0;
-       struct mlx5_ib_mr *mr;
        u64 access_mask = ODP_READ_ALLOWED_BIT;
+       int npages = 0, page_shift, np;
+       u64 start_idx, page_mask;
        struct ib_umem_odp *odp;
-       int implicit = 0;
+       int current_seq;
        size_t size;
-       int page_shift;
-
-       srcu_key = srcu_read_lock(&dev->mr_srcu);
-       mr = mlx5_ib_odp_find_mr_lkey(dev, key);
-       /*
-        * If we didn't find the MR, it means the MR was closed while we were
-        * handling the ODP event. In this case we return -EFAULT so that the
-        * QP will be closed.
-        */
-       if (!mr || !mr->ibmr.pd) {
-               mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-                           key);
-               ret = -EFAULT;
-               goto srcu_unlock;
-       }
-       if (!mr->umem->odp_data) {
-               mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-                           key);
-               if (bytes_mapped)
-                       *bytes_mapped +=
-                               (bcnt - *bytes_committed);
-               goto srcu_unlock;
-       }
-
-       /*
-        * Avoid branches - this code will perform correctly
-        * in all iterations (in iteration 2 and above,
-        * bytes_committed == 0).
-        */
-       io_virt += *bytes_committed;
-       bcnt -= *bytes_committed;
+       int ret;
 
        if (!mr->umem->odp_data->page_list) {
                odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
-               if (IS_ERR(odp)) {
-                       ret = PTR_ERR(odp);
-                       goto srcu_unlock;
-               }
+               if (IS_ERR(odp))
+                       return PTR_ERR(odp);
                mr = odp->private;
-               implicit = 1;
 
        } else {
                odp = mr->umem->odp_data;
        }
 
+next_mr:
+       size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
        page_shift = mr->umem->page_shift;
        page_mask = ~(BIT(page_shift) - 1);
+       start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+       if (mr->umem->writable)
+               access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-next_mr:
        current_seq = READ_ONCE(odp->notifiers_seq);
        /*
         * Ensure the sequence number is valid for some time before we call
@@ -593,51 +550,43 @@ next_mr:
         */
        smp_rmb();
 
-       size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
-       start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
-
-       if (mr->umem->writable)
-               access_mask |= ODP_WRITE_ALLOWED_BIT;
-
        ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
                                        access_mask, current_seq);
 
        if (ret < 0)
-               goto srcu_unlock;
+               goto out;
 
-       if (ret > 0) {
-               int np = ret;
-
-               mutex_lock(&odp->umem_mutex);
-               if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
-                       /*
-                        * No need to check whether the MTTs really belong to
-                        * this MR, since ib_umem_odp_map_dma_pages already
-                        * checks this.
-                        */
-                       ret = mlx5_ib_update_xlt(mr, start_idx, np,
-                                                page_shift,
-                                                MLX5_IB_UPD_XLT_ATOMIC);
-               } else {
-                       ret = -EAGAIN;
-               }
-               mutex_unlock(&odp->umem_mutex);
-               if (ret < 0) {
-                       if (ret != -EAGAIN)
-                               mlx5_ib_err(dev, "Failed to update mkey page tables\n");
-                       goto srcu_unlock;
-               }
-               if (bytes_mapped) {
-                       u32 new_mappings = (np << page_shift) -
-                               (io_virt - round_down(io_virt,
-                                                     1 << page_shift));
-                       *bytes_mapped += min_t(u32, new_mappings, size);
-               }
+       np = ret;
 
-               npages += np << (page_shift - PAGE_SHIFT);
+       mutex_lock(&odp->umem_mutex);
+       if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+               /*
+                * No need to check whether the MTTs really belong to
+                * this MR, since ib_umem_odp_map_dma_pages already
+                * checks this.
+                */
+               ret = mlx5_ib_update_xlt(mr, start_idx, np,
+                                        page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+       } else {
+               ret = -EAGAIN;
        }
+       mutex_unlock(&odp->umem_mutex);
 
+       if (ret < 0) {
+               if (ret != -EAGAIN)
+                       mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+               goto out;
+       }
+
+       if (bytes_mapped) {
+               u32 new_mappings = (np << page_shift) -
+                       (io_virt - round_down(io_virt, 1 << page_shift));
+               *bytes_mapped += min_t(u32, new_mappings, size);
+       }
+
+       npages += np << (page_shift - PAGE_SHIFT);
        bcnt -= size;
+
        if (unlikely(bcnt)) {
                struct ib_umem_odp *next;
 
@@ -646,17 +595,18 @@ next_mr:
                if (unlikely(!next || next->umem->address != io_virt)) {
                        mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
                                    io_virt, next);
-                       ret = -EAGAIN;
-                       goto srcu_unlock_no_wait;
+                       return -EAGAIN;
                }
                odp = next;
                mr = odp->private;
                goto next_mr;
        }
 
-srcu_unlock:
+       return npages;
+
+out:
        if (ret == -EAGAIN) {
-               if (implicit || !odp->dying) {
+               if (mr->parent || !odp->dying) {
                        unsigned long timeout =
                                msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
@@ -672,7 +622,62 @@ srcu_unlock:
                }
        }
 
-srcu_unlock_no_wait:
+       return ret;
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ *  page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ *  abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+                                        u32 key, u64 io_virt, size_t bcnt,
+                                        u32 *bytes_committed,
+                                        u32 *bytes_mapped)
+{
+       int npages = 0, srcu_key, ret = 0;
+       struct mlx5_ib_mr *mr;
+       size_t size;
+
+       srcu_key = srcu_read_lock(&dev->mr_srcu);
+       mr = mlx5_ib_odp_find_mr_lkey(dev, key);
+       /*
+        * If we didn't find the MR, it means the MR was closed while we were
+        * handling the ODP event. In this case we return -EFAULT so that the
+        * QP will be closed.
+        */
+       if (!mr || !mr->ibmr.pd) {
+               mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+                           key);
+               ret = -EFAULT;
+               goto srcu_unlock;
+       }
+       if (!mr->umem->odp_data) {
+               mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+                           key);
+               if (bytes_mapped)
+                       *bytes_mapped +=
+                               (bcnt - *bytes_committed);
+               goto srcu_unlock;
+       }
+
+       /*
+        * Avoid branches - this code will perform correctly
+        * in all iterations (in iteration 2 and above,
+        * bytes_committed == 0).
+        */
+       io_virt += *bytes_committed;
+       bcnt -= *bytes_committed;
+
+       npages = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+
+srcu_unlock:
        srcu_read_unlock(&dev->mr_srcu, srcu_key);
        *bytes_committed = 0;
        return ret ? ret : npages;