wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- * page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
- u32 key, u64 io_virt, size_t bcnt,
- u32 *bytes_committed,
- u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
- int srcu_key;
- unsigned int current_seq = 0;
- u64 start_idx, page_mask;
- int npages = 0, ret = 0;
- struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
+ int npages = 0, page_shift, np;
+ u64 start_idx, page_mask;
struct ib_umem_odp *odp;
- int implicit = 0;
+ int current_seq;
size_t size;
- int page_shift;
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(dev, key);
- /*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
- */
- if (!mr || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
- ret = -EFAULT;
- goto srcu_unlock;
- }
- if (!mr->umem->odp_data) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - *bytes_committed);
- goto srcu_unlock;
- }
-
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += *bytes_committed;
- bcnt -= *bytes_committed;
+ int ret;
if (!mr->umem->odp_data->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
- if (IS_ERR(odp)) {
- ret = PTR_ERR(odp);
- goto srcu_unlock;
- }
+ if (IS_ERR(odp))
+ return PTR_ERR(odp);
mr = odp->private;
- implicit = 1;
} else {
odp = mr->umem->odp_data;
}
+next_mr:
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
page_shift = mr->umem->page_shift;
page_mask = ~(BIT(page_shift) - 1);
+ start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+ if (mr->umem->writable)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
-next_mr:
current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
*/
smp_rmb();
- size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
- start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
-
- if (mr->umem->writable)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
-
ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
access_mask, current_seq);
if (ret < 0)
- goto srcu_unlock;
+ goto out;
- if (ret > 0) {
- int np = ret;
-
- mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- page_shift,
- MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
- }
- mutex_unlock(&odp->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- mlx5_ib_err(dev, "Failed to update mkey page tables\n");
- goto srcu_unlock;
- }
- if (bytes_mapped) {
- u32 new_mappings = (np << page_shift) -
- (io_virt - round_down(io_virt,
- 1 << page_shift));
- *bytes_mapped += min_t(u32, new_mappings, size);
- }
+ np = ret;
- npages += np << (page_shift - PAGE_SHIFT);
+ mutex_lock(&odp->umem_mutex);
+ if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ /*
+ * No need to check whether the MTTs really belong to
+ * this MR, since ib_umem_odp_map_dma_pages already
+ * checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
+ page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+ } else {
+ ret = -EAGAIN;
}
+ mutex_unlock(&odp->umem_mutex);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+ goto out;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (io_virt - round_down(io_virt, 1 << page_shift));
+ *bytes_mapped += min_t(u32, new_mappings, size);
+ }
+
+ npages += np << (page_shift - PAGE_SHIFT);
bcnt -= size;
+
if (unlikely(bcnt)) {
struct ib_umem_odp *next;
if (unlikely(!next || next->umem->address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
- ret = -EAGAIN;
- goto srcu_unlock_no_wait;
+ return -EAGAIN;
}
odp = next;
mr = odp->private;
goto next_mr;
}
-srcu_unlock:
+ return npages;
+
+out:
if (ret == -EAGAIN) {
- if (implicit || !odp->dying) {
+ if (mr->parent || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
}
}
-srcu_unlock_no_wait:
+ return ret;
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+ u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
+ u32 *bytes_mapped)
+{
+ int npages = 0, srcu_key, ret;
+ struct mlx5_ib_mr *mr;
+ size_t size;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+ mr = mlx5_ib_odp_find_mr_lkey(dev, key);
+ /*
+ * If we didn't find the MR, it means the MR was closed while we were
+ * handling the ODP event. In this case we return -EFAULT so that the
+ * QP will be closed.
+ */
+ if (!mr || !mr->ibmr.pd) {
+ mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+ key);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+ if (!mr->umem->odp_data) {
+ mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped +=
+ (bcnt - *bytes_committed);
+ goto srcu_unlock;
+ }
+
+ /*
+ * Avoid branches - this code will perform correctly
+ * in all iterations (in iteration 2 and above,
+ * bytes_committed == 0).
+ */
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+ npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped);
+
+srcu_unlock:
srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;