diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1c713fd5b3e67966c3d998979d2c30eb8e14ba07..5f10052d2671ff4a6709213f9db13e8cc1d51374 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -220,24 +220,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
                                         unsigned long reason)
 {
        struct mm_struct *mm = ctx->mm;
-       pte_t *pte;
+       pte_t *ptep, pte;
        bool ret = true;
 
        VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-       pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
-       if (!pte)
+       ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
+
+       if (!ptep)
                goto out;
 
        ret = false;
+       pte = huge_ptep_get(ptep);
 
        /*
         * Lockless access: we're in a wait_event so it's ok if it
         * changes under us.
         */
-       if (huge_pte_none(*pte))
+       if (huge_pte_none(pte))
                ret = true;
-       if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
+       if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
                ret = true;
 out:
        return ret;
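
The real change in this hunk is the switch from dereferencing the page-table pointer twice to taking a single huge_ptep_get() snapshot. The two checks below the "Lockless access" comment are deliberately racy, but with the old code each *pte was an independent read, so a concurrent fault could change the value between them and the function could reach a verdict that no single pte value would justify (some architectures, s390 for instance, also override huge_ptep_get() because a plain dereference does not yield a usable pte there). A userspace analogy of the snapshot rule, in C11 atomics with invented names, not kernel code:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned long fake_pte;  /* stands in for *ptep */
static atomic_bool stop;

static void *writer(void *arg)
{
        unsigned long v = 0;

        (void)arg;
        while (!atomic_load_explicit(&stop, memory_order_relaxed))
                atomic_store_explicit(&fake_pte, ++v, memory_order_relaxed);
        return NULL;
}

/* two independent reads, like the old huge_pte_none(*pte) / huge_pte_write(*pte) */
static bool must_wait_racy(void)
{
        bool none = atomic_load(&fake_pte) % 2 == 0;
        bool ro = atomic_load(&fake_pte) % 2 == 1;

        return none && ro;      /* can only be true via a race */
}

/* one read, like pte = huge_ptep_get(ptep): both checks see the same value */
static bool must_wait_snapshot(void)
{
        unsigned long pte = atomic_load(&fake_pte);

        return (pte % 2 == 0) && (pte % 2 == 1);        /* always false */
}

int main(void)
{
        pthread_t t;
        unsigned long racy = 0, snap = 0;
        int i;

        pthread_create(&t, NULL, writer, NULL);
        for (i = 0; i < 10000000; i++) {
                racy += must_wait_racy();
                snap += must_wait_snapshot();
        }
        atomic_store(&stop, true);
        pthread_join(t, NULL);
        printf("contradictory verdicts: racy=%lu snapshot=%lu\n", racy, snap);
        return 0;
}

Built with -pthread, the racy counter may come out nonzero while the snapshot counter is zero by construction.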
@@ -570,11 +572,14 @@ out:
 static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                              struct userfaultfd_wait_queue *ewq)
 {
+       struct userfaultfd_ctx *release_new_ctx;
+
        if (WARN_ON_ONCE(current->flags & PF_EXITING))
                goto out;
 
        ewq->ctx = ctx;
        init_waitqueue_entry(&ewq->wq, current);
+       release_new_ctx = NULL;
 
        spin_lock(&ctx->event_wqh.lock);
        /*
@@ -601,8 +606,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                new = (struct userfaultfd_ctx *)
                                        (unsigned long)
                                        ewq->msg.arg.reserved.reserved1;
-
-                               userfaultfd_ctx_put(new);
+                               release_new_ctx = new;
                        }
                        break;
                }
@@ -617,6 +621,22 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
        __set_current_state(TASK_RUNNING);
        spin_unlock(&ctx->event_wqh.lock);
 
+       if (release_new_ctx) {
+               struct vm_area_struct *vma;
+               struct mm_struct *mm = release_new_ctx->mm;
+
+       /* the various vma->vm_userfaultfd_ctx pointers still point to it */
+               down_write(&mm->mmap_sem);
+               for (vma = mm->mmap; vma; vma = vma->vm_next)
+                       if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
+                               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+                               vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+                       }
+               up_write(&mm->mmap_sem);
+
+               userfaultfd_ctx_put(release_new_ctx);
+       }
+
        /*
         * ctx may go away after this if the userfault pseudo fd is
         * already released.
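
The three hunks above belong to one fix. When delivery of a UFFD_EVENT_FORK message fails, the reader hands the half-created child ctx back through ewq->msg.arg.reserved.reserved1, and the old code dropped it with userfaultfd_ctx_put() on the spot, while still holding ctx->event_wqh.lock and while the child's vmas still had vma->vm_userfaultfd_ctx pointing at it. That is a use-after-free waiting to happen, and the cleanup that prevents it, walking the child mm under down_write(&mm->mmap_sem) to untag every vma, may sleep and so could not run under the spinlock anyway. Hence the shape of the fix: record the doomed ctx in release_new_ctx under the lock, drop the lock, detach all references, then put it. A toy userspace rendering of that shape (invented names, a pthread mutex standing in for the spinlock):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_ctx { int refcount; };

static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;

static void toy_ctx_put(struct toy_ctx *c)
{
        if (--c->refcount == 0) {
                printf("freeing ctx %p\n", (void *)c);
                free(c);
        }
}

/* stands in for the mmap_sem-protected vma walk: it may sleep,
 * so it must never run under event_lock */
static void detach_vmas(struct toy_ctx *c)
{
        printf("untagging vmas that point at %p\n", (void *)c);
}

static void event_wait_completion(struct toy_ctx *new_ctx, int fork_failed)
{
        struct toy_ctx *release_new_ctx = NULL;

        pthread_mutex_lock(&event_lock);
        if (fork_failed)
                release_new_ctx = new_ctx;      /* record only; no put here */
        pthread_mutex_unlock(&event_lock);

        if (release_new_ctx) {
                detach_vmas(release_new_ctx);   /* no stale pointers remain */
                toy_ctx_put(release_new_ctx);   /* now freeing is safe */
        }
}

int main(void)
{
        struct toy_ctx *c = malloc(sizeof(*c));

        c->refcount = 1;
        event_wait_completion(c, 1);
        return 0;
}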
@@ -1342,6 +1362,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                ret = -EINVAL;
                if (!vma_can_userfault(cur))
                        goto out_unlock;
+
+               /*
+                * UFFDIO_COPY will fill file holes even without
+                * PROT_WRITE. This check enforces that if this is a
+                * MAP_SHARED, the process has write permission to the backing
+                * file. On a MAP_SHARED vma, VM_MAYWRITE being set also
+                * guarantees that no F_SEAL_WRITE seal is applied and that
+                * none can be taken until the vma is destroyed.
+                */
+               ret = -EPERM;
+               if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+                       goto out_unlock;
+
                /*
                 * If this vma contains ending address, and huge pages
                 * check alignment.
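
This -EPERM check is the heart of the fix for CVE-2018-18397: UFFDIO_COPY fills holes in the backing file regardless of PROT_WRITE, so before the check a process with only read access to a shmem or hugetlbfs file could still populate it by registering the mapping and resolving its own faults. A userspace sketch of the observable change (invented names; assumes userfaultfd, memfd_create and shmem registration are available on the running kernel):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API };
        char path[64];

        if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
                return 1;

        /* shmem object, then re-opened read-only: a MAP_SHARED mapping of
         * a descriptor not opened for write has VM_MAYWRITE cleared */
        int fd = syscall(__NR_memfd_create, "uffd-demo", 0);
        if (fd < 0 || ftruncate(fd, page))
                return 1;
        snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
        int rofd = open(path, O_RDONLY);

        void *p = mmap(NULL, page, PROT_READ, MAP_SHARED, rofd, 0);
        if (p == MAP_FAILED)
                return 1;

        struct uffdio_register reg = {
                .range = { .start = (unsigned long)p, .len = page },
                .mode = UFFDIO_REGISTER_MODE_MISSING,
        };
        if (ioctl(uffd, UFFDIO_REGISTER, &reg))
                printf("UFFDIO_REGISTER: %s\n", strerror(errno));
        else
                printf("registered: kernel lacks the VM_MAYWRITE check\n");
        return 0;
}

On a kernel carrying this patch the program should print "UFFDIO_REGISTER: Operation not permitted", since the read-only MAP_SHARED mapping never gets VM_MAYWRITE.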
@@ -1387,6 +1420,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                BUG_ON(!vma_can_userfault(vma));
                BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
                       vma->vm_userfaultfd_ctx.ctx != ctx);
+               WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
 
                /*
                 * Nothing to do: this vma is already registered into this
@@ -1541,6 +1575,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
                if (!vma->vm_userfaultfd_ctx.ctx)
                        goto skip;
 
+               WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
+
                if (vma->vm_start > start)
                        start = vma->vm_start;
                vma_end = min(end, vma->vm_end);