mm/gup: Introduce get_user_pages_remote()
Author:     Dave Hansen <dave.hansen@linux.intel.com>
AuthorDate: Fri, 12 Feb 2016 21:01:54 +0000 (13:01 -0800)
Commit:     Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 16 Feb 2016 09:04:09 +0000 (10:04 +0100)
For protection keys, we need to understand whether protections
should be enforced in software or not.  In general, we enforce
protections when working on our own task's memory, but not when
working on another task's.  We call these "current" and "remote"
operations.

This patch introduces a new get_user_pages() variant:

        get_user_pages_remote()

which replaces get_user_pages() for callers that operate on a
non-current tsk/mm.
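
The calling convention is the same as get_user_pages(); a caller
pinning pages in another process's address space ends up looking
roughly like this (an illustrative sketch using the prototype's
parameter names, not lifted from any single call site):

        down_read(&mm->mmap_sem);
        ret = get_user_pages_remote(tsk, mm, start, nr_pages,
                                    write, force, pages, vmas);
        up_read(&mm->mmap_sem);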

We also introduce a new gup flag, FOLL_REMOTE, which can be passed
to the "__" gup variants to get this new behavior.

The uprobes is_trap_at_addr() call site holds mmap_sem and calls
get_user_pages() on current->mm for an instruction address.  That
makes it a fairly unique gup caller.  Since it is an instruction
access that really originates from the kernel (rather than from the
application), I opted to treat it as a 'remote' access, where
protection keys will not be enforced.

Without protection keys, this patch should not change any behavior.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: jack@suse.cz
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20160212210154.3F0E51EA@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/i915/i915_gem_userptr.c
drivers/infiniband/core/umem_odp.c
fs/exec.c
include/linux/mm.h
kernel/events/uprobes.c
mm/gup.c
mm/memory.c
mm/process_vm_access.c
security/tomoyo/domain.c
virt/kvm/async_pf.c

index 4b519e4309b28edb1a1a9f56c40fb00153c87036..97d4457be8d260dcac159afff7758b4cbfa9551d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -753,9 +753,9 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
 
        down_read(&mm->mmap_sem);
        while (pinned < npages) {
-               ret = get_user_pages(task, mm, ptr, npages - pinned,
-                                    !etnaviv_obj->userptr.ro, 0,
-                                    pvec + pinned, NULL);
+               ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
+                                           !etnaviv_obj->userptr.ro, 0,
+                                           pvec + pinned, NULL);
                if (ret < 0)
                        break;
 
index 59e45b3a69379a0e892fbd85d7a17ba3f85913eb..90dbf8121210934d16338bfe48466c1e4aacdaae 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -584,11 +584,11 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
                down_read(&mm->mmap_sem);
                while (pinned < npages) {
-                       ret = get_user_pages(work->task, mm,
-                                            obj->userptr.ptr + pinned * PAGE_SIZE,
-                                            npages - pinned,
-                                            !obj->userptr.read_only, 0,
-                                            pvec + pinned, NULL);
+                       ret = get_user_pages_remote(work->task, mm,
+                                       obj->userptr.ptr + pinned * PAGE_SIZE,
+                                       npages - pinned,
+                                       !obj->userptr.read_only, 0,
+                                       pvec + pinned, NULL);
                        if (ret < 0)
                                break;
 
index e69bf266049d0117830577167c2987c83cd8ac62..75077a018675e1aa77c7955878250ad521c9d25d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -572,10 +572,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
                 * complex (and doesn't gain us much performance in most use
                 * cases).
                 */
-               npages = get_user_pages(owning_process, owning_mm, user_virt,
-                                       gup_num_pages,
-                                       access_mask & ODP_WRITE_ALLOWED_BIT, 0,
-                                       local_page_list, NULL);
+               npages = get_user_pages_remote(owning_process, owning_mm,
+                               user_virt, gup_num_pages,
+                               access_mask & ODP_WRITE_ALLOWED_BIT,
+                               0, local_page_list, NULL);
                up_read(&owning_mm->mmap_sem);
 
                if (npages < 0)
index dcd4ac7d3f1e77b45fde8a84585150a4862b84d9..d885b98b6a000546911f1fa28da47e7bb45cd977 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -198,8 +198,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                        return NULL;
        }
 #endif
-       ret = get_user_pages(current, bprm->mm, pos,
-                       1, write, 1, &page, NULL);
+       /*
+        * We are doing an exec().  'current' is the process
+        * doing the exec and bprm->mm is the new process's mm.
+        */
+       ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
+                       1, &page, NULL);
        if (ret <= 0)
                return NULL;
 
index b1d4b8c7f7cdcddb4b2513f11bc4231c546baf25..faf3b709eeadda3cc23dc585a8139c089fa52952 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1225,6 +1225,10 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                      unsigned long start, unsigned long nr_pages,
                      unsigned int foll_flags, struct page **pages,
                      struct vm_area_struct **vmas, int *nonblocking);
+long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+                           unsigned long start, unsigned long nr_pages,
+                           int write, int force, struct page **pages,
+                           struct vm_area_struct **vmas);
 long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                    unsigned long start, unsigned long nr_pages,
                    int write, int force, struct page **pages,
@@ -2170,6 +2174,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_MIGRATION 0x400   /* wait for page to replace migration entry */
 #define FOLL_TRIED     0x800   /* a retry, previous pass started an IO */
 #define FOLL_MLOCK     0x1000  /* lock present pages */
+#define FOLL_REMOTE    0x2000  /* we are working on non-current tsk/mm */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                        void *data);
index 0167679182c08dae79cf54e0b3830fc626866ee2..8eef5f55d3f0ede648b5f4170a28886f27224935 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 
 retry:
        /* Read the page with vaddr into memory */
-       ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
+       ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
        if (ret <= 0)
                return ret;
 
@@ -1700,7 +1700,13 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
        if (likely(result == 0))
                goto out;
 
-       result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
+       /*
+        * The NULL 'tsk' here ensures that any faults that occur here
+        * will not be accounted to the task.  'mm' *is* current->mm,
+        * but we treat this as a 'remote' access since it is
+        * essentially a kernel access to the memory.
+        */
+       result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
        if (result < 0)
                return result;
 
index 7bf19ffa21999c13fa1f24dc01a6bda77217688c..36ca850936c9ff3515db2fd7b8b1b1348a490037 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -870,7 +870,7 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
 /*
- * get_user_pages() - pin user pages in memory
+ * get_user_pages_remote() - pin user pages in memory
  * @tsk:       the task_struct to use for page fault accounting, or
  *             NULL if faults are not to be recorded.
  * @mm:                mm_struct of target mm
@@ -924,12 +924,29 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  * should use get_user_pages because it cannot pass
  * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
  */
-long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-               unsigned long start, unsigned long nr_pages, int write,
-               int force, struct page **pages, struct vm_area_struct **vmas)
+long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long start, unsigned long nr_pages,
+               int write, int force, struct page **pages,
+               struct vm_area_struct **vmas)
 {
        return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
-                                      pages, vmas, NULL, false, FOLL_TOUCH);
+                                      pages, vmas, NULL, false,
+                                      FOLL_TOUCH | FOLL_REMOTE);
+}
+EXPORT_SYMBOL(get_user_pages_remote);
+
+/*
+ * This is the same as get_user_pages_remote() for the time
+ * being.
+ */
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long start, unsigned long nr_pages,
+               int write, int force, struct page **pages,
+               struct vm_area_struct **vmas)
+{
+       return __get_user_pages_locked(tsk, mm, start, nr_pages,
+                                      write, force, pages, vmas, NULL, false,
+                                      FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
 
index 38090ca37a08b256ae83633e8709953f842fcbfe..8bfbad0cca8ceb77fa206308b709affc4a4c6e25 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3685,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                void *maddr;
                struct page *page = NULL;
 
-               ret = get_user_pages(tsk, mm, addr, 1,
+               ret = get_user_pages_remote(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
                if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
index 5d453e58ddbf7504e78869b4de406aa360fc2a01..07514d41ebcc1623b789fc93e09794058ecdc6ca 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -98,9 +98,14 @@ static int process_vm_rw_single_vec(unsigned long addr,
                int pages = min(nr_pages, max_pages_per_loop);
                size_t bytes;
 
-               /* Get the pages we're interested in */
-               pages = get_user_pages_unlocked(task, mm, pa, pages,
-                                               vm_write, 0, process_pages);
+               /*
+                * Get the pages we're interested in.  We must
+                * add FOLL_REMOTE because task/mm might not be
+                * current/current->mm.
+                */
+               pages = __get_user_pages_unlocked(task, mm, pa, pages,
+                                                 vm_write, 0, process_pages,
+                                                 FOLL_REMOTE);
                if (pages <= 0)
                        return -EFAULT;
 
index 38651454ed08a5d0ae7479dda87a41bead18c7a1..ade7c6cad172a13833a3b41799a142ebf4cb4f46 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -874,7 +874,14 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
        }
        /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */
 #ifdef CONFIG_MMU
-       if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0)
+       /*
+        * This is called at execve() time in order to dig around
+        * in the argv/environment of the new process
+        * (represented by bprm).  'current' is the process doing
+        * the execve().
+        */
+       if (get_user_pages_remote(current, bprm->mm, pos, 1,
+                               0, 1, &page, NULL) <= 0)
                return false;
 #else
        page = bprm->page[pos / PAGE_SIZE];
index 35315992245600a418874fd371e641ef336a0a22..d604e87a510af7b4ec6fa43276cf05d594b8e072 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -79,7 +79,13 @@ static void async_pf_execute(struct work_struct *work)
 
        might_sleep();
 
-       get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL);
+       /*
+        * This work is run asynchronously to the task which owns
+        * mm and might be done in another context, so we must
+        * use FOLL_REMOTE.
+        */
+       __get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE);
+
        kvm_async_page_present_sync(vcpu, apf);
 
        spin_lock(&vcpu->async_pf.lock);