mm, x86/mm: Fix performance regression in get_user_pages_fast()
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fri, 8 Sep 2017 21:56:03 +0000 (00:56 +0300)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 10 Dec 2017 12:40:43 +0000 (13:40 +0100)
[ Upstream commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80 ]

The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:

  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop

The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
In x86 implementation local_irq_disable() + local_irq_enable() was used.

The fix is to make get_user_pages_fast() use local_irq_disable(),
leaving local_irq_save() for __get_user_pages_fast() that can be called
with interrupts disabled.

Numbers for pinning a gigabyte of memory, one page a time, 20 repeats:

  Before:  Average: 14.91 ms, stddev: 0.45 ms
  After:   Average: 10.76 ms, stddev: 0.18 ms

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: linux-mm@kvack.org
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
mm/gup.c

index 165ba2174c75841ccab981969fd47c37bfbb0c45..e0d82b6706d72d82637bca5eaef1e35e15a1abdf 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1707,6 +1707,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
        return 1;
 }
 
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+               int write, struct page **pages, int *nr)
+{
+       unsigned long next;
+       pgd_t *pgdp;
+
+       pgdp = pgd_offset(current->mm, addr);
+       do {
+               pgd_t pgd = READ_ONCE(*pgdp);
+
+               next = pgd_addr_end(addr, end);
+               if (pgd_none(pgd))
+                       return;
+               if (unlikely(pgd_huge(pgd))) {
+                       if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+                                         pages, nr))
+                               return;
+               } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+                       if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+                                        PGDIR_SHIFT, next, write, pages, nr))
+                               return;
+               } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+                       return;
+       } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+       unsigned long len, end;
+
+       len = (unsigned long) nr_pages << PAGE_SHIFT;
+       end = start + len;
+       return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1714,10 +1755,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                          struct page **pages)
 {
-       struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
-       unsigned long next, flags;
-       pgd_t *pgdp;
+       unsigned long flags;
        int nr = 0;
 
        start &= PAGE_MASK;
@@ -1741,45 +1780,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
         * block IPIs that come from THPs splitting.
         */
 
-       local_irq_save(flags);
-       pgdp = pgd_offset(mm, addr);
-       do {
-               pgd_t pgd = READ_ONCE(*pgdp);
-
-               next = pgd_addr_end(addr, end);
-               if (pgd_none(pgd))
-                       break;
-               if (unlikely(pgd_huge(pgd))) {
-                       if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-                                         pages, &nr))
-                               break;
-               } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-                       if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-                                        PGDIR_SHIFT, next, write, pages, &nr))
-                               break;
-               } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-                       break;
-       } while (pgdp++, addr = next, addr != end);
-       local_irq_restore(flags);
+       if (gup_fast_permitted(start, nr_pages, write)) {
+               local_irq_save(flags);
+               gup_pgd_range(addr, end, write, pages, &nr);
+               local_irq_restore(flags);
+       }
 
        return nr;
 }
 
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-       unsigned long len, end;
-
-       len = (unsigned long) nr_pages << PAGE_SHIFT;
-       end = start + len;
-       return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:     starting user address
@@ -1799,12 +1808,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
 {
+       unsigned long addr, len, end;
        int nr = 0, ret = 0;
 
        start &= PAGE_MASK;
+       addr = start;
+       len = (unsigned long) nr_pages << PAGE_SHIFT;
+       end = start + len;
+
+       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                       (void __user *)start, len)))
+               return 0;
 
        if (gup_fast_permitted(start, nr_pages, write)) {
-               nr = __get_user_pages_fast(start, nr_pages, write, pages);
+               local_irq_disable();
+               gup_pgd_range(addr, end, write, pages, &nr);
+               local_irq_enable();
                ret = nr;
        }