mm: thp: fix DEBUG_PAGEALLOC oops in copy_page_rep()
authorHugh Dickins <hughd@google.com>
Mon, 23 Jun 2014 20:22:05 +0000 (13:22 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Jun 2014 23:47:44 +0000 (16:47 -0700)
Trinity has for over a year been reporting a CONFIG_DEBUG_PAGEALLOC oops
in copy_page_rep() called from copy_user_huge_page() called from
do_huge_pmd_wp_page().

I believe this is a DEBUG_PAGEALLOC false positive, due to the source
page being split, and a tail page freed, while copy is in progress; and
not a problem without DEBUG_PAGEALLOC, since the pmd_same() check will
prevent a miscopy from being made visible.

Fix by adding get_user_huge_page() and put_user_huge_page(): reducing to
the usual get_page() and put_page() on head page in the usual config;
but get and put references to all of the tail pages when
DEBUG_PAGEALLOC.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/huge_memory.c

index e60837dc785c4ce35f579374527587c77665ecce..bade35ef563b084d7b9c38be10dc8747c168591a 100644 (file)
@@ -941,6 +941,37 @@ unlock:
        spin_unlock(ptl);
 }
 
+/*
+ * Save CONFIG_DEBUG_PAGEALLOC from faulting falsely on tail pages
+ * during copy_user_huge_page()'s copy_page_rep(): in the case when
+ * the source page gets split and a tail freed before copy completes.
+ * Called under pmd_lock of checked pmd, so safe from splitting itself.
+ */
+static void get_user_huge_page(struct page *page)
+{
+       if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) {
+               struct page *endpage = page + HPAGE_PMD_NR;
+
+               atomic_add(HPAGE_PMD_NR, &page->_count);
+               while (++page < endpage)
+                       get_huge_page_tail(page);
+       } else {
+               get_page(page);
+       }
+}
+
+static void put_user_huge_page(struct page *page)
+{
+       if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) {
+               struct page *endpage = page + HPAGE_PMD_NR;
+
+               while (page < endpage)
+                       put_page(page++);
+       } else {
+               put_page(page);
+       }
+}
+
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                                        struct vm_area_struct *vma,
                                        unsigned long address,
@@ -1074,7 +1105,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                ret |= VM_FAULT_WRITE;
                goto out_unlock;
        }
-       get_page(page);
+       get_user_huge_page(page);
        spin_unlock(ptl);
 alloc:
        if (transparent_hugepage_enabled(vma) &&
@@ -1095,7 +1126,7 @@ alloc:
                                split_huge_page(page);
                                ret |= VM_FAULT_FALLBACK;
                        }
-                       put_page(page);
+                       put_user_huge_page(page);
                }
                count_vm_event(THP_FAULT_FALLBACK);
                goto out;
@@ -1105,7 +1136,7 @@ alloc:
                put_page(new_page);
                if (page) {
                        split_huge_page(page);
-                       put_page(page);
+                       put_user_huge_page(page);
                } else
                        split_huge_page_pmd(vma, address, pmd);
                ret |= VM_FAULT_FALLBACK;
@@ -1127,7 +1158,7 @@ alloc:
 
        spin_lock(ptl);
        if (page)
-               put_page(page);
+               put_user_huge_page(page);
        if (unlikely(!pmd_same(*pmd, orig_pmd))) {
                spin_unlock(ptl);
                mem_cgroup_uncharge_page(new_page);