mm: numa: do not trap faults on the huge zero page
author     Mel Gorman <mgorman@suse.de>
           Thu, 12 Feb 2015 22:58:35 +0000 (14:58 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 13 Feb 2015 02:54:08 +0000 (18:54 -0800)
Faults on the huge zero page are pointless, and there is a BUG_ON in
do_numa_page() to catch them at fault time.  This patch reintroduces a
check that avoids marking the zero page PAGE_NONE.
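
The heart of the fix, condensed from the mm/huge_memory.c hunk below (an
excerpt of the patched kernel code, not standalone code): when
change_huge_pmd() is asked to apply a NUMA-balancing protection change
(prot_numa) and the PMD maps the huge zero page, it drops the PMD lock
and returns without touching the entry:

	/*
	 * The huge zero page is identical on every node, so NUMA
	 * hinting faults on it yield no placement information.
	 */
	if (prot_numa && is_huge_zero_pmd(*pmd)) {
		spin_unlock(ptl);
		return 0;
	}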

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/huge_mm.h
mm/huge_memory.c
mm/memory.c
mm/mprotect.c

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 062bd252e9944f67849442b457ea1ad403ec7c77..f10b20f051599287a40188d322ead088726e2296 100644
@@ -31,7 +31,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
                         unsigned long new_addr, unsigned long old_end,
                         pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-                       unsigned long addr, pgprot_t newprot);
+                       unsigned long addr, pgprot_t newprot,
+                       int prot_numa);
 
 enum transparent_hugepage_flag {
        TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cb9b3e847dac65ad9d0695c42e606a2ae74df743..8e791a3db6b6804987ae587feff95c79ac940480 100644
@@ -1471,7 +1471,7 @@ out:
  *  - HPAGE_PMD_NR is protections changed and TLB flush necessary
  */
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-               unsigned long addr, pgprot_t newprot)
+               unsigned long addr, pgprot_t newprot, int prot_numa)
 {
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
@@ -1479,6 +1479,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
        if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pmd_t entry;
+
+               /*
+                * Avoid trapping faults against the zero page. The read-only
+                * data is likely to be read-cached on the local CPU and
+                * local/remote hits to the zero page are not interesting.
+                */
+               if (prot_numa && is_huge_zero_pmd(*pmd)) {
+                       spin_unlock(ptl);
+                       return 0;
+               }
+
                ret = 1;
                entry = pmdp_get_and_clear_notify(mm, addr, pmd);
                entry = pmd_modify(entry, newprot);
diff --git a/mm/memory.c b/mm/memory.c
index d7921760cf793ad4b240f688696848e6797634ce..bf244f56b05ac9ef78404ac926438a52cd844cbd 100644
@@ -3040,7 +3040,6 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                pte_unmap_unlock(ptep, ptl);
                return 0;
        }
-       BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
        /*
         * Avoid grouping on DSO/COW pages in specific and RO pages
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 76824d73380da16f075f0a7ca80933d909d49613..dd599fc235c2fe5ce483128bb89dfad29dc104cd 100644
@@ -76,6 +76,18 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                if (pte_present(oldpte)) {
                        pte_t ptent;
 
+                       /*
+                        * Avoid trapping faults against the zero or KSM
+                        * pages. See similar comment in change_huge_pmd.
+                        */
+                       if (prot_numa) {
+                               struct page *page;
+
+                               page = vm_normal_page(vma, addr, oldpte);
+                               if (!page || PageKsm(page))
+                                       continue;
+                       }
+
                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        ptent = pte_modify(ptent, newprot);
 
@@ -142,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                                split_huge_page_pmd(vma, addr, pmd);
                        else {
                                int nr_ptes = change_huge_pmd(vma, pmd, addr,
-                                               newprot);
+                                               newprot, prot_numa);
 
                                if (nr_ptes) {
                                        if (nr_ptes == HPAGE_PMD_NR) {
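
A note on the mm/mprotect.c hunk above: the PTE-level path needs no
explicit zero-page test because vm_normal_page() returns NULL for the
zero page (as for other special mappings), so the !page check covers it.
The PageKsm() test additionally skips KSM pages, which are shared across
processes and therefore give NUMA hinting faults no useful placement
signal either.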