mm: mempolicy: Use _PAGE_NUMA to migrate pages
author Mel Gorman <mgorman@suse.de>
Fri, 2 Nov 2012 11:33:45 +0000 (11:33 +0000)
committer Mel Gorman <mgorman@suse.de>
Tue, 11 Dec 2012 14:42:42 +0000 (14:42 +0000)
Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but
sufficiently different that the signed-off-bys were dropped

Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page()
pieces into an effective migrate on fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and avoid
the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the
page-migration performance.

Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
include/linux/huge_mm.h
mm/huge_memory.c
mm/memory.c

index a1d26a98c655856efce587b33aa504a55330b224..dabb5108d6c0cf3e76f2626a6663c22656aaafb1 100644 (file)
@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page)
        return page;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-                                 pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+                               unsigned long addr, pmd_t pmd, pmd_t *pmdp);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
        return 0;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-                                       pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+                                       unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+       return 0;
 }
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
index f5f37630c54da63c867afeca0e159c08f4beb8e3..5723b551c023cdbb5c951d7fe2cc80a39c5888ec 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -1019,17 +1020,39 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-                               pmd_t pmd, pmd_t *pmdp)
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+                               unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-       struct page *page;
+       struct page *page = NULL;
        unsigned long haddr = addr & HPAGE_PMD_MASK;
+       int target_nid;
 
        spin_lock(&mm->page_table_lock);
        if (unlikely(!pmd_same(pmd, *pmdp)))
                goto out_unlock;
 
        page = pmd_page(pmd);
+       get_page(page);
+       spin_unlock(&mm->page_table_lock);
+
+       target_nid = mpol_misplaced(page, vma, haddr);
+       if (target_nid == -1)
+               goto clear_pmdnuma;
+
+       /*
+        * Due to lacking code to migrate thp pages, we'll split
+        * (which preserves the special PROT_NONE) and re-take the
+        * fault on the normal pages.
+        */
+       split_huge_page(page);
+       put_page(page);
+       return 0;
+
+clear_pmdnuma:
+       spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(pmd, *pmdp)))
+               goto out_unlock;
+
        pmd = pmd_mknonnuma(pmd);
        set_pmd_at(mm, haddr, pmdp, pmd);
        VM_BUG_ON(pmd_numa(*pmdp));
@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
 
 out_unlock:
        spin_unlock(&mm->page_table_lock);
+       if (page)
+               put_page(page);
        return 0;
 }
 
index e30616f2cc3dbc5afcf88c218f5494df849f04e7..d52542680e108d42bcb9f4dcbe43238a078ceab5 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
-       struct page *page;
+       struct page *page = NULL;
        spinlock_t *ptl;
+       int current_nid, target_nid;
 
        /*
        * The "pte" at this point cannot be used safely without
@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        */
        ptl = pte_lockptr(mm, pmd);
        spin_lock(ptl);
-       if (unlikely(!pte_same(*ptep, pte)))
-               goto out_unlock;
+       if (unlikely(!pte_same(*ptep, pte))) {
+               pte_unmap_unlock(ptep, ptl);
+               goto out;
+       }
+
        pte = pte_mknonnuma(pte);
        set_pte_at(mm, addr, ptep, pte);
        update_mmu_cache(vma, addr, ptep);
@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                return 0;
        }
 
-out_unlock:
+       get_page(page);
+       current_nid = page_to_nid(page);
+       target_nid = mpol_misplaced(page, vma, addr);
        pte_unmap_unlock(ptep, ptl);
+       if (target_nid == -1) {
+               /*
+                * Account for the fault against the current node if it is
+                * not being replaced regardless of where the page is located.
+                */
+               current_nid = numa_node_id();
+               put_page(page);
+               goto out;
+       }
+
+       /* Migrate to the requested node */
+       if (migrate_misplaced_page(page, target_nid))
+               current_nid = target_nid;
+
+out:
        return 0;
 }
 
@@ -3655,7 +3677,7 @@ retry:
                barrier();
                if (pmd_trans_huge(orig_pmd)) {
                        if (pmd_numa(*pmd))
-                               return do_huge_pmd_numa_page(mm, address,
+                               return do_huge_pmd_numa_page(mm, vma, address,
                                                             orig_pmd, pmd);
 
                        if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {