mm: numa: Migrate pages handled during a pmd_numa hinting fault
author	Mel Gorman <mgorman@suse.de>
Thu, 15 Nov 2012 01:24:32 +0000 (01:24 +0000)
committer	Mel Gorman <mgorman@suse.de>
Tue, 11 Dec 2012 14:42:49 +0000 (14:42 +0000)
To say that the PMD handling code was incorrectly transferred from autonuma
is an understatement. The intention was to handle a PMD's worth of pages
in the same fault and effectively batch the taking of the PTL and page
migration. The copied version instead simply clears a number of pte_numa
PTE entries; whether any page migration takes place then depends on a
race. It just happens to work in some cases.

This patch handles pte_numa faults in batch when a pmd_numa fault is
handled. The pages are migrated if they are currently misplaced.
Essentially this assumes that NUMA locality is aligned to a PMD
boundary, but if necessary that could be addressed by only setting
pmd_numa when all the pages within that PMD are on the same node.
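
For illustration, the per-page work in the new batched loop of
do_pmd_numa_page() boils down to the sketch below. This is a
simplified outline of the code in the diff, not the literal
implementation: the PTL locking/unlocking, the pte_present/pte_numa
checks and the shared-page filter are elided.

	int curr_nid = local_nid;
	int target_nid;

	/* Takes a page reference, accounts NUMA_HINT_FAULTS(_LOCAL)
	 * and asks the memory policy whether the page is misplaced */
	target_nid = numa_migrate_prep(page, vma, addr, page_to_nid(page));
	if (target_nid == -1) {
		/* Page is where the policy wants it; drop the reference */
		put_page(page);
	} else {
		/* Migrate; charge the fault to the node the page ends
		 * up on, or to the local node if migration fails */
		if (migrate_misplaced_page(page, target_nid))
			curr_nid = target_nid;
		task_numa_fault(curr_nid, 1);
	}

Unlike the removed version, the hinting fault statistics are now
accounted per page inside numa_migrate_prep(), and task_numa_fault()
is charged to the node the page was migrated to when migration
succeeds.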

Signed-off-by: Mel Gorman <mgorman@suse.de>
mm/memory.c
mm/mprotect.c

diff --git a/mm/memory.c b/mm/memory.c
index 8a7b4ccbe136e4474e4bb7469e4d956e5b6ca3ca..84c6d9eab182ed16aa9c3ef3e67b89b28234222e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3449,6 +3449,18 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
+int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+                               unsigned long addr, int current_nid)
+{
+       get_page(page);
+
+       count_vm_numa_event(NUMA_HINT_FAULTS);
+       if (current_nid == numa_node_id())
+               count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
+       return mpol_misplaced(page, vma, addr);
+}
+
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
@@ -3477,18 +3489,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        set_pte_at(mm, addr, ptep, pte);
        update_mmu_cache(vma, addr, ptep);
 
-       count_vm_numa_event(NUMA_HINT_FAULTS);
        page = vm_normal_page(vma, addr, pte);
        if (!page) {
                pte_unmap_unlock(ptep, ptl);
                return 0;
        }
 
-       get_page(page);
        current_nid = page_to_nid(page);
-       if (current_nid == numa_node_id())
-               count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
-       target_nid = mpol_misplaced(page, vma, addr);
+       target_nid = numa_migrate_prep(page, vma, addr, current_nid);
        pte_unmap_unlock(ptep, ptl);
        if (target_nid == -1) {
                /*
@@ -3505,7 +3513,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                current_nid = target_nid;
 
 out:
-       task_numa_fault(current_nid, 1);
+       if (current_nid != -1)
+               task_numa_fault(current_nid, 1);
        return 0;
 }
 
@@ -3521,8 +3530,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        spinlock_t *ptl;
        bool numa = false;
        int local_nid = numa_node_id();
-       unsigned long nr_faults = 0;
-       unsigned long nr_faults_local = 0;
 
        spin_lock(&mm->page_table_lock);
        pmd = *pmdp;
@@ -3545,7 +3552,8 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
                pte_t pteval = *pte;
                struct page *page;
-               int curr_nid;
+               int curr_nid = local_nid;
+               int target_nid;
                if (!pte_present(pteval))
                        continue;
                if (!pte_numa(pteval))
@@ -3566,21 +3574,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                /* only check non-shared pages */
                if (unlikely(page_mapcount(page) != 1))
                        continue;
-               pte_unmap_unlock(pte, ptl);
 
-               curr_nid = page_to_nid(page);
-               task_numa_fault(curr_nid, 1);
+               /*
+                * Note that the NUMA fault is later accounted to either
+                * the node that is currently running or where the page is
+                * migrated to.
+                */
+               curr_nid = local_nid;
+               target_nid = numa_migrate_prep(page, vma, addr,
+                                              page_to_nid(page));
+               if (target_nid == -1) {
+                       put_page(page);
+                       continue;
+               }
 
-               nr_faults++;
-               if (curr_nid == local_nid)
-                       nr_faults_local++;
+               /* Migrate to the requested node */
+               pte_unmap_unlock(pte, ptl);
+               if (migrate_misplaced_page(page, target_nid))
+                       curr_nid = target_nid;
+               task_numa_fault(curr_nid, 1);
 
                pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
        }
        pte_unmap_unlock(orig_pte, ptl);
 
-       count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
-       count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
        return 0;
 }
 #else
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7ef6ae964e8f02327b14b69507ecc1357e69a8b4..dce6fb48edc4612e090702a4d58d123946eacf95 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,12 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end, pgprot_t newprot,
-               int dirty_accountable, int prot_numa)
+               int dirty_accountable, int prot_numa, bool *ret_all_same_node)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte, oldpte;
        spinlock_t *ptl;
        unsigned long pages = 0;
+       bool all_same_node = true;
+       int last_nid = -1;
 
        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
@@ -61,6 +63,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
                                page = vm_normal_page(vma, addr, oldpte);
                                if (page) {
+                                       int this_nid = page_to_nid(page);
+                                       if (last_nid == -1)
+                                               last_nid = this_nid;
+                                       if (last_nid != this_nid)
+                                               all_same_node = false;
+
                                        /* only check non-shared pages */
                                        if (!pte_numa(oldpte) &&
                                            page_mapcount(page) == 1) {
@@ -81,7 +89,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
                        if (updated)
                                pages++;
-
                        ptep_modify_prot_commit(mm, addr, pte, ptent);
                } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -101,6 +108,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);
 
+       *ret_all_same_node = all_same_node;
        return pages;
 }
 
@@ -127,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
        pmd_t *pmd;
        unsigned long next;
        unsigned long pages = 0;
+       bool all_same_node;
 
        pmd = pmd_offset(pud, addr);
        do {
@@ -143,9 +152,15 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                pages += change_pte_range(vma, pmd, addr, next, newprot,
-                                dirty_accountable, prot_numa);
-
-               if (prot_numa)
+                                dirty_accountable, prot_numa, &all_same_node);
+
+               /*
+                * If we are changing protections for NUMA hinting faults then
+                * set pmd_numa if the examined pages were all on the same
+                * node. This allows a regular PMD to be handled as one fault
+                * and effectively batches the taking of the PTL
+                */
+               if (prot_numa && all_same_node)
                        change_pmd_protnuma(vma->vm_mm, addr, pmd);
        } while (pmd++, addr = next, addr != end);
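
For context (not part of this diff), a PMD marked here by
change_pmd_protnuma() is what later drives the batched path: when the
task next touches an address in that PMD, the fault handler sees
pmd_numa and hands the whole range to do_pmd_numa_page(). The
dispatch in handle_mm_fault() is roughly:

	if (pmd_numa(*pmd))
		return do_pmd_numa_page(mm, vma, address, pmd);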