mm: introduce do_cow_fault()
author Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Thu, 3 Apr 2014 21:48:12 +0000 (14:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Apr 2014 23:21:03 +0000 (16:21 -0700)
Introduce do_cow_fault().  The function does what do_fault() does for
write page faults to private mappings.

Unlike do_fault(), do_cow_fault() is relatively clean and
straightforward.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/memory.c

index 56784e9a7151fc5b49be65fdcc9df7038fc58cb3..5be13e794a7caf8c607c74b037903341f1925d3b 100644 (file)
@@ -3354,6 +3354,62 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return ret;
 }
 
+static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+               unsigned long address, pmd_t *pmd,
+               pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
+{
+       struct page *fault_page, *new_page;
+       spinlock_t *ptl;
+       pte_t entry, *pte;
+       int ret;
+       /* Write fault on a private mapping: map a fresh copy of the page. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return VM_FAULT_OOM;
+       /* Get the page that will hold the copy. */
+       new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+       if (!new_page)
+               return VM_FAULT_OOM;
+       /* If the memcg charge fails, release new_page and report OOM. */
+       if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) {
+               page_cache_release(new_page);
+               return VM_FAULT_OOM;
+       }
+       /* Get the source page; on error/NOPAGE/RETRY take the undo path. */
+       ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+       if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
+               goto uncharge_out;
+       /* Copy fault_page into new_page and mark the copy up to date. */
+       copy_user_highpage(new_page, fault_page, address, vma);
+       __SetPageUptodate(new_page);
+       /* Recheck the pte under its lock; back out if it changed. */
+       pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (unlikely(!pte_same(*pte, orig_pte))) {
+               pte_unmap_unlock(pte, ptl);
+               unlock_page(fault_page);
+               page_cache_release(fault_page);
+               goto uncharge_out;
+       }
+       /* Map new_page: dirty pte, anon counter and rmap, then set the pte. */
+       flush_icache_page(vma, new_page);
+       entry = mk_pte(new_page, vma->vm_page_prot);
+       entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+       inc_mm_counter_fast(mm, MM_ANONPAGES);
+       page_add_new_anon_rmap(new_page, vma, address);
+       set_pte_at(mm, address, pte, entry);
+
+       /* no need to invalidate: a not-present page won't be cached */
+       update_mmu_cache(vma, address, pte);
+       /* Drop the pte lock, then unlock and release the source page. */
+       pte_unmap_unlock(pte, ptl);
+       unlock_page(fault_page);
+       page_cache_release(fault_page);
+       return ret;     /* as returned by __do_fault() */
+uncharge_out:  /* undo the memcg charge and release new_page */
+       mem_cgroup_uncharge_page(new_page);
+       page_cache_release(new_page);
+       return ret;
+}
+
 /*
  * do_fault() tries to create a new page mapping. It aggressively
  * tries to share with existing pages, but makes a separate copy if
@@ -3550,6 +3606,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!(flags & FAULT_FLAG_WRITE))
                return do_read_fault(mm, vma, address, pmd, pgoff, flags,
                                orig_pte);
+       if (!(vma->vm_flags & VM_SHARED))
+               return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
+                               orig_pte);
        return do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
@@ -3585,6 +3644,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!(flags & FAULT_FLAG_WRITE))
                return do_read_fault(mm, vma, address, pmd, pgoff, flags,
                                orig_pte);
+       if (!(vma->vm_flags & VM_SHARED))
+               return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
+                               orig_pte);
        return do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }