mm: cleanup "swapcache" in do_swap_page

[GitHub/mt8127/android_kernel_alcatel_ttab.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index e0a9b0ce4f102f8cfef2d6d7c6581d71f7595ff4..705473afc1f4f6c32eeaa1726e04068993dd946e 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,6 +69,10 @@
  
  #include "internal.h"
  
+#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
+#endif
+
  #ifndef CONFIG_NEED_MULTIPLE_NODES
  /* use the per-pgdat data instead for discontigmem - mbligh */
  unsigned long max_mapnr;
@@ -184,10 +188,14 @@ static int tlb_next_batch(struct mmu_gather *tlb)
                 return 1;
         }
  
+       if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
+               return 0;
+
         batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
         if (!batch)
                 return 0;
  
+       tlb->batch_count++;
         batch->next = NULL;
         batch->nr   = 0;
         batch->max  = MAX_GATHER_BATCH;
@@ -216,6 +224,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
         tlb->local.nr   = 0;
         tlb->local.max  = ARRAY_SIZE(tlb->__pages);
         tlb->active     = &tlb->local;
+       tlb->batch_count = 0;
  
  #ifdef CONFIG_HAVE_RCU_TABLE_FREE
         tlb->batch = NULL;
@@ -1453,10 +1462,11 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
  EXPORT_SYMBOL_GPL(zap_vma_ptes);
  
  /**
- * follow_page - look up a page descriptor from a user-virtual address
+ * follow_page_mask - look up a page descriptor from a user-virtual address
   * @vma: vm_area_struct mapping @address
   * @address: virtual address to look up
   * @flags: flags modifying lookup behaviour
+ * @page_mask: on output, *page_mask is set according to the size of the page
   *
   * @flags can have FOLL_ flags set, defined in <linux/mm.h>
   *
@@ -1464,8 +1474,9 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
   * an error pointer if there is a mapping to something not represented
   * by a page descriptor (see also vm_normal_page()).
   */
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
-                       unsigned int flags)
+struct page *follow_page_mask(struct vm_area_struct *vma,
+                             unsigned long address, unsigned int flags,
+                             unsigned int *page_mask)
  {
         pgd_t *pgd;
         pud_t *pud;
@@ -1475,6 +1486,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
         struct page *page;
         struct mm_struct *mm = vma->vm_mm;
  
+       *page_mask = 0;
+
         page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
         if (!IS_ERR(page)) {
                 BUG_ON(flags & FOLL_GET);
@@ -1521,6 +1534,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                                 page = follow_trans_huge_pmd(vma, address,
                                                              pmd, flags);
                                 spin_unlock(&mm->page_table_lock);
+                               *page_mask = HPAGE_PMD_NR - 1;
                                 goto out;
                         }
                 } else
@@ -1534,8 +1548,24 @@ split_fallthrough:
         ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
  
         pte = *ptep;
-       if (!pte_present(pte))
-               goto no_page;
+       if (!pte_present(pte)) {
+               swp_entry_t entry;
+               /*
+                * KSM's break_ksm() relies upon recognizing a ksm page
+                * even while it is being migrated, so for that case we
+                * need migration_entry_wait().
+                */
+               if (likely(!(flags & FOLL_MIGRATION)))
+                       goto no_page;
+               if (pte_none(pte) || pte_file(pte))
+                       goto no_page;
+               entry = pte_to_swp_entry(pte);
+               if (!is_migration_entry(entry))
+                       goto no_page;
+               pte_unmap_unlock(ptep, ptl);
+               migration_entry_wait(mm, pmd, address);
+               goto split_fallthrough;
+       }
         if ((flags & FOLL_NUMA) && pte_numa(pte))
                 goto no_page;
         if ((flags & FOLL_WRITE) && !pte_write(pte))
@@ -1668,15 +1698,16 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
   * instead of __get_user_pages. __get_user_pages should be used only if
   * you need some special @gup_flags.
   */
-int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-                    unsigned long start, int nr_pages, unsigned int gup_flags,
-                    struct page **pages, struct vm_area_struct **vmas,
-                    int *nonblocking)
+long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long start, unsigned long nr_pages,
+               unsigned int gup_flags, struct page **pages,
+               struct vm_area_struct **vmas, int *nonblocking)
  {
-       int i;
+       long i;
         unsigned long vm_flags;
+       unsigned int page_mask;
  
-       if (nr_pages <= 0)
+       if (!nr_pages)
                 return 0;
  
         VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
@@ -1752,6 +1783,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 get_page(page);
                         }
                         pte_unmap(pte);
+                       page_mask = 0;
                         goto next_page;
                 }
  
@@ -1769,6 +1801,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 do {
                         struct page *page;
                         unsigned int foll_flags = gup_flags;
+                       unsigned int page_increm;
  
                         /*
                          * If we have a pending SIGKILL, don't keep faulting
@@ -1778,7 +1811,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 return i ? i : -ERESTARTSYS;
  
                         cond_resched();
-                       while (!(page = follow_page(vma, start, foll_flags))) {
+                       while (!(page = follow_page_mask(vma, start,
+                                               foll_flags, &page_mask))) {
                                 int ret;
                                 unsigned int fault_flags = 0;
  
@@ -1852,13 +1886,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
  
                                 flush_anon_page(vma, page, start);
                                 flush_dcache_page(page);
+                               page_mask = 0;
                         }
  next_page:
-                       if (vmas)
+                       if (vmas) {
                                 vmas[i] = vma;
-                       i++;
-                       start += PAGE_SIZE;
-                       nr_pages--;
+                               page_mask = 0;
+                       }
+                       page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+                       if (page_increm > nr_pages)
+                               page_increm = nr_pages;
+                       i += page_increm;
+                       start += page_increm * PAGE_SIZE;
+                       nr_pages -= page_increm;
                 } while (nr_pages && start < vma->vm_end);
         } while (nr_pages);
         return i;
@@ -1972,9 +2012,9 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
   *
   * See also get_user_pages_fast, for performance critical applications.
   */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-               unsigned long start, int nr_pages, int write, int force,
-               struct page **pages, struct vm_area_struct **vmas)
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long start, unsigned long nr_pages, int write,
+               int force, struct page **pages, struct vm_area_struct **vmas)
  {
         int flags = FOLL_TOUCH;
  
@@ -2914,7 +2954,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 unsigned int flags, pte_t orig_pte)
  {
         spinlock_t *ptl;
-       struct page *page, *swapcache = NULL;
+       struct page *page, *swapcache;
         swp_entry_t entry;
         pte_t pte;
         int locked;
@@ -2965,9 +3005,11 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                  */
                 ret = VM_FAULT_HWPOISON;
                 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+               swapcache = page;
                 goto out_release;
         }
  
+       swapcache = page;
         locked = lock_page_or_retry(page, mm, flags);
  
         delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2985,16 +3027,11 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
                 goto out_page;
  
-       if (ksm_might_need_to_copy(page, vma, address)) {
-               swapcache = page;
-               page = ksm_does_need_to_copy(page, vma, address);
-
-               if (unlikely(!page)) {
-                       ret = VM_FAULT_OOM;
-                       page = swapcache;
-                       swapcache = NULL;
-                       goto out_page;
-               }
+       page = ksm_might_need_to_copy(page, vma, address);
+       if (unlikely(!page)) {
+               ret = VM_FAULT_OOM;
+               page = swapcache;
+               goto out_page;
         }
  
         if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -3039,7 +3076,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
         flush_icache_page(vma, page);
         set_pte_at(mm, address, page_table, pte);
-       do_page_add_anon_rmap(page, vma, address, exclusive);
+       if (page == swapcache)
+               do_page_add_anon_rmap(page, vma, address, exclusive);
+       else /* ksm created a completely new copy */
+               page_add_new_anon_rmap(page, vma, address);
         /* It's better to call commit-charge after rmap is established */
         mem_cgroup_commit_charge_swapin(page, ptr);
  
@@ -3047,7 +3087,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
                 try_to_free_swap(page);
         unlock_page(page);
-       if (swapcache) {
+       if (page != swapcache) {
                 /*
                  * Hold the lock to avoid the swap entry to be reused
                  * until we take the PT lock for the pte_same() check
@@ -3080,7 +3120,7 @@ out_page:
         unlock_page(page);
  out_release:
         page_cache_release(page);
-       if (swapcache) {
+       if (page != swapcache) {
                 unlock_page(swapcache);
                 page_cache_release(swapcache);
         }
@@ -3706,6 +3746,14 @@ retry:
                 if (pmd_trans_huge(orig_pmd)) {
                         unsigned int dirty = flags & FAULT_FLAG_WRITE;
  
+                       /*
+                        * If the pmd is splitting, return and retry the
+                        * fault.  Alternative: wait until the split
+                        * is done, and goto retry.
+                        */
+                       if (pmd_trans_splitting(orig_pmd))
+                               return 0;
+
                         if (pmd_numa(orig_pmd))
                                 return do_huge_pmd_numa_page(mm, vma, address,
                                                              orig_pmd, pmd);
@@ -3808,30 +3856,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
  }
  #endif /* __PAGETABLE_PMD_FOLDED */
  
-int make_pages_present(unsigned long addr, unsigned long end)
-{
-       int ret, len, write;
-       struct vm_area_struct * vma;
-
-       vma = find_vma(current->mm, addr);
-       if (!vma)
-               return -ENOMEM;
-       /*
-        * We want to touch writable mappings with a write fault in order
-        * to break COW, except for shared mappings because these don't COW
-        * and we would not want to dirty them for nothing.
-        */
-       write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
-       BUG_ON(addr >= end);
-       BUG_ON(end > vma->vm_end);
-       len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
-       ret = get_user_pages(current, current->mm, addr,
-                       len, write, 0, NULL, NULL);
-       if (ret < 0)
-               return ret;
-       return ret == len ? 0 : -EFAULT;
-}
-
  #if !defined(__HAVE_ARCH_GATE_AREA)
  
  #if defined(AT_SYSINFO_EHDR)