Merge tag 'v3.10.86' into update

[GitHub/mt8127/android_kernel_alcatel_ttab.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 61a262b08e53efa2f69c1387e488bea6f87bb0f9..313d94a9ae260fcdf1d4da7de288b055171afd7d 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,6 +69,11 @@
  
  #include "internal.h"
  
+#ifdef CONFIG_MTK_EXTMEM
+extern bool extmem_in_mspace(struct vm_area_struct *vma);
+extern unsigned long get_virt_from_mspace(unsigned long pa);
+#endif
+
  #ifdef LAST_NID_NOT_IN_PAGE_FLAGS
  #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
  #endif
@@ -211,14 +216,15 @@ static int tlb_next_batch(struct mmu_gather *tlb)
   *     tear-down from @mm. The @fullmm argument is used when @mm is without
   *     users and we're going to destroy the full address space (exit/execve).
   */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
  {
         tlb->mm = mm;
  
-       tlb->fullmm     = fullmm;
+       /* Is it from 0 to ~0? */
+       tlb->fullmm     = !(start | (end+1));
         tlb->need_flush_all = 0;
-       tlb->start      = -1UL;
-       tlb->end        = 0;
+       tlb->start      = start;
+       tlb->end        = end;
         tlb->need_flush = 0;
         tlb->local.next = NULL;
         tlb->local.nr   = 0;
@@ -258,8 +264,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
  {
         struct mmu_gather_batch *batch, *next;
  
-       tlb->start = start;
-       tlb->end   = end;
         tlb_flush_mmu(tlb);
  
         /* keep the page table cache within bounds */
@@ -835,20 +839,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 if (!pte_file(pte)) {
                         swp_entry_t entry = pte_to_swp_entry(pte);
  
-                       if (swap_duplicate(entry) < 0)
-                               return entry.val;
-
-                       /* make sure dst_mm is on swapoff's mmlist. */
-                       if (unlikely(list_empty(&dst_mm->mmlist))) {
-                               spin_lock(&mmlist_lock);
-                               if (list_empty(&dst_mm->mmlist))
-                                       list_add(&dst_mm->mmlist,
-                                                &src_mm->mmlist);
-                               spin_unlock(&mmlist_lock);
-                       }
-                       if (likely(!non_swap_entry(entry)))
+                       if (likely(!non_swap_entry(entry))) {
+                               if (swap_duplicate(entry) < 0)
+                                       return entry.val;
+
+                               /* make sure dst_mm is on swapoff's mmlist. */
+                               if (unlikely(list_empty(&dst_mm->mmlist))) {
+                                       spin_lock(&mmlist_lock);
+                                       if (list_empty(&dst_mm->mmlist))
+                                               list_add(&dst_mm->mmlist,
+                                                        &src_mm->mmlist);
+                                       spin_unlock(&mmlist_lock);
+                               }
                                 rss[MM_SWAPENTS]++;
-                       else if (is_migration_entry(entry)) {
+                       } else if (is_migration_entry(entry)) {
                                 page = migration_entry_to_page(entry);
  
                                 if (PageAnon(page))
@@ -1203,13 +1207,23 @@ again:
          * and page-free while holding it.
          */
         if (force_flush) {
+               unsigned long old_end;
+
                 force_flush = 0;
  
-#ifdef HAVE_GENERIC_MMU_GATHER
-               tlb->start = addr;
-               tlb->end = end;
-#endif
+               /*
+                * Flush the TLB just for the previous segment,
+                * then update the range to be the remaining
+                * TLB range.
+                */
+               old_end = tlb->end;
+               tlb->end = addr;
+
                 tlb_flush_mmu(tlb);
+
+               tlb->start = addr;
+               tlb->end = old_end;
+
                 if (addr != end)
                         goto again;
         }
@@ -1396,7 +1410,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
         unsigned long end = start + size;
  
         lru_add_drain();
-       tlb_gather_mmu(&tlb, mm, 0);
+       tlb_gather_mmu(&tlb, mm, start, end);
         update_hiwater_rss(mm);
         mmu_notifier_invalidate_range_start(mm, start, end);
         for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
@@ -1422,7 +1436,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
         unsigned long end = address + size;
  
         lru_add_drain();
-       tlb_gather_mmu(&tlb, mm, 0);
+       tlb_gather_mmu(&tlb, mm, address, end);
         update_hiwater_rss(mm);
         mmu_notifier_invalidate_range_start(mm, address, end);
         unmap_single_vma(&tlb, vma, address, end, details);
@@ -1453,6 +1467,16 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL_GPL(zap_vma_ptes);
  
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only after we've gone
+ * through a COW cycle (FOLL_COW set in @flags) and the pte is dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+       return pte_write(pte) ||
+               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
  /**
   * follow_page_mask - look up a page descriptor from a user-virtual address
   * @vma: vm_area_struct mapping @address
@@ -1560,7 +1584,7 @@ split_fallthrough:
         }
         if ((flags & FOLL_NUMA) && pte_numa(pte))
                 goto no_page;
-       if ((flags & FOLL_WRITE) && !pte_write(pte))
+       if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags))
                 goto unlock;
  
         page = vm_normal_page(vma, address, pte);
@@ -1634,6 +1658,7 @@ no_page_table:
                 return ERR_PTR(-EFAULT);
         return page;
  }
+EXPORT_SYMBOL_GPL(follow_page_mask);
  
  static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
  {
@@ -1778,11 +1803,24 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         page_mask = 0;
                         goto next_page;
                 }
-
+    #ifdef CONFIG_MTK_EXTMEM
+        if (!vma || !(vm_flags & vma->vm_flags))
+               {
+                   return i ? : -EFAULT;
+        }
+
+               if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+               {
+                   /* Only VMAs with all of VM_IO, VM_RESERVED and VM_PFNMAP set may pass (reserved physical memory PFN mappings). */
+                   if(!((vma->vm_flags&VM_IO)&&(vma->vm_flags&VM_RESERVED)&&(vma->vm_flags&VM_PFNMAP)))
+                           return i ? : -EFAULT;
+        }
+    #else
                 if (!vma ||
                     (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                     !(vm_flags & vma->vm_flags))
                         return i ? : -EFAULT;
+    #endif
  
                 if (is_vm_hugetlb_page(vma)) {
                         i = follow_hugetlb_page(mm, vma, pages, vmas,
@@ -1835,7 +1873,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                 else
                                                         return -EFAULT;
                                         }
-                                       if (ret & VM_FAULT_SIGBUS)
+                                       if (ret & (VM_FAULT_SIGBUS |
+                                                  VM_FAULT_SIGSEGV))
                                                 return i ? i : -EFAULT;
                                         BUG();
                                 }
@@ -1867,7 +1906,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                  */
                                 if ((ret & VM_FAULT_WRITE) &&
                                     !(vma->vm_flags & VM_WRITE))
-                                       foll_flags &= ~FOLL_WRITE;
+                                       foll_flags |= FOLL_COW;
  
                                 cond_resched();
                         }
@@ -1928,19 +1967,24 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                      unsigned long address, unsigned int fault_flags)
  {
         struct vm_area_struct *vma;
+       vm_flags_t vm_flags;
         int ret;
  
         vma = find_extend_vma(mm, address);
         if (!vma || address < vma->vm_start)
                 return -EFAULT;
  
+       vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+       if (!(vm_flags & vma->vm_flags))
+               return -EFAULT;
+
         ret = handle_mm_fault(mm, vma, address, fault_flags);
         if (ret & VM_FAULT_ERROR) {
                 if (ret & VM_FAULT_OOM)
                         return -ENOMEM;
                 if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                         return -EHWPOISON;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                         return -EFAULT;
                 BUG();
         }
@@ -2353,12 +2397,18 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
          * un-COW'ed pages by matching them up with "vma->vm_pgoff".
          * See vm_normal_page() for details.
          */
+#ifdef CONFIG_MTK_EXTMEM
+       if (addr == vma->vm_start && end == vma->vm_end) {
+               vma->vm_pgoff = pfn;
+       } else if (is_cow_mapping(vma->vm_flags))
+               return -EINVAL;
+#else
         if (is_cow_mapping(vma->vm_flags)) {
                 if (addr != vma->vm_start || end != vma->vm_end)
                         return -EINVAL;
                 vma->vm_pgoff = pfn;
         }
-
+#endif
         err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
         if (err)
                 return -EINVAL;
@@ -3186,7 +3236,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                 if (prev && prev->vm_end == address)
                         return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
  
-               expand_downwards(vma, address - PAGE_SIZE);
+               return expand_downwards(vma, address - PAGE_SIZE);
         }
         if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
                 struct vm_area_struct *next = vma->vm_next;
@@ -3195,7 +3245,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                 if (next && next->vm_start == address + PAGE_SIZE)
                         return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
  
-               expand_upwards(vma, address + PAGE_SIZE);
+               return expand_upwards(vma, address + PAGE_SIZE);
         }
         return 0;
  }
@@ -3215,9 +3265,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
  
         pte_unmap(page_table);
  
+       /* File mapping without ->vm_ops ? */
+       if (vma->vm_flags & VM_SHARED)
+               return VM_FAULT_SIGBUS;
+
         /* Check if we need to add a guard page to the stack */
         if (check_stack_guard_page(vma, address) < 0)
-               return VM_FAULT_SIGBUS;
+               return VM_FAULT_SIGSEGV;
  
         /* Use the zero-page for reads */
         if (!(flags & FAULT_FLAG_WRITE)) {
@@ -3479,7 +3533,14 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         pgoff_t pgoff = (((address & PAGE_MASK)
                         - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
  
+       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+       if (!vma->vm_ops->fault)
+               return VM_FAULT_SIGBUS;
+
         pte_unmap(page_table);
+       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+       if (!vma->vm_ops->fault)
+               return VM_FAULT_SIGBUS;
         return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
  }
  
@@ -3516,12 +3577,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  }
  
  int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-                               unsigned long addr, int current_nid)
+                               unsigned long addr, int page_nid)
  {
         get_page(page);
  
         count_vm_numa_event(NUMA_HINT_FAULTS);
-       if (current_nid == numa_node_id())
+       if (page_nid == numa_node_id())
                 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
  
         return mpol_misplaced(page, vma, addr);
@@ -3532,7 +3593,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
  {
         struct page *page = NULL;
         spinlock_t *ptl;
-       int current_nid = -1;
+       int page_nid = -1;
         int target_nid;
         bool migrated = false;
  
@@ -3562,15 +3623,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 return 0;
         }
  
-       current_nid = page_to_nid(page);
-       target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+       page_nid = page_to_nid(page);
+       target_nid = numa_migrate_prep(page, vma, addr, page_nid);
         pte_unmap_unlock(ptep, ptl);
         if (target_nid == -1) {
-               /*
-                * Account for the fault against the current node if it not
-                * being replaced regardless of where the page is located.
-                */
-               current_nid = numa_node_id();
                 put_page(page);
                 goto out;
         }
@@ -3578,11 +3634,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         /* Migrate to the requested node */
         migrated = migrate_misplaced_page(page, target_nid);
         if (migrated)
-               current_nid = target_nid;
+               page_nid = target_nid;
  
  out:
-       if (current_nid != -1)
-               task_numa_fault(current_nid, 1, migrated);
+       if (page_nid != -1)
+               task_numa_fault(page_nid, 1, migrated);
         return 0;
  }
  
@@ -3597,7 +3653,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         unsigned long offset;
         spinlock_t *ptl;
         bool numa = false;
-       int local_nid = numa_node_id();
  
         spin_lock(&mm->page_table_lock);
         pmd = *pmdp;
@@ -3620,9 +3675,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
                 pte_t pteval = *pte;
                 struct page *page;
-               int curr_nid = local_nid;
+               int page_nid = -1;
                 int target_nid;
-               bool migrated;
+               bool migrated = false;
+
                 if (!pte_present(pteval))
                         continue;
                 if (!pte_numa(pteval))
@@ -3644,25 +3700,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 if (unlikely(page_mapcount(page) != 1))
                         continue;
  
-               /*
-                * Note that the NUMA fault is later accounted to either
-                * the node that is currently running or where the page is
-                * migrated to.
-                */
-               curr_nid = local_nid;
-               target_nid = numa_migrate_prep(page, vma, addr,
-                                              page_to_nid(page));
-               if (target_nid == -1) {
+               page_nid = page_to_nid(page);
+               target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+               pte_unmap_unlock(pte, ptl);
+               if (target_nid != -1) {
+                       migrated = migrate_misplaced_page(page, target_nid);
+                       if (migrated)
+                               page_nid = target_nid;
+               } else {
                         put_page(page);
-                       continue;
                 }
  
-               /* Migrate to the requested node */
-               pte_unmap_unlock(pte, ptl);
-               migrated = migrate_misplaced_page(page, target_nid);
-               if (migrated)
-                       curr_nid = target_nid;
-               task_numa_fault(curr_nid, 1, migrated);
+               if (page_nid != -1)
+                       task_numa_fault(page_nid, 1, migrated);
  
                 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
         }
@@ -3702,11 +3752,9 @@ int handle_pte_fault(struct mm_struct *mm,
         entry = *pte;
         if (!pte_present(entry)) {
                 if (pte_none(entry)) {
-                       if (vma->vm_ops) {
-                               if (likely(vma->vm_ops->fault))
-                                       return do_linear_fault(mm, vma, address,
+                       if (vma->vm_ops)
+                               return do_linear_fault(mm, vma, address,
                                                 pte, pmd, flags, entry);
-                       }
                         return do_anonymous_page(mm, vma, address,
                                                  pte, pmd, flags);
                 }
@@ -3751,22 +3799,14 @@ unlock:
  /*
   * By the time we get here, we already hold the mm semaphore
   */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-               unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                            unsigned long address, unsigned int flags)
  {
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
         pte_t *pte;
  
-       __set_current_state(TASK_RUNNING);
-
-       count_vm_event(PGFAULT);
-       mem_cgroup_count_vm_event(mm, PGFAULT);
-
-       /* do counter updates before entering really critical section. */
-       check_sync_rss_stat(current);
-
         if (unlikely(is_vm_hugetlb_page(vma)))
                 return hugetlb_fault(mm, vma, address, flags);
  
@@ -3847,6 +3887,43 @@ retry:
         return handle_pte_fault(mm, vma, address, pte, pmd, flags);
  }
  
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                   unsigned long address, unsigned int flags)
+{
+       int ret;
+
+       __set_current_state(TASK_RUNNING);
+
+       count_vm_event(PGFAULT);
+       mem_cgroup_count_vm_event(mm, PGFAULT);
+
+       /* Do counter updates before entering the really critical section. */
+       check_sync_rss_stat(current);
+
+       /*
+        * Enable memcg OOM handling only for faults triggered from user
+        * space (FAULT_FLAG_USER).  Kernel faults are handled more gracefully.
+        */
+       if (flags & FAULT_FLAG_USER)
+               mem_cgroup_oom_enable();
+
+       ret = __handle_mm_fault(mm, vma, address, flags);
+
+       if (flags & FAULT_FLAG_USER) {
+               mem_cgroup_oom_disable();
+                /*
+                 * The task may have entered a memcg OOM situation, but
+                 * if the allocation error was handled gracefully (no
+                 * VM_FAULT_OOM in ret), there is no need to kill anything.
+                 * Just clean up the OOM state peacefully.
+                 */
+                if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+                        mem_cgroup_oom_synchronize(false);
+       }
+
+       return ret;
+}
+
  #ifndef __PAGETABLE_PUD_FOLDED
  /*
   * Allocate page upper directory.
@@ -4056,7 +4133,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
         if (follow_phys(vma, addr, write, &prot, &phys_addr))
                 return -EINVAL;
  
-       maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+       maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
         if (write)
                 memcpy_toio(maddr + offset, buf, len);
         else
@@ -4065,6 +4142,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
  
         return len;
  }
+EXPORT_SYMBOL_GPL(generic_access_phys);
  #endif
  
  /*
@@ -4087,6 +4165,21 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                 ret = get_user_pages(tsk, mm, addr, 1,
                                 write, 1, &page, &vma);
                 if (ret <= 0) {
+#ifdef CONFIG_MTK_EXTMEM
+                       if (!write) {
+                               vma = find_vma(mm, addr);
+                               if (!vma || vma->vm_start > addr)
+                                       break;
+                               if (vma->vm_end < addr + len)
+                                       len = vma->vm_end - addr;
+                               if (extmem_in_mspace(vma)) {
+                                       void *extmem_va = (void *)get_virt_from_mspace(vma->vm_pgoff << PAGE_SHIFT) + (addr - vma->vm_start);
+                                       memcpy(buf, extmem_va, len);
+                                       buf += len;
+                                       break;
+                               }
+                       }
+#endif
                         /*
                          * Check if this is a VM_IO | VM_PFNMAP VMA, which
                          * we can access using slightly different code.