Merge tag 'v3.10.86' into update
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / mm / memory.c
index 61a262b08e53efa2f69c1387e488bea6f87bb0f9..313d94a9ae260fcdf1d4da7de288b055171afd7d 100644 (file)
 
 #include "internal.h"
 
+#ifdef CONFIG_MTK_EXTMEM
+extern bool extmem_in_mspace(struct vm_area_struct *vma);
+extern unsigned long get_virt_from_mspace(unsigned long pa);
+#endif
+
 #ifdef LAST_NID_NOT_IN_PAGE_FLAGS
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
 #endif
@@ -211,14 +216,15 @@ static int tlb_next_batch(struct mmu_gather *tlb)
  *     tear-down from @mm. The @fullmm argument is used when @mm is without
  *     users and we're going to destroy the full address space (exit/execve).
  */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
        tlb->mm = mm;
 
-       tlb->fullmm     = fullmm;
+       /* Is it from 0 to ~0? */
+       tlb->fullmm     = !(start | (end+1));
        tlb->need_flush_all = 0;
-       tlb->start      = -1UL;
-       tlb->end        = 0;
+       tlb->start      = start;
+       tlb->end        = end;
        tlb->need_flush = 0;
        tlb->local.next = NULL;
        tlb->local.nr   = 0;
@@ -258,8 +264,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
 {
        struct mmu_gather_batch *batch, *next;
 
-       tlb->start = start;
-       tlb->end   = end;
        tlb_flush_mmu(tlb);
 
        /* keep the page table cache within bounds */
@@ -835,20 +839,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                if (!pte_file(pte)) {
                        swp_entry_t entry = pte_to_swp_entry(pte);
 
-                       if (swap_duplicate(entry) < 0)
-                               return entry.val;
-
-                       /* make sure dst_mm is on swapoff's mmlist. */
-                       if (unlikely(list_empty(&dst_mm->mmlist))) {
-                               spin_lock(&mmlist_lock);
-                               if (list_empty(&dst_mm->mmlist))
-                                       list_add(&dst_mm->mmlist,
-                                                &src_mm->mmlist);
-                               spin_unlock(&mmlist_lock);
-                       }
-                       if (likely(!non_swap_entry(entry)))
+                       if (likely(!non_swap_entry(entry))) {
+                               if (swap_duplicate(entry) < 0)
+                                       return entry.val;
+
+                               /* make sure dst_mm is on swapoff's mmlist. */
+                               if (unlikely(list_empty(&dst_mm->mmlist))) {
+                                       spin_lock(&mmlist_lock);
+                                       if (list_empty(&dst_mm->mmlist))
+                                               list_add(&dst_mm->mmlist,
+                                                        &src_mm->mmlist);
+                                       spin_unlock(&mmlist_lock);
+                               }
                                rss[MM_SWAPENTS]++;
-                       else if (is_migration_entry(entry)) {
+                       else if (is_migration_entry(entry)) {
                                page = migration_entry_to_page(entry);
 
                                if (PageAnon(page))
@@ -1203,13 +1207,23 @@ again:
         * and page-free while holding it.
         */
        if (force_flush) {
+               unsigned long old_end;
+
                force_flush = 0;
 
-#ifdef HAVE_GENERIC_MMU_GATHER
-               tlb->start = addr;
-               tlb->end = end;
-#endif
+               /*
+                * Flush the TLB just for the previous segment,
+                * then update the range to be the remaining
+                * TLB range.
+                */
+               old_end = tlb->end;
+               tlb->end = addr;
+
                tlb_flush_mmu(tlb);
+
+               tlb->start = addr;
+               tlb->end = old_end;
+
                if (addr != end)
                        goto again;
        }
@@ -1396,7 +1410,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
        unsigned long end = start + size;
 
        lru_add_drain();
-       tlb_gather_mmu(&tlb, mm, 0);
+       tlb_gather_mmu(&tlb, mm, start, end);
        update_hiwater_rss(mm);
        mmu_notifier_invalidate_range_start(mm, start, end);
        for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
@@ -1422,7 +1436,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
        unsigned long end = address + size;
 
        lru_add_drain();
-       tlb_gather_mmu(&tlb, mm, 0);
+       tlb_gather_mmu(&tlb, mm, address, end);
        update_hiwater_rss(mm);
        mmu_notifier_invalidate_range_start(mm, address, end);
        unmap_single_vma(&tlb, vma, address, end, details);
@@ -1453,6 +1467,16 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+       return pte_write(pte) ||
+               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
 /**
  * follow_page_mask - look up a page descriptor from a user-virtual address
  * @vma: vm_area_struct mapping @address
@@ -1560,7 +1584,7 @@ split_fallthrough:
        }
        if ((flags & FOLL_NUMA) && pte_numa(pte))
                goto no_page;
-       if ((flags & FOLL_WRITE) && !pte_write(pte))
+       if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags))
                goto unlock;
 
        page = vm_normal_page(vma, address, pte);
@@ -1634,6 +1658,7 @@ no_page_table:
                return ERR_PTR(-EFAULT);
        return page;
 }
+EXPORT_SYMBOL_GPL(follow_page_mask);
 
 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
 {
@@ -1778,11 +1803,24 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        page_mask = 0;
                        goto next_page;
                }
-
+    #ifdef CONFIG_MTK_EXTMEM
+        if (!vma || !(vm_flags & vma->vm_flags))
+               {
+                   return i ? : -EFAULT;
+        }
+
+               if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+               {
+                   /*Would pass VM_IO | VM_RESERVED | VM_PFNMAP. (for Reserved Physical Memory PFN Mapping Usage)*/
+                   if(!((vma->vm_flags&VM_IO)&&(vma->vm_flags&VM_RESERVED)&&(vma->vm_flags&VM_PFNMAP)))
+                           return i ? : -EFAULT;
+        }
+    #else
                if (!vma ||
                    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
                    !(vm_flags & vma->vm_flags))
                        return i ? : -EFAULT;
+    #endif
 
                if (is_vm_hugetlb_page(vma)) {
                        i = follow_hugetlb_page(mm, vma, pages, vmas,
@@ -1835,7 +1873,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                else
                                                        return -EFAULT;
                                        }
-                                       if (ret & VM_FAULT_SIGBUS)
+                                       if (ret & (VM_FAULT_SIGBUS |
+                                                  VM_FAULT_SIGSEGV))
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -1867,7 +1906,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
-                                       foll_flags &= ~FOLL_WRITE;
+                                       foll_flags |= FOLL_COW;
 
                                cond_resched();
                        }
@@ -1928,19 +1967,24 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long address, unsigned int fault_flags)
 {
        struct vm_area_struct *vma;
+       vm_flags_t vm_flags;
        int ret;
 
        vma = find_extend_vma(mm, address);
        if (!vma || address < vma->vm_start)
                return -EFAULT;
 
+       vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+       if (!(vm_flags & vma->vm_flags))
+               return -EFAULT;
+
        ret = handle_mm_fault(mm, vma, address, fault_flags);
        if (ret & VM_FAULT_ERROR) {
                if (ret & VM_FAULT_OOM)
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return -EHWPOISON;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
@@ -2353,12 +2397,18 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
         * un-COW'ed pages by matching them up with "vma->vm_pgoff".
         * See vm_normal_page() for details.
         */
+#ifdef CONFIG_MTK_EXTMEM
+       if (addr == vma->vm_start && end == vma->vm_end) {
+               vma->vm_pgoff = pfn;
+       } else if (is_cow_mapping(vma->vm_flags))
+               return -EINVAL;
+#else
        if (is_cow_mapping(vma->vm_flags)) {
                if (addr != vma->vm_start || end != vma->vm_end)
                        return -EINVAL;
                vma->vm_pgoff = pfn;
        }
-
+#endif
        err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
        if (err)
                return -EINVAL;
@@ -3186,7 +3236,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                if (prev && prev->vm_end == address)
                        return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
 
-               expand_downwards(vma, address - PAGE_SIZE);
+               return expand_downwards(vma, address - PAGE_SIZE);
        }
        if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
                struct vm_area_struct *next = vma->vm_next;
@@ -3195,7 +3245,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                if (next && next->vm_start == address + PAGE_SIZE)
                        return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
 
-               expand_upwards(vma, address + PAGE_SIZE);
+               return expand_upwards(vma, address + PAGE_SIZE);
        }
        return 0;
 }
@@ -3215,9 +3265,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        pte_unmap(page_table);
 
+       /* File mapping without ->vm_ops ? */
+       if (vma->vm_flags & VM_SHARED)
+               return VM_FAULT_SIGBUS;
+
        /* Check if we need to add a guard page to the stack */
        if (check_stack_guard_page(vma, address) < 0)
-               return VM_FAULT_SIGBUS;
+               return VM_FAULT_SIGSEGV;
 
        /* Use the zero-page for reads */
        if (!(flags & FAULT_FLAG_WRITE)) {
@@ -3479,7 +3533,14 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        pgoff_t pgoff = (((address & PAGE_MASK)
                        - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 
+       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+       if (!vma->vm_ops->fault)
+               return VM_FAULT_SIGBUS;
+
        pte_unmap(page_table);
+       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+       if (!vma->vm_ops->fault)
+               return VM_FAULT_SIGBUS;
        return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
@@ -3516,12 +3577,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-                               unsigned long addr, int current_nid)
+                               unsigned long addr, int page_nid)
 {
        get_page(page);
 
        count_vm_numa_event(NUMA_HINT_FAULTS);
-       if (current_nid == numa_node_id())
+       if (page_nid == numa_node_id())
                count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
        return mpol_misplaced(page, vma, addr);
@@ -3532,7 +3593,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct page *page = NULL;
        spinlock_t *ptl;
-       int current_nid = -1;
+       int page_nid = -1;
        int target_nid;
        bool migrated = false;
 
@@ -3562,15 +3623,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                return 0;
        }
 
-       current_nid = page_to_nid(page);
-       target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+       page_nid = page_to_nid(page);
+       target_nid = numa_migrate_prep(page, vma, addr, page_nid);
        pte_unmap_unlock(ptep, ptl);
        if (target_nid == -1) {
-               /*
-                * Account for the fault against the current node if it not
-                * being replaced regardless of where the page is located.
-                */
-               current_nid = numa_node_id();
                put_page(page);
                goto out;
        }
@@ -3578,11 +3634,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        /* Migrate to the requested node */
        migrated = migrate_misplaced_page(page, target_nid);
        if (migrated)
-               current_nid = target_nid;
+               page_nid = target_nid;
 
 out:
-       if (current_nid != -1)
-               task_numa_fault(current_nid, 1, migrated);
+       if (page_nid != -1)
+               task_numa_fault(page_nid, 1, migrated);
        return 0;
 }
 
@@ -3597,7 +3653,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long offset;
        spinlock_t *ptl;
        bool numa = false;
-       int local_nid = numa_node_id();
 
        spin_lock(&mm->page_table_lock);
        pmd = *pmdp;
@@ -3620,9 +3675,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
                pte_t pteval = *pte;
                struct page *page;
-               int curr_nid = local_nid;
+               int page_nid = -1;
                int target_nid;
-               bool migrated;
+               bool migrated = false;
+
                if (!pte_present(pteval))
                        continue;
                if (!pte_numa(pteval))
@@ -3644,25 +3700,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                if (unlikely(page_mapcount(page) != 1))
                        continue;
 
-               /*
-                * Note that the NUMA fault is later accounted to either
-                * the node that is currently running or where the page is
-                * migrated to.
-                */
-               curr_nid = local_nid;
-               target_nid = numa_migrate_prep(page, vma, addr,
-                                              page_to_nid(page));
-               if (target_nid == -1) {
+               page_nid = page_to_nid(page);
+               target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+               pte_unmap_unlock(pte, ptl);
+               if (target_nid != -1) {
+                       migrated = migrate_misplaced_page(page, target_nid);
+                       if (migrated)
+                               page_nid = target_nid;
+               } else {
                        put_page(page);
-                       continue;
                }
 
-               /* Migrate to the requested node */
-               pte_unmap_unlock(pte, ptl);
-               migrated = migrate_misplaced_page(page, target_nid);
-               if (migrated)
-                       curr_nid = target_nid;
-               task_numa_fault(curr_nid, 1, migrated);
+               if (page_nid != -1)
+                       task_numa_fault(page_nid, 1, migrated);
 
                pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
        }
@@ -3702,11 +3752,9 @@ int handle_pte_fault(struct mm_struct *mm,
        entry = *pte;
        if (!pte_present(entry)) {
                if (pte_none(entry)) {
-                       if (vma->vm_ops) {
-                               if (likely(vma->vm_ops->fault))
-                                       return do_linear_fault(mm, vma, address,
+                       if (vma->vm_ops)
+                               return do_linear_fault(mm, vma, address,
                                                pte, pmd, flags, entry);
-                       }
                        return do_anonymous_page(mm, vma, address,
                                                 pte, pmd, flags);
                }
@@ -3751,22 +3799,14 @@ unlock:
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-               unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                            unsigned long address, unsigned int flags)
 {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
-       __set_current_state(TASK_RUNNING);
-
-       count_vm_event(PGFAULT);
-       mem_cgroup_count_vm_event(mm, PGFAULT);
-
-       /* do counter updates before entering really critical section. */
-       check_sync_rss_stat(current);
-
        if (unlikely(is_vm_hugetlb_page(vma)))
                return hugetlb_fault(mm, vma, address, flags);
 
@@ -3847,6 +3887,43 @@ retry:
        return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                   unsigned long address, unsigned int flags)
+{
+       int ret;
+
+       __set_current_state(TASK_RUNNING);
+
+       count_vm_event(PGFAULT);
+       mem_cgroup_count_vm_event(mm, PGFAULT);
+
+       /* do counter updates before entering really critical section. */
+       check_sync_rss_stat(current);
+
+       /*
+        * Enable the memcg OOM handling for faults triggered in user
+        * space.  Kernel faults are handled more gracefully.
+        */
+       if (flags & FAULT_FLAG_USER)
+               mem_cgroup_oom_enable();
+
+       ret = __handle_mm_fault(mm, vma, address, flags);
+
+       if (flags & FAULT_FLAG_USER) {
+               mem_cgroup_oom_disable();
+                /*
+                 * The task may have entered a memcg OOM situation but
+                 * if the allocation error was handled gracefully (no
+                 * VM_FAULT_OOM), there is no need to kill anything.
+                 * Just clean up the OOM state peacefully.
+                 */
+                if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+                        mem_cgroup_oom_synchronize(false);
+       }
+
+       return ret;
+}
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
@@ -4056,7 +4133,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
        if (follow_phys(vma, addr, write, &prot, &phys_addr))
                return -EINVAL;
 
-       maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+       maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
        if (write)
                memcpy_toio(maddr + offset, buf, len);
        else
@@ -4065,6 +4142,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 
        return len;
 }
+EXPORT_SYMBOL_GPL(generic_access_phys);
 #endif
 
 /*
@@ -4087,6 +4165,21 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                ret = get_user_pages(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
                if (ret <= 0) {
+#ifdef CONFIG_MTK_EXTMEM
+                       if (!write) {
+                               vma = find_vma(mm, addr);
+                               if (!vma || vma->vm_start > addr)
+                                       break;
+                               if (vma->vm_end < addr + len)
+                                       len = vma->vm_end - addr;
+                               if (extmem_in_mspace(vma)) {
+                                       void *extmem_va = (void *)get_virt_from_mspace(vma->vm_pgoff << PAGE_SHIFT) + (addr - vma->vm_start);
+                                       memcpy(buf, extmem_va, len);
+                                       buf += len;
+                                       break;
+                               }
+                       }
+#endif
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.