mm: mm_event: add special kernel allocation stat
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] mm/memory.c
index a728bed16c206902de6498921a1d130d141ff7b7..16956844e5535450ea80926db381c4b98c34ff52 100644
@@ -81,7 +81,7 @@
 
 #include "internal.h"
 
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
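
This first hunk keeps the last_cpupid layout warning for real configurations but silences it under CONFIG_COMPILE_TEST, where automated randconfig/allmodconfig builds produce the NUMA combination on purpose. A stand-alone sketch of the gating pattern; the BAD_NUMA_COMBO macro below is invented for illustration:

    /* Gate a #warning so compile-test builds stay quiet while real
     * configurations still see it. Uncomment CONFIG_COMPILE_TEST to
     * model a build-bot configuration. */
    #define BAD_NUMA_COMBO 1
    /* #define CONFIG_COMPILE_TEST 1 */

    #if defined(BAD_NUMA_COMBO) && !defined(CONFIG_COMPILE_TEST)
    #warning unfortunate config, page-frame grows for last_cpupid
    #endif

    int main(void) { return 0; }
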
@@ -246,9 +246,6 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 
        tlb_flush(tlb);
        mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-       tlb_table_flush(tlb);
-#endif
        __tlb_reset_range(tlb);
 }
 
@@ -256,6 +253,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 {
        struct mmu_gather_batch *batch;
 
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+       tlb_table_flush(tlb);
+#endif
        for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
                free_pages_and_swap_cache(batch->pages, batch->nr);
                batch->nr = 0;
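
The two hunks above relocate the RCU page-table batch drain: tlb_flush_mmu_tlbonly() becomes a pure TLB invalidation, and tlb_table_flush() now runs at the top of tlb_flush_mmu_free(), so queued table pages are drained on the same path that frees the gathered pages. A toy user-space model of the resulting split; all names and counts are invented:

    #include <stdio.h>

    struct toy_gather {
            int pending_pages;
            int pending_tables;
    };

    /* TLB-only path: invalidate, but leave the table batch queued. */
    static void flush_tlb_only(struct toy_gather *tlb)
    {
            printf("TLB invalidate (tables still queued: %d)\n",
                   tlb->pending_tables);
    }

    /* Free path: drain the table batch first, then the page batch,
     * mirroring where the patch moved tlb_table_flush(). */
    static void flush_free(struct toy_gather *tlb)
    {
            printf("deferred-free %d page tables\n", tlb->pending_tables);
            tlb->pending_tables = 0;
            printf("free %d gathered pages\n", tlb->pending_pages);
            tlb->pending_pages = 0;
    }

    int main(void)
    {
            struct toy_gather tlb = { .pending_pages = 8, .pending_tables = 2 };

            flush_tlb_only(&tlb);
            flush_free(&tlb);
            return 0;
    }
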
@@ -331,6 +331,21 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
  * See the comment near struct mmu_table_batch.
  */
 
+/*
+ * If we want tlb_remove_table() to imply TLB invalidates.
+ */
+static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+{
+#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+       /*
+        * Invalidate page-table caches used by hardware walkers. Then we still
+        * need to RCU-sched wait while freeing the pages because software
+        * walkers can still be in-flight.
+        */
+       tlb_flush_mmu_tlbonly(tlb);
+#endif
+}
+
 static void tlb_remove_table_smp_sync(void *arg)
 {
        /* Simply deliver the interrupt */
@@ -367,6 +382,7 @@ void tlb_table_flush(struct mmu_gather *tlb)
        struct mmu_table_batch **batch = &tlb->batch;
 
        if (*batch) {
+               tlb_table_invalidate(tlb);
                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
                *batch = NULL;
        }
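
With tlb_table_invalidate() called before the batch is queued on call_rcu_sched(), architectures that select CONFIG_HAVE_RCU_TABLE_INVALIDATE flush their paging-structure caches while the table pages are still live; the RCU grace period then only has to cover lockless software walkers. A compilable sketch of that invalidate-before-handoff ordering, with invented names standing in for the mmu_gather machinery:

    #include <stdio.h>

    struct toy_table_batch { int nr; };

    static void invalidate_walker_caches(void)
    {
            puts("flush TLB / paging-structure caches");
    }

    /* Stands in for the call_rcu_sched() handoff: tables are freed
     * only after every in-flight lockless walker has finished. */
    static void rcu_free_batch(struct toy_table_batch *b)
    {
            printf("free %d tables after grace period\n", b->nr);
            b->nr = 0;
    }

    static void toy_table_flush(struct toy_table_batch *b)
    {
            if (b->nr) {
                    invalidate_walker_caches(); /* the line this patch adds */
                    rcu_free_batch(b);
            }
    }

    int main(void)
    {
            struct toy_table_batch b = { .nr = 3 };

            toy_table_flush(&b);
            return 0;
    }
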
@@ -376,23 +392,16 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
        struct mmu_table_batch **batch = &tlb->batch;
 
-       /*
-        * When there's less then two users of this mm there cannot be a
-        * concurrent page-table walk.
-        */
-       if (atomic_read(&tlb->mm->mm_users) < 2) {
-               __tlb_remove_table(table);
-               return;
-       }
-
        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
+                       tlb_table_invalidate(tlb);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }
+
        (*batch)->tables[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_table_flush(tlb);
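
The deleted shortcut assumed an mm with fewer than two users cannot be walked concurrently, which overlooks lockless fast-GUP-style software walks and speculative hardware walks; after this change every table goes through the batch, and a failed batch allocation invalidates and falls back to tlb_remove_table_one()'s synchronized single free instead of an immediate __tlb_remove_table(). A toy model of the always-batch flow; the batch size and names are invented:

    #include <stdio.h>
    #include <stdlib.h>

    #define TOY_MAX_BATCH 4

    struct toy_batch {
            int nr;
            void *tables[TOY_MAX_BATCH];
    };

    static void toy_flush(struct toy_batch *b)
    {
            printf("deferred free of %d tables\n", b->nr);
            b->nr = 0;
    }

    /* Stands in for tlb_remove_table_one(): synchronize with every
     * possible walker (the kernel uses an IPI) and free just one. */
    static void toy_free_one_synced(void *table)
    {
            printf("sync, then free table %p\n", table);
    }

    /* No "single user" fast path: batching, or the synchronized
     * one-off fallback, is taken unconditionally. */
    static void toy_remove_table(struct toy_batch **batch, void *table)
    {
            if (*batch == NULL) {
                    *batch = calloc(1, sizeof(**batch));
                    if (*batch == NULL) {
                            toy_free_one_synced(table);
                            return;
                    }
            }
            (*batch)->tables[(*batch)->nr++] = table;
            if ((*batch)->nr == TOY_MAX_BATCH)
                    toy_flush(*batch);
    }

    int main(void)
    {
            struct toy_batch *batch = NULL;
            int dummy[5];

            for (int i = 0; i < 5; i++)
                    toy_remove_table(&batch, &dummy[i]);
            if (batch)
                    toy_flush(batch);
            free(batch);
            return 0;
    }
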
@@ -1417,11 +1426,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        do {
                next = pmd_addr_end(addr, end);
                if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
-                       if (next - addr != HPAGE_PMD_SIZE) {
-                               VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
-                                   !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+                       if (next - addr != HPAGE_PMD_SIZE)
                                __split_huge_pmd(vma, pmd, addr, false, NULL);
-                       } else if (zap_huge_pmd(tlb, vma, pmd, addr))
+                       else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
                }
@@ -1887,6 +1894,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
 
+       if (!pfn_modify_allowed(pfn, pgprot))
+               return -EACCES;
+
        track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
        ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
@@ -1908,6 +1918,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 
        track_pfn_insert(vma, &pgprot, pfn);
 
+       if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+               return -EACCES;
+
        /*
         * If we don't have pte special, then we have to use the pfn_valid()
         * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
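
These two hunks wire the same guard into both PFN-insertion paths: pfn_modify_allowed() comes from the x86 L1TF series, where a non-present (PROT_NONE-style) PTE still exposes PFN bits to speculation, so a mapping aimed at physical memory the caller does not own is refused with -EACCES before any PTE is written. A loose user-space model of the check's shape; the limit below is invented, and the real kernel test also consults pfn_valid() and CAP_SYS_ADMIN:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Invented stand-in for the architecture's safe-PFN ceiling. */
    #define TOY_L1TF_PFN_LIMIT (1ull << 20)

    /* Rough shape of pfn_modify_allowed(): present mappings pass;
     * a non-present entry keeps its PFN speculatively reachable,
     * so it must stay below the limit. */
    static bool toy_pfn_modify_allowed(uint64_t pfn, bool present)
    {
            if (present)
                    return true;
            return pfn < TOY_L1TF_PFN_LIMIT;
    }

    int main(void)
    {
            printf("low prot-none pfn allowed:  %d\n",
                   toy_pfn_modify_allowed(0x1000, false));
            printf("high prot-none pfn allowed: %d\n",
                   toy_pfn_modify_allowed(TOY_L1TF_PFN_LIMIT + 1, false));
            return 0;
    }
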
@@ -1955,6 +1968,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
        pte_t *pte;
        spinlock_t *ptl;
+       int err = 0;
 
        pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
@@ -1962,12 +1976,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
        arch_enter_lazy_mmu_mode();
        do {
                BUG_ON(!pte_none(*pte));
+               if (!pfn_modify_allowed(pfn, prot)) {
+                       err = -EACCES;
+                       break;
+               }
                set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);
-       return 0;
+       return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
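
remap_pte_range() now latches the first failure in err and stops, and the remap_pmd_range()/remap_pud_range()/remap_p4d_range() hunks that follow forward that value verbatim, so an -EACCES from pfn_modify_allowed() reaches the remap_pfn_range() caller instead of being flattened into -ENOMEM. A minimal stand-alone model of the propagate-don't-squash pattern; the forbidden value is arbitrary:

    #include <errno.h>
    #include <stdio.h>

    /* Leaf level: return the specific error and stop early. */
    static int leaf_range(int start, int end)
    {
            for (int i = start; i < end; i++)
                    if (i == 13)          /* pretend this pfn is forbidden */
                            return -EACCES;
            return 0;
    }

    /* Upper level: forward err untouched, never remap it to -ENOMEM. */
    static int upper_range(int start, int end)
    {
            int err;

            for (int i = start; i < end; i += 4) {
                    err = leaf_range(i, i + 4);
                    if (err)
                            return err;
            }
            return 0;
    }

    int main(void)
    {
            printf("upper_range: %d (expect %d)\n", upper_range(0, 32), -EACCES);
            return 0;
    }
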
@@ -1976,6 +1994,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 {
        pmd_t *pmd;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        pmd = pmd_alloc(mm, pud, addr);
@@ -1984,9 +2003,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
        VM_BUG_ON(pmd_trans_huge(*pmd));
        do {
                next = pmd_addr_end(addr, end);
-               if (remap_pte_range(mm, pmd, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pte_range(mm, pmd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
        } while (pmd++, addr = next, addr != end);
        return 0;
 }
@@ -1997,6 +2017,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 {
        pud_t *pud;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        pud = pud_alloc(mm, p4d, addr);
@@ -2004,9 +2025,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
                return -ENOMEM;
        do {
                next = pud_addr_end(addr, end);
-               if (remap_pmd_range(mm, pud, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pmd_range(mm, pud, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
        } while (pud++, addr = next, addr != end);
        return 0;
 }
@@ -2017,6 +2039,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 {
        p4d_t *p4d;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        p4d = p4d_alloc(mm, pgd, addr);
@@ -2024,9 +2047,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
                return -ENOMEM;
        do {
                next = p4d_addr_end(addr, end);
-               if (remap_pud_range(mm, p4d, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pud_range(mm, p4d, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
        } while (p4d++, addr = next, addr != end);
        return 0;
 }
@@ -3028,7 +3052,7 @@ int do_swap_page(struct vm_fault *vmf)
 unlock:
        pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
-       return ret;
+       return ret | VM_FAULT_SWAP;
 out_nomap:
        mem_cgroup_cancel_charge(page, memcg, false);
        pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3040,7 +3064,7 @@ out_release:
                unlock_page(swapcache);
                put_page(swapcache);
        }
-       return ret;
+       return ret | VM_FAULT_SWAP;
 }
 
 /*
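
Both exits of do_swap_page() now OR in VM_FAULT_SWAP, the vendor flag this mm_event series threads through so fault-handling callers can tell a swap-serviced fault apart from other completions; the consuming side is not part of this file. A toy sketch of how such a hint bit might be counted and then stripped, with invented flag values:

    #include <stdio.h>

    #define TOY_FAULT_MAJOR 0x0004   /* illustrative, not the kernel's */
    #define TOY_FAULT_SWAP  0x8000   /* illustrative, not the kernel's */

    static unsigned long swap_fault_count;

    /* mm_event-style accounting: count the hint, then hide it from
     * generic code that only understands the standard bits. */
    static int consume_fault_result(int ret)
    {
            if (ret & TOY_FAULT_SWAP) {
                    swap_fault_count++;
                    ret &= ~TOY_FAULT_SWAP;
            }
            return ret;
    }

    int main(void)
    {
            int ret = consume_fault_result(TOY_FAULT_MAJOR | TOY_FAULT_SWAP);

            printf("ret=0x%x swap_faults=%lu\n", (unsigned)ret,
                   swap_fault_count);
            return 0;
    }
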
@@ -3167,6 +3191,29 @@ static int __do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        int ret;
 
+       /*
+        * Preallocate pte before we take page_lock because this might lead to
+        * deadlocks for memcg reclaim which waits for pages under writeback:
+        *                              lock_page(A)
+        *                              SetPageWriteback(A)
+        *                              unlock_page(A)
+        * lock_page(B)
+        *                              lock_page(B)
+        * pte_alloc_pne
+        *   shrink_page_list
+        *     wait_on_page_writeback(A)
+        *                              SetPageWriteback(B)
+        *                              unlock_page(B)
+        *                              # flush A, B to clear the writeback
+        */
+       if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
+               vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
+                                                 vmf->address);
+               if (!vmf->prealloc_pte)
+                       return VM_FAULT_OOM;
+               smp_wmb(); /* See comment in __pte_alloc() */
+       }
+
        ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
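
The preallocation breaks the cycle drawn in the comment above: allocating the PTE page can enter memcg reclaim, and reclaim can wait on writeback of a page whose completion needs the very page lock vm_ops->fault() is about to take, so all allocation is finished before any page lock is acquired. A user-space rendition of the allocate-before-lock rule:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Do the allocation that may block in "reclaim" before taking
     * the lock that reclaim may also depend on, mirroring the
     * prealloc_pte logic above. */
    static void *fault_path(size_t size)
    {
            void *prealloc = malloc(size);  /* may block; no lock held */

            if (!prealloc)
                    return NULL;            /* the VM_FAULT_OOM-style exit */

            pthread_mutex_lock(&page_lock); /* no allocation past this point */
            /* ... install prealloc ... */
            pthread_mutex_unlock(&page_lock);
            return prealloc;
    }

    int main(void)
    {
            void *p = fault_path(64);

            printf("fault %s\n", p ? "handled" : "OOM");
            free(p);
            return 0;
    }
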
@@ -3673,10 +3720,36 @@ static int do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        int ret;
 
-       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
-       if (!vma->vm_ops->fault)
-               ret = VM_FAULT_SIGBUS;
-       else if (!(vmf->flags & FAULT_FLAG_WRITE))
+       /*
+        * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
+        */
+       if (!vma->vm_ops->fault) {
+               /*
+                * If we find a migration pmd entry or a none pmd entry, which
+                * should never happen, return SIGBUS
+                */
+               if (unlikely(!pmd_present(*vmf->pmd)))
+                       ret = VM_FAULT_SIGBUS;
+               else {
+                       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm,
+                                                      vmf->pmd,
+                                                      vmf->address,
+                                                      &vmf->ptl);
+                       /*
+                        * Make sure this is not a temporary clearing of pte
+                        * by holding ptl and checking again. A R/M/W update
+                        * of pte involves: take ptl, clearing the pte so that
+                        * we don't have concurrent modification by hardware
+                        * followed by an update.
+                        */
+                       if (unlikely(pte_none(*vmf->pte)))
+                               ret = VM_FAULT_SIGBUS;
+                       else
+                               ret = VM_FAULT_NOPAGE;
+
+                       pte_unmap_unlock(vmf->pte, vmf->ptl);
+               }
+       } else if (!(vmf->flags & FAULT_FLAG_WRITE))
                ret = do_read_fault(vmf);
        else if (!(vma->vm_flags & VM_SHARED))
                ret = do_cow_fault(vmf);
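
For a VMA without a ->fault handler, the rewritten branch only trusts a pte_none() verdict taken under the page-table lock: the lockless pmd_present() test filters the clearly broken cases, and the locked re-read distinguishes a genuinely absent PTE (SIGBUS) from one a concurrent read-modify-write updater cleared transiently. A small double-check-under-lock model of the same idea:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t toy_ptl = PTHREAD_MUTEX_INITIALIZER;
    static volatile long toy_pte;    /* 0 plays the role of pte_none() */

    /* A lockless reader can catch the entry mid-update, so only the
     * re-read under the lock may declare it absent. */
    static bool pte_really_none(void)
    {
            bool none;

            if (toy_pte != 0)             /* cheap unlocked filter */
                    return false;

            pthread_mutex_lock(&toy_ptl); /* updaters excluded here */
            none = (toy_pte == 0);
            pthread_mutex_unlock(&toy_ptl);
            return none;
    }

    int main(void)
    {
            printf("none before install: %d\n", pte_really_none());
            toy_pte = 0x42;
            printf("none after install:  %d\n", pte_really_none());
            return 0;
    }
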
@@ -4333,6 +4406,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
                return -EINVAL;
 
        maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+       if (!maddr)
+               return -ENOMEM;
+
        if (write)
                memcpy_toio(maddr + offset, buf, len);
        else
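
ioremap_prot() can return NULL, and the old code passed that straight to memcpy_toio()/memcpy_fromio(); the added check turns a NULL dereference into a clean -ENOMEM. A trivial model of the map-check-use guard, with calloc() standing in for the mapping call:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int toy_access_phys(char *buf, size_t len, int write)
    {
            char *maddr = calloc(1, len); /* stands in for ioremap_prot() */

            if (!maddr)                   /* the check this hunk adds */
                    return -ENOMEM;

            if (write)
                    memcpy(maddr, buf, len);
            else
                    memcpy(buf, maddr, len);
            free(maddr);                  /* iounmap() in the original */
            return (int)len;
    }

    int main(void)
    {
            char buf[8] = "abc";

            printf("accessed %d bytes\n", toy_access_phys(buf, sizeof(buf), 1));
            return 0;
    }
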