mm: mm_event: add special kernel allocation stat
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] mm/memory.c
index a728bed16c206902de6498921a1d130d141ff7b7..16956844e5535450ea80926db381c4b98c34ff52 100644
@@ -81,7 +81,7 @@
 
 #include "internal.h"
 
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
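
This first hunk keeps the last_cpupid layout warning for real configurations but silences it under CONFIG_COMPILE_TEST, where automated randconfig/allmodconfig builds produce the NUMA combination on purpose. A stand-alone sketch of the gating pattern; the BAD_NUMA_COMBO macro below is invented for illustration:

    /* Gate a #warning so compile-test builds stay quiet while real
     * configurations still see it. Uncomment CONFIG_COMPILE_TEST to
     * model a build-bot configuration. */
    #define BAD_NUMA_COMBO 1
    /* #define CONFIG_COMPILE_TEST 1 */

    #if defined(BAD_NUMA_COMBO) && !defined(CONFIG_COMPILE_TEST)
    #warning unfortunate config, page-frame grows for last_cpupid
    #endif

    int main(void) { return 0; }
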
@@ -246,9 +246,6 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 
        tlb_flush(tlb);
        mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-       tlb_table_flush(tlb);
-#endif
        __tlb_reset_range(tlb);
 }
 
@@ -256,6 +253,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 {
        struct mmu_gather_batch *batch;
 
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+       tlb_table_flush(tlb);
+#endif
        for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
                free_pages_and_swap_cache(batch->pages, batch->nr);
                batch->nr = 0;
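
The two hunks above relocate the RCU page-table batch drain: tlb_flush_mmu_tlbonly() becomes a pure TLB invalidation, and tlb_table_flush() now runs at the top of tlb_flush_mmu_free(), so queued table pages are drained on the same path that frees the gathered pages. A toy user-space model of the resulting split; all names and counts are invented:

    #include <stdio.h>

    struct toy_gather {
            int pending_pages;
            int pending_tables;
    };

    /* TLB-only path: invalidate, but leave the table batch queued. */
    static void flush_tlb_only(struct toy_gather *tlb)
    {
            printf("TLB invalidate (tables still queued: %d)\n",
                   tlb->pending_tables);
    }

    /* Free path: drain the table batch first, then the page batch,
     * mirroring where the patch moved tlb_table_flush(). */
    static void flush_free(struct toy_gather *tlb)
    {
            printf("deferred-free %d page tables\n", tlb->pending_tables);
            tlb->pending_tables = 0;
            printf("free %d gathered pages\n", tlb->pending_pages);
            tlb->pending_pages = 0;
    }

    int main(void)
    {
            struct toy_gather tlb = { .pending_pages = 8, .pending_tables = 2 };

            flush_tlb_only(&tlb);
            flush_free(&tlb);
            return 0;
    }
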
@@ -331,6 +331,21 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
  * See the comment near struct mmu_table_batch.
  */
 
+/*
+ * If we want tlb_remove_table() to imply TLB invalidates.
+ */
+static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+{
+#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+       /*
+        * Invalidate page-table caches used by hardware walkers. Then we still
+        * need to RCU-sched wait while freeing the pages because software
+        * walkers can still be in-flight.
+        */
+       tlb_flush_mmu_tlbonly(tlb);
+#endif
+}
+
 static void tlb_remove_table_smp_sync(void *arg)
 {
        /* Simply deliver the interrupt */
@@ -367,6 +382,7 @@ void tlb_table_flush(struct mmu_gather *tlb)
        struct mmu_table_batch **batch = &tlb->batch;
 
        if (*batch) {
+               tlb_table_invalidate(tlb);
                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
                *batch = NULL;
        }
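
With tlb_table_invalidate() called before the batch is queued on call_rcu_sched(), architectures that select CONFIG_HAVE_RCU_TABLE_INVALIDATE flush their paging-structure caches while the table pages are still live; the RCU grace period then only has to cover lockless software walkers. A compilable sketch of that invalidate-before-handoff ordering, with invented names standing in for the mmu_gather machinery:

    #include <stdio.h>

    struct toy_table_batch { int nr; };

    static void invalidate_walker_caches(void)
    {
            puts("flush TLB / paging-structure caches");
    }

    /* Stands in for the call_rcu_sched() handoff: tables are freed
     * only after every in-flight lockless walker has finished. */
    static void rcu_free_batch(struct toy_table_batch *b)
    {
            printf("free %d tables after grace period\n", b->nr);
            b->nr = 0;
    }

    static void toy_table_flush(struct toy_table_batch *b)
    {
            if (b->nr) {
                    invalidate_walker_caches(); /* the line this patch adds */
                    rcu_free_batch(b);
            }
    }

    int main(void)
    {
            struct toy_table_batch b = { .nr = 3 };

            toy_table_flush(&b);
            return 0;
    }
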
@@ -376,23 +392,16 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
        struct mmu_table_batch **batch = &tlb->batch;
 
-       /*
-        * When there's less then two users of this mm there cannot be a
-        * concurrent page-table walk.
-        */
-       if (atomic_read(&tlb->mm->mm_users) < 2) {
-               __tlb_remove_table(table);
-               return;
-       }
-
        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
+                       tlb_table_invalidate(tlb);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }
+
        (*batch)->tables[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_table_flush(tlb);
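
The deleted shortcut assumed an mm with fewer than two users cannot be walked concurrently, which overlooks lockless fast-GUP-style software walks and speculative hardware walks; after this change every table goes through the batch, and a failed batch allocation invalidates and falls back to tlb_remove_table_one()'s synchronized single free instead of an immediate __tlb_remove_table(). A toy model of the always-batch flow; the batch size and names are invented:

    #include <stdio.h>
    #include <stdlib.h>

    #define TOY_MAX_BATCH 4

    struct toy_batch {
            int nr;
            void *tables[TOY_MAX_BATCH];
    };

    static void toy_flush(struct toy_batch *b)
    {
            printf("deferred free of %d tables\n", b->nr);
            b->nr = 0;
    }

    /* Stands in for tlb_remove_table_one(): synchronize with every
     * possible walker (the kernel uses an IPI) and free just one. */
    static void toy_free_one_synced(void *table)
    {
            printf("sync, then free table %p\n", table);
    }

    /* No "single user" fast path: batching, or the synchronized
     * one-off fallback, is taken unconditionally. */
    static void toy_remove_table(struct toy_batch **batch, void *table)
    {
            if (*batch == NULL) {
                    *batch = calloc(1, sizeof(**batch));
                    if (*batch == NULL) {
                            toy_free_one_synced(table);
                            return;
                    }
            }
            (*batch)->tables[(*batch)->nr++] = table;
            if ((*batch)->nr == TOY_MAX_BATCH)
                    toy_flush(*batch);
    }

    int main(void)
    {
            struct toy_batch *batch = NULL;
            int dummy[5];

            for (int i = 0; i < 5; i++)
                    toy_remove_table(&batch, &dummy[i]);
            if (batch)
                    toy_flush(batch);
            free(batch);
            return 0;
    }
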
@@ -1417,11 +1426,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        do {
                next = pmd_addr_end(addr, end);
                if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
-                       if (next - addr != HPAGE_PMD_SIZE) {
-                               VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
-                                   !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+                       if (next - addr != HPAGE_PMD_SIZE)
                                __split_huge_pmd(vma, pmd, addr, false, NULL);
-                       } else if (zap_huge_pmd(tlb, vma, pmd, addr))
+                       else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
                }
@@ -1887,6 +1894,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
 
+       if (!pfn_modify_allowed(pfn, pgprot))
+               return -EACCES;
+
        track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
        ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
@@ -1908,6 +1918,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 
        track_pfn_insert(vma, &pgprot, pfn);
 
+       if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+               return -EACCES;
+
        /*
         * If we don't have pte special, then we have to use the pfn_valid()
         * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
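
These two hunks wire the same guard into both PFN-insertion paths: pfn_modify_allowed() comes from the x86 L1TF series, where a non-present (PROT_NONE-style) PTE still exposes PFN bits to speculation, so a mapping aimed at physical memory the caller does not own is refused with -EACCES before any PTE is written. A loose user-space model of the check's shape; the limit below is invented, and the real kernel test also consults pfn_valid() and CAP_SYS_ADMIN:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Invented stand-in for the architecture's safe-PFN ceiling. */
    #define TOY_L1TF_PFN_LIMIT (1ull << 20)

    /* Rough shape of pfn_modify_allowed(): present mappings pass;
     * a non-present entry keeps its PFN speculatively reachable,
     * so it must stay below the limit. */
    static bool toy_pfn_modify_allowed(uint64_t pfn, bool present)
    {
            if (present)
                    return true;
            return pfn < TOY_L1TF_PFN_LIMIT;
    }

    int main(void)
    {
            printf("low prot-none pfn allowed:  %d\n",
                   toy_pfn_modify_allowed(0x1000, false));
            printf("high prot-none pfn allowed: %d\n",
                   toy_pfn_modify_allowed(TOY_L1TF_PFN_LIMIT + 1, false));
            return 0;
    }
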
@@ -1955,6 +1968,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
        pte_t *pte;
        spinlock_t *ptl;
+       int err = 0;
 
        pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
@@ -1962,12 +1976,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
        arch_enter_lazy_mmu_mode();
        do {
                BUG_ON(!pte_none(*pte));
+               if (!pfn_modify_allowed(pfn, prot)) {
+                       err = -EACCES;
+                       break;
+               }
                set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);
-       return 0;
+       return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
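
remap_pte_range() now latches the first failure in err and stops, and the remap_pmd_range()/remap_pud_range()/remap_p4d_range() hunks that follow forward that value verbatim, so an -EACCES from pfn_modify_allowed() reaches the remap_pfn_range() caller instead of being flattened into -ENOMEM. A minimal stand-alone model of the propagate-don't-squash pattern; the forbidden value is arbitrary:

    #include <errno.h>
    #include <stdio.h>

    /* Leaf level: return the specific error and stop early. */
    static int leaf_range(int start, int end)
    {
            for (int i = start; i < end; i++)
                    if (i == 13)          /* pretend this pfn is forbidden */
                            return -EACCES;
            return 0;
    }

    /* Upper level: forward err untouched, never remap it to -ENOMEM. */
    static int upper_range(int start, int end)
    {
            int err;

            for (int i = start; i < end; i += 4) {
                    err = leaf_range(i, i + 4);
                    if (err)
                            return err;
            }
            return 0;
    }

    int main(void)
    {
            printf("upper_range: %d (expect %d)\n", upper_range(0, 32), -EACCES);
            return 0;
    }
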
@@ -1976,6 +1994,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 {
        pmd_t *pmd;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        pmd = pmd_alloc(mm, pud, addr);
@@ -1984,9 +2003,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
        VM_BUG_ON(pmd_trans_huge(*pmd));
        do {
                next = pmd_addr_end(addr, end);
-               if (remap_pte_range(mm, pmd, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pte_range(mm, pmd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
        } while (pmd++, addr = next, addr != end);
        return 0;
 }
@@ -1997,6 +2017,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 {
        pud_t *pud;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        pud = pud_alloc(mm, p4d, addr);
@@ -2004,9 +2025,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
                return -ENOMEM;
        do {
                next = pud_addr_end(addr, end);
-               if (remap_pmd_range(mm, pud, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pmd_range(mm, pud, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
        } while (pud++, addr = next, addr != end);
        return 0;
 }
@@ -2017,6 +2039,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 {
        p4d_t *p4d;
        unsigned long next;
+       int err;
 
        pfn -= addr >> PAGE_SHIFT;
        p4d = p4d_alloc(mm, pgd, addr);
@@ -2024,9 +2047,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
                return -ENOMEM;
        do {
                next = p4d_addr_end(addr, end);
-               if (remap_pud_range(mm, p4d, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pud_range(mm, p4d, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
        } while (p4d++, addr = next, addr != end);
        return 0;
 }
@@ -3028,7 +3052,7 @@ int do_swap_page(struct vm_fault *vmf)
 unlock:
        pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
-       return ret;
+       return ret | VM_FAULT_SWAP;
 out_nomap:
        mem_cgroup_cancel_charge(page, memcg, false);
        pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3040,7 +3064,7 @@ out_release:
                unlock_page(swapcache);
                put_page(swapcache);
        }
-       return ret;
+       return ret | VM_FAULT_SWAP;
 }
 
 /*
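
Both exits of do_swap_page() now OR in VM_FAULT_SWAP, the vendor flag this mm_event series threads through so fault-handling callers can tell a swap-serviced fault apart from other completions; the consuming side is not part of this file. A toy sketch of how such a hint bit might be counted and then stripped, with invented flag values:

    #include <stdio.h>

    #define TOY_FAULT_MAJOR 0x0004   /* illustrative, not the kernel's */
    #define TOY_FAULT_SWAP  0x8000   /* illustrative, not the kernel's */

    static unsigned long swap_fault_count;

    /* mm_event-style accounting: count the hint, then hide it from
     * generic code that only understands the standard bits. */
    static int consume_fault_result(int ret)
    {
            if (ret & TOY_FAULT_SWAP) {
                    swap_fault_count++;
                    ret &= ~TOY_FAULT_SWAP;
            }
            return ret;
    }

    int main(void)
    {
            int ret = consume_fault_result(TOY_FAULT_MAJOR | TOY_FAULT_SWAP);

            printf("ret=0x%x swap_faults=%lu\n", (unsigned)ret,
                   swap_fault_count);
            return 0;
    }
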
@@ -3167,6 +3191,29 @@ static int __do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        int ret;
 
+       /*
+        * Preallocate pte before we take page_lock because this might lead to
+        * deadlocks for memcg reclaim which waits for pages under writeback:
+        *                              lock_page(A)
+        *                              SetPageWriteback(A)
+        *                              unlock_page(A)
+        * lock_page(B)
+        *                              lock_page(B)
+        * pte_alloc_pne
+        *   shrink_page_list
+        *     wait_on_page_writeback(A)
+        *                              SetPageWriteback(B)
+        *                              unlock_page(B)
+        *                              # flush A, B to clear the writeback
+        */
+       if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
+               vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
+                                                 vmf->address);
+               if (!vmf->prealloc_pte)
+                       return VM_FAULT_OOM;
+               smp_wmb(); /* See comment in __pte_alloc() */
+       }
+
        ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
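
The preallocation breaks the cycle drawn in the comment above: allocating the PTE page can enter memcg reclaim, and reclaim can wait on writeback of a page whose completion needs the very page lock vm_ops->fault() is about to take, so all allocation is finished before any page lock is acquired. A user-space rendition of the allocate-before-lock rule:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Do the allocation that may block in "reclaim" before taking
     * the lock that reclaim may also depend on, mirroring the
     * prealloc_pte logic above. */
    static void *fault_path(size_t size)
    {
            void *prealloc = malloc(size);  /* may block; no lock held */

            if (!prealloc)
                    return NULL;            /* the VM_FAULT_OOM-style exit */

            pthread_mutex_lock(&page_lock); /* no allocation past this point */
            /* ... install prealloc ... */
            pthread_mutex_unlock(&page_lock);
            return prealloc;
    }

    int main(void)
    {
            void *p = fault_path(64);

            printf("fault %s\n", p ? "handled" : "OOM");
            free(p);
            return 0;
    }
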
@@ -3673,10 +3720,36 @@ static int do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        int ret;
 
-       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
-       if (!vma->vm_ops->fault)
-               ret = VM_FAULT_SIGBUS;
-       else if (!(vmf->flags & FAULT_FLAG_WRITE))
+       /*
+        * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
+        */
+       if (!vma->vm_ops->fault) {
+               /*
+                * If we find a migration pmd entry or a none pmd entry, which
+                * should never happen, return SIGBUS
+                */
+               if (unlikely(!pmd_present(*vmf->pmd)))
+                       ret = VM_FAULT_SIGBUS;
+               else {
+                       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm,
+                                                      vmf->pmd,
+                                                      vmf->address,
+                                                      &vmf->ptl);
+                       /*
+                        * Make sure this is not a temporary clearing of pte
+                        * by holding ptl and checking again. A R/M/W update
+                        * of pte involves: take ptl, clearing the pte so that
+                        * we don't have concurrent modification by hardware
+                        * followed by an update.
+                        */
+                       if (unlikely(pte_none(*vmf->pte)))
+                               ret = VM_FAULT_SIGBUS;
+                       else
+                               ret = VM_FAULT_NOPAGE;
+
+                       pte_unmap_unlock(vmf->pte, vmf->ptl);
+               }
+       } else if (!(vmf->flags & FAULT_FLAG_WRITE))
                ret = do_read_fault(vmf);
        else if (!(vma->vm_flags & VM_SHARED))
                ret = do_cow_fault(vmf);
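
For a VMA without a ->fault handler, the rewritten branch only trusts a pte_none() verdict taken under the page-table lock: the lockless pmd_present() test filters the clearly broken cases, and the locked re-read distinguishes a genuinely absent PTE (SIGBUS) from one a concurrent read-modify-write updater cleared transiently. A small double-check-under-lock model of the same idea:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t toy_ptl = PTHREAD_MUTEX_INITIALIZER;
    static volatile long toy_pte;    /* 0 plays the role of pte_none() */

    /* A lockless reader can catch the entry mid-update, so only the
     * re-read under the lock may declare it absent. */
    static bool pte_really_none(void)
    {
            bool none;

            if (toy_pte != 0)             /* cheap unlocked filter */
                    return false;

            pthread_mutex_lock(&toy_ptl); /* updaters excluded here */
            none = (toy_pte == 0);
            pthread_mutex_unlock(&toy_ptl);
            return none;
    }

    int main(void)
    {
            printf("none before install: %d\n", pte_really_none());
            toy_pte = 0x42;
            printf("none after install:  %d\n", pte_really_none());
            return 0;
    }
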
@@ -4333,6 +4406,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
                return -EINVAL;
 
        maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+       if (!maddr)
+               return -ENOMEM;
+
        if (write)
                memcpy_toio(maddr + offset, buf, len);
        else
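
ioremap_prot() can return NULL, and the old code passed that straight to memcpy_toio()/memcpy_fromio(); the added check turns a NULL dereference into a clean -ENOMEM. A trivial model of the map-check-use guard, with calloc() standing in for the mapping call:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int toy_access_phys(char *buf, size_t len, int write)
    {
            char *maddr = calloc(1, len); /* stands in for ioremap_prot() */

            if (!maddr)                   /* the check this hunk adds */
                    return -ENOMEM;

            if (write)
                    memcpy(maddr, buf, len);
            else
                    memcpy(buf, maddr, len);
            free(maddr);                  /* iounmap() in the original */
            return (int)len;
    }

    int main(void)
    {
            char buf[8] = "abc";

            printf("accessed %d bytes\n", toy_access_phys(buf, sizeof(buf), 1));
            return 0;
    }
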