[RAMEN9610-12171] mm: hpa: change allocate logic from buddy
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git]
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 424b0ef08a60cb616d40e9249e5447287c1705e3..dfd2947e046e7f27350548860497bd9072c911d5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -2158,6 +2159,7 @@ static void __init gather_bootmem_prealloc(void)
                 */
                if (hstate_is_gigantic(h))
                        adjust_managed_page_count(page, 1 << h->order);
+               cond_resched();
        }
 }
 
@@ -3125,6 +3127,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
        }
 }
 
+static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+{
+       if (addr & ~(huge_page_mask(hstate_vma(vma))))
+               return -EINVAL;
+       return 0;
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -3141,6 +3150,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
        .fault = hugetlb_vm_op_fault,
        .open = hugetlb_vm_op_open,
        .close = hugetlb_vm_op_close,
+       .split = hugetlb_vm_op_split,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
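The effect of registering hugetlb_vm_op_split() is easiest to see from userspace: splitting a hugetlb VMA at an address that is not huge-page aligned is now refused with EINVAL instead of producing a misaligned hugetlb VMA. A minimal sketch, not part of this patch, assuming 2 MiB huge pages are configured and a few are reserved:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t huge = 2UL << 20;	/* assumption: 2 MiB huge pages */
	char *p = mmap(NULL, 2 * huge, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");		/* no huge pages reserved? */
		return 1;
	}

	/*
	 * Unmapping from one base page (4 KiB assumed) into the mapping
	 * forces a VMA split at a non-huge-page-aligned address, which
	 * the new ->split handler rejects.
	 */
	if (munmap(p + 4096, huge) < 0)
		printf("partial munmap failed: %s\n", strerror(errno));

	munmap(p, 2 * huge);
	return 0;
}

Since __split_vma() propagates the error from ->split, the partial munmap() above is expected to fail up front with EINVAL rather than leaving hugetlb teardown to deal with a misaligned VMA.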
@@ -3984,6 +3994,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
                            unsigned long src_addr,
                            struct page **pagep)
 {
+       struct address_space *mapping;
+       pgoff_t idx;
+       unsigned long size;
        int vm_shared = dst_vma->vm_flags & VM_SHARED;
        struct hstate *h = hstate_vma(dst_vma);
        pte_t _dst_pte;
@@ -4021,13 +4034,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        __SetPageUptodate(page);
        set_page_huge_active(page);
 
+       mapping = dst_vma->vm_file->f_mapping;
+       idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+
        /*
         * If shared, add to page cache
         */
        if (vm_shared) {
-               struct address_space *mapping = dst_vma->vm_file->f_mapping;
-               pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+               size = i_size_read(mapping->host) >> huge_page_shift(h);
+               ret = -EFAULT;
+               if (idx >= size)
+                       goto out_release_nounlock;
 
+               /*
+                * Serialization between remove_inode_hugepages() and
+                * huge_add_to_page_cache() below happens through the
+                * hugetlb_fault_mutex_table, which must be held by
+                * the caller here.
+                */
                ret = huge_add_to_page_cache(page, mapping, idx);
                if (ret)
                        goto out_release_nounlock;
@@ -4036,6 +4060,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
        spin_lock(ptl);
 
+       /*
+        * Recheck the i_size after holding PT lock to make sure not
+        * to leave any page mapped (as page_mapped()) beyond the end
+        * of the i_size (remove_inode_hugepages() is strict about
+        * enforcing that). If we bail out here, we'll also leave a
+        * page in the radix tree in the vm_shared case beyond the end
+        * of the i_size, but remove_inode_hugepages() will take care
+        * of it as soon as we drop the hugetlb_fault_mutex_table.
+        */
+       size = i_size_read(mapping->host) >> huge_page_shift(h);
+       ret = -EFAULT;
+       if (idx >= size)
+               goto out_release_unlock;
+
        ret = -EEXIST;
        if (!huge_pte_none(huge_ptep_get(dst_pte)))
                goto out_release_unlock;
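Both the bounds check added before huge_add_to_page_cache() and the recheck taken under the page-table lock reduce to the same comparison: shift the byte-granular i_size down to a count of huge pages and compare it with the destination's huge-page index. A condensed sketch of that arithmetic, using an illustrative helper name that does not exist in mm/hugetlb.c:

/* Illustrative only; hugetlb_mcopy_atomic_pte() open-codes this check. */
static inline bool hugetlb_idx_past_eof(struct address_space *mapping,
					struct hstate *h, pgoff_t idx)
{
	/* number of whole huge pages covered by the file's i_size */
	unsigned long size = i_size_read(mapping->host) >> huge_page_shift(h);

	/* idx counts huge pages from the start of the file */
	return idx >= size;
}

The second check is not redundant: i_size can shrink between the first test and taking the PT lock, and, as the comment above notes, remove_inode_hugepages() is strict about finding no pages mapped beyond EOF.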
@@ -4308,6 +4346,12 @@ int hugetlb_reserve_pages(struct inode *inode,
        struct resv_map *resv_map;
        long gbl_reserve;
 
+       /* This should never happen */
+       if (from > to) {
+               VM_WARN(1, "%s called with a negative range\n", __func__);
+               return -EINVAL;
+       }
+
        /*
         * Only apply hugepage reservation if asked. At fault time, an
         * attempt will be made for VM_NORESERVE to allocate a page
@@ -4589,7 +4633,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        pte_t *pte = NULL;
 
        pgd = pgd_offset(mm, addr);
-       p4d = p4d_offset(pgd, addr);
+       p4d = p4d_alloc(mm, pgd, addr);
+       if (!p4d)
+               return NULL;
        pud = pud_alloc(mm, p4d, addr);
        if (pud) {
                if (sz == PUD_SIZE) {
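The last hunk replaces p4d_offset() with p4d_alloc(). With a real (unfolded) p4d level, that table may not exist yet for a freshly mapped range, so only the allocating variant is safe here, and since it can fail, the new NULL check is needed. A condensed, illustrative-only restatement of the resulting walk (the helper name is made up):

/* Illustrative only; huge_pte_alloc() continues the walk from the pud. */
static pud_t *walk_or_alloc_pud(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);	/* top level always exists */
	p4d_t *p4d = p4d_alloc(mm, pgd, addr);	/* may allocate the p4d table */

	if (!p4d)
		return NULL;			/* allocation failure */
	return pud_alloc(mm, p4d, addr);	/* likewise allocated on demand */
}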