FROMLIST: android: binder: Move buffer out of area shared with user space

[GitHub/LineageOS/android_kernel_samsung_universal7580.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 378a15bcd64940cd4bab29335604f757543839be..41bb5fa9f82927435709fdf87c0c6fb188e35b84 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -110,6 +110,19 @@ unsigned long totalreserve_pages __read_mostly;
   */
  unsigned long dirty_balance_reserve __read_mostly;
  
+static unsigned int boot_mode;  /* set by "bootmode=" on the command line */
+static int __init setup_bootmode(char *str)
+{
+       int mode;       /* get_option() stores through an int pointer */
+
+       if (!get_option(&str, &mode))
+               return -EINVAL;
+       boot_mode = mode;
+       pr_info("%s: boot_mode is %u\n", __func__, boot_mode);
+       return 0;
+}
+early_param("bootmode", setup_bootmode);
+
  int percpu_pagelist_fraction;
  gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
  
@@ -196,7 +209,20 @@ static char * const zone_names[MAX_NR_ZONES] = {
          "Movable",
  };
  
+/*
+ * Try to keep at least this much lowmem free.  Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
  int min_free_kbytes = 1024;
+int min_free_order_shift = 1;
+
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory, to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
  
  static unsigned long __meminitdata nr_kernel_pages;
  static unsigned long __meminitdata nr_all_pages;
@@ -360,9 +386,11 @@ void prep_compound_page(struct page *page, unsigned long order)
         __SetPageHead(page);
         for (i = 1; i < nr_pages; i++) {
                 struct page *p = page + i;
-               __SetPageTail(p);
                 set_page_count(p, 0);
                 p->first_page = page;
+               /* Make sure p->first_page is always valid for PageTail() */
+               smp_wmb();
+               __SetPageTail(p);
         }
  }
  
@@ -767,6 +795,11 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
  }
  
  #ifdef CONFIG_CMA
+/* Tell whether the pageblock containing @page has migratetype MIGRATE_CMA. */
+bool is_cma_pageblock(struct page *page)
+{
+       return is_migrate_cma(get_pageblock_migratetype(page));
+}
+
  /* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
  void __init init_cma_reserved_pageblock(struct page *page)
  {
@@ -929,6 +962,11 @@ static int fallbacks[MIGRATE_TYPES][4] = {
  #endif
  };
  
+/* Return the row of fallbacks[] for @mtype; @mtype is not range-checked. */
+int *get_migratetype_fallbacks(int mtype)
+{
+       return &fallbacks[mtype][0];
+}
+
  /*
   * Move the free pages in a range to the free lists of the requested type.
   * Note that start_page and end_pages are not aligned on a pageblock
@@ -1052,6 +1090,8 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                         if (!is_migrate_cma(migratetype) &&
                             (unlikely(current_order >= pageblock_order / 2) ||
                              start_migratetype == MIGRATE_RECLAIMABLE ||
+                            start_migratetype == MIGRATE_UNMOVABLE ||
+                            start_migratetype == MIGRATE_MOVABLE ||
                              page_group_by_mobility_disabled)) {
                                 int pages;
                                 pages = move_freepages_block(zone, page,
@@ -1059,6 +1099,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
  
                                 /* Claim the whole block if over half of it is free */
                                 if (pages >= (1 << (pageblock_order-1)) ||
+                                       start_migratetype == MIGRATE_MOVABLE ||
                                                 page_group_by_mobility_disabled)
                                         set_pageblock_migratetype(page,
                                                                 start_migratetype);
@@ -1120,6 +1161,39 @@ retry_reserve:
         return page;
  }
  
+/* Take a page from @zone; movable requests try MIGRATE_CMA free lists first. */
+static struct page *__rmqueue_cma(struct zone *zone, unsigned int order,
+                                                       int migratetype)
+{
+       struct page *page = NULL;
+
+#ifdef CONFIG_CMA
+       /* Skip the CMA lists while alloc_contig_range() has cma_alloc set. */
+       if (migratetype == MIGRATE_MOVABLE && !zone->cma_alloc)
+               page = __rmqueue_smallest(zone, order, MIGRATE_CMA);
+#endif
+retry_reserve:
+       if (!page)
+               page = __rmqueue_smallest(zone, order, migratetype);
+
+       if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
+               page = __rmqueue_fallback(zone, order, migratetype);
+
+               /*
+                * Use MIGRATE_RESERVE rather than fail an allocation. goto
+                * is used because __rmqueue_smallest is an inline function
+                * and we want as few call sites as possible.
+                */
+               if (!page) {
+                       migratetype = MIGRATE_RESERVE;
+                       goto retry_reserve;
+               }
+       }
+
+       trace_mm_page_alloc_zone_locked(page, order, migratetype);
+       return page;
+}
+
  /*
   * Obtain a specified number of elements from the buddy allocator, all under
   * a single hold of the lock, for efficiency.  Add them to the supplied list.
@@ -1127,13 +1201,17 @@ retry_reserve:
   */
  static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         unsigned long count, struct list_head *list,
-                       int migratetype, int cold)
+                       int migratetype, int cold, int cma)
  {
         int mt = migratetype, i;
  
         spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
-               struct page *page = __rmqueue(zone, order, migratetype);
+               struct page *page;
+               if (cma)
+                       page = __rmqueue_cma(zone, order, migratetype);
+               else
+                       page = __rmqueue(zone, order, migratetype);
                 if (unlikely(page == NULL))
                         break;
  
@@ -1491,7 +1569,8 @@ again:
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
                                         pcp->batch, list,
-                                       migratetype, cold);
+                                       migratetype, cold,
+                                       gfp_flags & __GFP_CMA);
                         if (unlikely(list_empty(list)))
                                 goto failed;
                 }
@@ -1518,7 +1597,10 @@ again:
                         WARN_ON_ONCE(order > 1);
                 }
                 spin_lock_irqsave(&zone->lock, flags);
-               page = __rmqueue(zone, order, migratetype);
+               if (gfp_flags & __GFP_CMA)
+                       page = __rmqueue_cma(zone, order, migratetype);
+               else
+                       page = __rmqueue(zone, order, migratetype);
                 spin_unlock(&zone->lock);
                 if (!page)
                         goto failed;
@@ -1628,6 +1710,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
         long min = mark;
         long lowmem_reserve = z->lowmem_reserve[classzone_idx];
         int o;
+       long free_cma = 0;
  
         free_pages -= (1 << order) - 1;
         if (alloc_flags & ALLOC_HIGH)
@@ -1637,16 +1720,17 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
  #ifdef CONFIG_CMA
         /* If allocation can't use CMA areas don't use free CMA pages */
         if (!(alloc_flags & ALLOC_CMA))
-               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+               free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
  #endif
-       if (free_pages <= min + lowmem_reserve)
+
+       if (free_pages - free_cma <= min + lowmem_reserve)
                 return false;
         for (o = 0; o < order; o++) {
                 /* At the next order, this order's pages become unavailable */
                 free_pages -= z->free_area[o].nr_free << o;
  
                 /* Require fewer higher order pages to be free */
-               min >>= 1;
+               min >>= min_free_order_shift;
  
                 if (free_pages <= min)
                         return false;
@@ -2115,6 +2199,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                 return NULL;
         }
  
+       /*
+        * PM-freezer should be notified that there might be an OOM killer on
+        * its way to kill and wake somebody up. This is too early and we might
+        * end up not killing anything but false positives are acceptable.
+        * See freeze_processes.
+        */
+       note_oom_kill();
+
         /*
          * Go through the zonelist yet one more time, keep very high watermark
          * here, this is only to catch a parallel oom killing, we must fail if
@@ -2335,7 +2427,7 @@ static inline int
  gfp_to_alloc_flags(gfp_t gfp_mask)
  {
         int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
-       const gfp_t wait = gfp_mask & __GFP_WAIT;
+       const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
  
         /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
         BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2344,20 +2436,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
          * The caller may dip into page reserves a bit more if the caller
          * cannot run direct reclaim, or if the caller has realtime scheduling
          * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
-        * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
+        * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
          */
         alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
  
-       if (!wait) {
+       if (atomic) {
                 /*
-                * Not worth trying to allocate harder for
-                * __GFP_NOMEMALLOC even if it can't schedule.
+                * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+                * if it can't schedule.
                  */
-               if  (!(gfp_mask & __GFP_NOMEMALLOC))
+               if (!(gfp_mask & __GFP_NOMEMALLOC))
                         alloc_flags |= ALLOC_HARDER;
                 /*
-                * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
-                * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+                * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+                * comment for __cpuset_node_allowed_softwall().
                  */
                 alloc_flags &= ~ALLOC_CPUSET;
         } else if (unlikely(rt_task(current)) && !in_interrupt())
@@ -2399,6 +2491,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         bool sync_migration = false;
         bool deferred_compaction = false;
         bool contended_compaction = false;
+#ifdef CONFIG_SEC_OOM_KILLER
+       unsigned long oom_invoke_timeout = jiffies + HZ/4;
+#endif
  
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -2411,6 +2506,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                 return NULL;
         }
  
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+       set_tsk_thread_flag(current, TIF_MEMALLOC);
+#endif
+
         /*
          * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
          * __GFP_NOWARN set) should not cause reclaim since the subsystem
@@ -2518,8 +2617,16 @@ rebalance:
         /*
          * If we failed to make any progress reclaiming, then we are
          * running out of options and have to consider going OOM
+        * (with CONFIG_SEC_OOM_KILLER, also after looping for over 250 ms).
          */
-       if (!did_some_progress) {
+
+#ifdef CONFIG_SEC_OOM_KILLER   /* no progress or timeout hit, unless boot_mode is 2 */
+#define SHOULD_CONSIDER_OOM ((!did_some_progress || time_after(jiffies, oom_invoke_timeout)) && (boot_mode != 2))
+#else                          /* no progress, unless boot_mode is 2 */
+#define SHOULD_CONSIDER_OOM (!did_some_progress && (boot_mode != 2))
+#endif
+
+       if (SHOULD_CONSIDER_OOM) {
                 if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                         if (oom_killer_disabled)
                                 goto nopage;
@@ -2527,6 +2634,13 @@ rebalance:
                         if ((current->flags & PF_DUMPCORE) &&
                             !(gfp_mask & __GFP_NOFAIL))
                                 goto nopage;
+
+#ifdef CONFIG_SEC_OOM_KILLER
+                       if (did_some_progress)
+                               pr_info("time's up : calling "
+                                       "__alloc_pages_may_oom(o:%u, gfp:0x%x)\n",
+                                                               order, gfp_mask);
+#endif
                         page = __alloc_pages_may_oom(gfp_mask, order,
                                         zonelist, high_zoneidx,
                                         nodemask, preferred_zone,
@@ -2552,6 +2666,9 @@ rebalance:
                                         goto nopage;
                         }
  
+#ifdef CONFIG_SEC_OOM_KILLER
+                       oom_invoke_timeout = jiffies + HZ/4;
+#endif
                         goto restart;
                 }
         }
@@ -2582,9 +2699,15 @@ rebalance:
         }
  
  nopage:
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+       clear_tsk_thread_flag(current, TIF_MEMALLOC);
+#endif
         warn_alloc_failed(gfp_mask, order, NULL);
         return page;
  got_pg:
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+       clear_tsk_thread_flag(current, TIF_MEMALLOC);
+#endif
         if (kmemcheck_enabled)
                 kmemcheck_pagealloc_alloc(page, order, gfp_mask);
  
@@ -4525,7 +4648,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
  #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
  
  /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __init set_pageblock_order(void)
+void __paginginit set_pageblock_order(void)
  {
         unsigned int order;
  
@@ -4553,7 +4676,7 @@ void __init set_pageblock_order(void)
   * include/linux/pageblock-flags.h for the values of pageblock_order based on
   * the kernel config
   */
-void __init set_pageblock_order(void)
+void __paginginit set_pageblock_order(void)
  {
  }
  
@@ -5048,15 +5171,18 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                                 sizeof(arch_zone_lowest_possible_pfn));
         memset(arch_zone_highest_possible_pfn, 0,
                                 sizeof(arch_zone_highest_possible_pfn));
-       arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
-       arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
-       for (i = 1; i < MAX_NR_ZONES; i++) {
+
+       start_pfn = find_min_pfn_with_active_regions();
+
+       for (i = 0; i < MAX_NR_ZONES; i++) {
                 if (i == ZONE_MOVABLE)
                         continue;
-               arch_zone_lowest_possible_pfn[i] =
-                       arch_zone_highest_possible_pfn[i-1];
-               arch_zone_highest_possible_pfn[i] =
-                       max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
+
+               end_pfn = max(max_zone_pfn[i], start_pfn);
+               arch_zone_lowest_possible_pfn[i] = start_pfn;
+               arch_zone_highest_possible_pfn[i] = end_pfn;
+
+               start_pfn = end_pfn;
         }
         arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
         arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
@@ -5162,8 +5288,8 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end,
         }
  
         if (pages && s)
-               pr_info("Freeing %s memory: %ldK (%lx - %lx)\n",
-                       s, pages << (PAGE_SHIFT - 10), start, end);
+               pr_info("Freeing %s memory: %ldK\n",
+                       s, pages << (PAGE_SHIFT - 10));
  
         return pages;
  }
@@ -5318,6 +5444,7 @@ static void setup_per_zone_lowmem_reserve(void)
  static void __setup_per_zone_wmarks(void)
  {
         unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+       unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
         unsigned long lowmem_pages = 0;
         struct zone *zone;
         unsigned long flags;
@@ -5329,11 +5456,14 @@ static void __setup_per_zone_wmarks(void)
         }
  
         for_each_zone(zone) {
-               u64 tmp;
+               u64 min, low;
  
                 spin_lock_irqsave(&zone->lock, flags);
-               tmp = (u64)pages_min * zone->managed_pages;
-               do_div(tmp, lowmem_pages);
+               min = (u64)pages_min * zone->managed_pages;
+               do_div(min, lowmem_pages);
+               low = (u64)pages_low * zone->managed_pages;
+               do_div(low, vm_total_pages);
+
                 if (is_highmem(zone)) {
                         /*
                          * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -5354,11 +5484,13 @@ static void __setup_per_zone_wmarks(void)
                          * If it's a lowmem zone, reserve a number of pages
                          * proportionate to the zone's size.
                          */
-                       zone->watermark[WMARK_MIN] = tmp;
+                       zone->watermark[WMARK_MIN] = min;
                 }
  
-               zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
-               zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
+               zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) +
+                                       low + (min >> 2);
+               zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+                                       low + (min >> 1);
  
                 setup_zone_migrate_reserve(zone);
                 spin_unlock_irqrestore(&zone->lock, flags);
@@ -5405,6 +5537,9 @@ void setup_per_zone_wmarks(void)
   */
  static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
  {
+#ifdef CONFIG_FIX_INACTIVE_RATIO
+       zone->inactive_ratio = 1;
+#else
         unsigned int gb, ratio;
  
         /* Zone size in gigabytes */
@@ -5415,6 +5550,7 @@ static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
                 ratio = 1;
  
         zone->inactive_ratio = ratio;
+#endif
  }
  
  static void __meminit setup_per_zone_inactive_ratio(void)
@@ -5471,7 +5607,7 @@ module_init(init_per_zone_wmark_min)
  /*
   * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so 
   *     that we can call two helper functions whenever min_free_kbytes
- *     changes.
+ *     or extra_free_kbytes changes.
   */
  int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 
         void __user *buffer, size_t *length, loff_t *ppos)
@@ -5928,11 +6064,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
  {
         unsigned long outer_start, outer_end;
         int ret = 0, order;
+       struct zone *zone = page_zone(pfn_to_page(start));
  
         struct compact_control cc = {
                 .nr_migratepages = 0,
                 .order = -1,
-               .zone = page_zone(pfn_to_page(start)),
+               .zone = zone,
                 .sync = true,
                 .ignore_skip_hint = true,
         };
@@ -5968,6 +6105,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
         if (ret)
                 return ret;
  
+       zone->cma_alloc = 1;
+
         ret = __alloc_contig_migrate_range(&cc, start, end);
         if (ret)
                 goto done;
@@ -6027,6 +6166,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
  done:
         undo_isolate_page_range(pfn_max_align_down(start),
                                 pfn_max_align_up(end), migratetype);
+       zone->cma_alloc = 0;
         return ret;
  }
  
@@ -6140,6 +6280,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
                 list_del(&page->lru);
                 rmv_page_order(page);
                 zone->free_area[order].nr_free--;
+#ifdef CONFIG_HIGHMEM
+               if (PageHighMem(page))
+                       totalhigh_pages -= 1 << order;
+#endif
                 for (i = 0; i < (1 << order); i++)
                         SetPageReserved((page+i));
                 pfn += (1 << order);