remove libdss from Makefile

[GitHub/moto-9609/android_kernel_motorola_exynos9610.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 82a6270c9743c9da4dd0d3a6d97f93923ea63bd8..7ab6eb89e9a33b2ebe8663928e4572bf45ba093e 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -24,7 +24,6 @@
  #include <linux/memblock.h>
  #include <linux/compiler.h>
  #include <linux/kernel.h>
-#include <linux/kmemcheck.h>
  #include <linux/kasan.h>
  #include <linux/module.h>
  #include <linux/suspend.h>
@@ -68,6 +67,7 @@
  #include <linux/ftrace.h>
  #include <linux/lockdep.h>
  #include <linux/nmi.h>
+#include <linux/psi.h>
  
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
@@ -131,24 +131,6 @@ unsigned long totalcma_pages __read_mostly;
  int percpu_pagelist_fraction;
  gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
  
-/*
- * A cached value of the page's pageblock's migratetype, used when the page is
- * put on a pcplist. Used to avoid the pageblock migratetype lookup when
- * freeing from pcplists in most cases, at the cost of possibly becoming stale.
- * Also the migratetype set in the page does not necessarily match the pcplist
- * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
- * other index - this ensures that it will be put on the correct CMA freelist.
- */
-static inline int get_pcppage_migratetype(struct page *page)
-{
-       return page->index;
-}
-
-static inline void set_pcppage_migratetype(struct page *page, int migratetype)
-{
-       page->index = migratetype;
-}
-
  #ifdef CONFIG_PM_SLEEP
  /*
   * The following functions are used by the suspend/hibernate code to temporarily
@@ -259,10 +241,22 @@ compound_page_dtor * const compound_page_dtors[] = {
  #endif
  };
  
+/*
+ * Try to keep at least this much lowmem free.  Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
  int min_free_kbytes = 1024;
  int user_min_free_kbytes = -1;
  int watermark_scale_factor = 10;
  
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory, to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
+
  static unsigned long __meminitdata nr_kernel_pages;
  static unsigned long __meminitdata nr_all_pages;
  static unsigned long __meminitdata dma_reserve;
@@ -1022,7 +1016,6 @@ static __always_inline bool free_pages_prepare(struct page *page,
         VM_BUG_ON_PAGE(PageTail(page), page);
  
         trace_mm_page_free(page, order);
-       kmemcheck_free_shadow(page, order);
  
         /*
          * Check tail pages before head page information is cleared to
@@ -1766,8 +1759,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
  
         arch_alloc_page(page, order);
         kernel_map_pages(page, 1 << order, 1);
-       kernel_poison_pages(page, 1 << order, 1);
         kasan_alloc_pages(page, order);
+       kernel_poison_pages(page, 1 << order, 1);
         set_page_owner(page, order, gfp_flags);
  }
  
@@ -2297,20 +2290,35 @@ do_steal:
  /*
   * Do the hard work of removing an element from the buddy allocator.
   * Call me with the zone->lock already held.
+ * If the gfp mask of the allocation has GFP_HIGHUSER_MOVABLE, @migratetype
+ * is changed from MIGRATE_MOVABLE to MIGRATE_CMA in rmqueue() to select the
+ * free list of MIGRATE_CMA. This helps deplete free CMA pages so that the
+ * watermark evaluation for unmovable page allocations does not differ too
+ * much from that for movable page allocations.
+ * If @migratetype is MIGRATE_CMA, it is reset to MIGRATE_MOVABLE once the
+ * free list of MIGRATE_CMA has been searched, so that the free list of
+ * MIGRATE_MOVABLE gets searched as well. This also records the migrate
+ * type intended by the page allocation in the trace.
   */
  static struct page *__rmqueue(struct zone *zone, unsigned int order,
                                 int migratetype)
  {
-       struct page *page;
+       struct page *page = NULL;
  
-retry:
-       page = __rmqueue_smallest(zone, order, migratetype);
-       if (unlikely(!page)) {
-               if (migratetype == MIGRATE_MOVABLE)
-                       page = __rmqueue_cma_fallback(zone, order);
+#ifdef CONFIG_CMA
+       if (migratetype == MIGRATE_CMA) {
+#else
+       if (migratetype == MIGRATE_MOVABLE) {
+#endif
+               page = __rmqueue_cma_fallback(zone, order);
+               migratetype = MIGRATE_MOVABLE;
+       }
  
-               if (!page && __rmqueue_fallback(zone, order, migratetype))
-                       goto retry;
+       while (!page) {
+               page = __rmqueue_smallest(zone, order, migratetype);
+               if (unlikely(!page) &&
+                   !__rmqueue_fallback(zone, order, migratetype))
+                       break;
         }
  
         trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -2487,10 +2495,6 @@ void drain_all_pages(struct zone *zone)
         if (WARN_ON_ONCE(!mm_percpu_wq))
                 return;
  
-       /* Workqueues cannot recurse */
-       if (current->flags & PF_WQ_WORKER)
-               return;
-
         /*
          * Do not drain if one is already in progress unless it's specific to
          * a zone. Such callers are primarily CMA and memory hotplug and need
@@ -2678,15 +2682,6 @@ void split_page(struct page *page, unsigned int order)
         VM_BUG_ON_PAGE(PageCompound(page), page);
         VM_BUG_ON_PAGE(!page_count(page), page);
  
-#ifdef CONFIG_KMEMCHECK
-       /*
-        * Split shadow pages too, because free(page[0]) would
-        * otherwise free the whole shadow.
-        */
-       if (kmemcheck_page_is_tracked(page))
-               split_page(virt_to_page(page[0].shadow), order);
-#endif
-
         for (i = 1; i < (1 << order); i++)
                 set_page_refcounted(page + i);
         split_page_owner(page, order);
@@ -2712,7 +2707,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                  * exists.
                  */
                 watermark = min_wmark_pages(zone) + (1UL << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
                         return 0;
  
                 __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -2786,6 +2781,25 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
                 else
                         page = list_first_entry(list, struct page, lru);
  
+               /*
+                * If the head or the tail page of the pcp list is a CMA page
+                * and the gfp flags are not GFP_HIGHUSER_MOVABLE, do not
+                * allocate a page from the pcp list. The free list of
+                * MIGRATE_CMA is a special case of the free list of
+                * MIGRATE_MOVABLE, and pages from the free list of
+                * MIGRATE_CMA are pushed to the pcp list of MIGRATE_MOVABLE.
+                * Since the pcp list of MIGRATE_MOVABLE is selected when the
+                * gfp flags have GFP_MOVABLE, we must not hand a CMA page on
+                * the pcp list of MIGRATE_MOVABLE to a movable allocation
+                * without GFP_HIGHUSER_MOVABLE.
+                * In that case, allocate a movable page from the free list of
+                * MIGRATE_MOVABLE instead of from the pcp list of
+                * MIGRATE_MOVABLE.
+                */
+#ifdef CONFIG_CMA
+               if (is_migrate_cma_page(page) && (migratetype != MIGRATE_CMA))
+                       return NULL;
+#endif
                 list_del(&page->lru);
                 pcp->count--;
         } while (check_new_pcp(page));
@@ -2796,7 +2810,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
  /* Lock and remove page from the per-cpu list */
  static struct page *rmqueue_pcplist(struct zone *preferred_zone,
                         struct zone *zone, unsigned int order,
-                       gfp_t gfp_flags, int migratetype)
+                       gfp_t gfp_flags, int migratetype,
+                       int migratetype_rmqueue)
  {
         struct per_cpu_pages *pcp;
         struct list_head *list;
@@ -2807,7 +2822,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
         local_irq_save(flags);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         list = &pcp->lists[migratetype];
-       page = __rmqueue_pcplist(zone,  migratetype, cold, pcp, list);
+       page = __rmqueue_pcplist(zone,  migratetype_rmqueue, cold, pcp, list);
         if (page) {
                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                 zone_statistics(preferred_zone, zone);
@@ -2827,11 +2842,25 @@ struct page *rmqueue(struct zone *preferred_zone,
  {
         unsigned long flags;
         struct page *page;
+       int migratetype_rmqueue = migratetype;
  
+#ifdef CONFIG_CMA
+       if ((migratetype_rmqueue == MIGRATE_MOVABLE) &&
+           ((gfp_flags & GFP_HIGHUSER_MOVABLE) == GFP_HIGHUSER_MOVABLE))
+               migratetype_rmqueue = MIGRATE_CMA;
+#endif
         if (likely(order == 0)) {
                 page = rmqueue_pcplist(preferred_zone, zone, order,
-                               gfp_flags, migratetype);
-               goto out;
+                               gfp_flags, migratetype, migratetype_rmqueue);
+               /*
+                * A movable allocation without GFP_HIGHUSER_MOVABLE gets
+                * another chance to allocate from the free list if the pcp
+                * list yielded no page. See the comment in __rmqueue_pcplist().
+                */
+#ifdef CONFIG_CMA
+               if (likely(page) || (migratetype_rmqueue != MIGRATE_MOVABLE))
+#endif
+                       goto out;
         }
  
         /*
@@ -2849,7 +2878,7 @@ struct page *rmqueue(struct zone *preferred_zone,
                                 trace_mm_page_alloc_zone_locked(page, order, migratetype);
                 }
                 if (!page)
-                       page = __rmqueue(zone, order, migratetype);
+                       page = __rmqueue(zone, order, migratetype_rmqueue);
         } while (page && check_new_pages(page, order));
         spin_unlock(&zone->lock);
         if (!page)
@@ -3015,9 +3044,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                 if (!area->nr_free)
                         continue;
  
-               if (alloc_harder)
-                       return true;
-
                 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
                         if (!list_empty(&area->free_list[mt]))
                                 return true;
@@ -3029,6 +3055,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                         return true;
                 }
  #endif
+               if (alloc_harder &&
+                       !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+                       return true;
         }
         return false;
  }
@@ -3386,15 +3415,20 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                 enum compact_priority prio, enum compact_result *compact_result)
  {
         struct page *page;
+       unsigned long pflags;
         unsigned int noreclaim_flag;
  
         if (!order)
                 return NULL;
  
+       psi_memstall_enter(&pflags);
         noreclaim_flag = memalloc_noreclaim_save();
+
         *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
                                                                         prio);
+
         memalloc_noreclaim_restore(noreclaim_flag);
+       psi_memstall_leave(&pflags);
  
         if (*compact_result <= COMPACT_INACTIVE)
                 return NULL;
@@ -3547,7 +3581,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
                 return false;
  
         /* this guy won't enter reclaim */
-       if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+       if (current->flags & PF_MEMALLOC)
                 return false;
  
         /* We're only interested __GFP_FS allocations for now */
@@ -3583,11 +3617,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
         struct reclaim_state reclaim_state;
         int progress;
         unsigned int noreclaim_flag;
+       unsigned long pflags;
  
         cond_resched();
  
         /* We now go into synchronous reclaim */
         cpuset_memory_pressure_bump();
+       psi_memstall_enter(&pflags);
         noreclaim_flag = memalloc_noreclaim_save();
         fs_reclaim_acquire(gfp_mask);
         reclaim_state.reclaimed_slab = 0;
@@ -3599,6 +3635,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
         current->reclaim_state = NULL;
         fs_reclaim_release(gfp_mask);
         memalloc_noreclaim_restore(noreclaim_flag);
+       psi_memstall_leave(&pflags);
  
         cond_resched();
  
@@ -3682,7 +3719,8 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
                 alloc_flags |= ALLOC_HARDER;
  
  #ifdef CONFIG_CMA
-       if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+       if ((gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) ||
+               ((gfp_mask & GFP_HIGHUSER_MOVABLE) == GFP_HIGHUSER_MOVABLE))
                 alloc_flags |= ALLOC_CMA;
  #endif
         return alloc_flags;
@@ -3877,22 +3915,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         enum compact_result compact_result;
         int compaction_retries;
         int no_progress_loops;
-       unsigned long alloc_start = jiffies;
-       unsigned int stall_timeout = 10 * HZ;
         unsigned int cpuset_mems_cookie;
         int reserve_flags;
  
-       /*
-        * In the slowpath, we sanity check order to avoid ever trying to
-        * reclaim >= MAX_ORDER areas which will never succeed. Callers may
-        * be using allocators in order of preference for an area that is
-        * too large.
-        */
-       if (order >= MAX_ORDER) {
-               WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
-               return NULL;
-       }
-
         /*
          * We also sanity check to catch abuse of atomic reserves being used by
          * callers that are not in atomic context.
@@ -3996,7 +4021,6 @@ retry:
          * orientated.
          */
         if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
-               ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
                 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->high_zoneidx, ac->nodemask);
         }
@@ -4010,14 +4034,6 @@ retry:
         if (!can_direct_reclaim)
                 goto nopage;
  
-       /* Make sure we know about allocations which stall for too long */
-       if (time_after(jiffies, alloc_start + stall_timeout)) {
-               warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
-                       "page allocation stalls for %ums, order:%u",
-                       jiffies_to_msecs(jiffies-alloc_start), order);
-               stall_timeout += 10 * HZ;
-       }
-
         /* Avoid recursion of direct reclaim */
         if (current->flags & PF_MEMALLOC)
                 goto nopage;
@@ -4195,6 +4211,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
         gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
         struct alloc_context ac = { };
  
+       /*
+        * There are several places where we assume that the order value is sane
+        * so bail out early if the request is out of bound.
+        */
+       if (unlikely(order >= MAX_ORDER)) {
+               WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
+               return NULL;
+       }
+
         gfp_mask &= gfp_allowed_mask;
         alloc_mask = gfp_mask;
         if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
@@ -4232,9 +4257,6 @@ out:
                 page = NULL;
         }
  
-       if (kmemcheck_enabled && page)
-               kmemcheck_pagealloc_alloc(page, order, gfp_mask);
-
         trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
  
         return page;
@@ -4356,11 +4378,11 @@ refill:
                 /* Even if we own the page, we do not use atomic_set().
                  * This would break get_page_unless_zero() users.
                  */
-               page_ref_add(page, size - 1);
+               page_ref_add(page, size);
  
                 /* reset page count bias and offset to start of new frag */
                 nc->pfmemalloc = page_is_pfmemalloc(page);
-               nc->pagecnt_bias = size;
+               nc->pagecnt_bias = size + 1;
                 nc->offset = size;
         }
  
@@ -4376,10 +4398,10 @@ refill:
                 size = nc->size;
  #endif
                 /* OK, page count is 0, we can safely set it */
-               set_page_count(page, size);
+               set_page_count(page, size + 1);
  
                 /* reset page count bias and offset to start of new frag */
-               nc->pagecnt_bias = size;
+               nc->pagecnt_bias = size + 1;
                 offset = size - fragsz;
         }
  
@@ -4576,6 +4598,13 @@ long si_mem_available(void)
                      min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
                          wmark_low);
  
+       /*
+        * Part of the kernel memory, which can be released under memory
+        * pressure.
+        */
+       available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+               PAGE_SHIFT;
+
         if (available < 0)
                 available = 0;
         return available;
@@ -5320,17 +5349,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 if (context != MEMMAP_EARLY)
                         goto not_early;
  
-               if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-                       /*
-                        * Skip to the pfn preceding the next valid one (or
-                        * end_pfn), such that we hit a valid pfn (or end_pfn)
-                        * on our next iteration of the loop.
-                        */
-                       pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+               if (!early_pfn_valid(pfn))
                         continue;
-               }
                 if (!early_pfn_in_nid(pfn, nid))
                         continue;
                 if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
@@ -5577,8 +5597,10 @@ void __meminit init_currently_empty_zone(struct zone *zone,
                                         unsigned long size)
  {
         struct pglist_data *pgdat = zone->zone_pgdat;
+       int zone_idx = zone_idx(zone) + 1;
  
-       pgdat->nr_zones = zone_idx(zone) + 1;
+       if (zone_idx > pgdat->nr_zones)
+               pgdat->nr_zones = zone_idx;
  
         zone->zone_start_pfn = zone_start_pfn;
  
@@ -6919,6 +6941,7 @@ static void setup_per_zone_lowmem_reserve(void)
  static void __setup_per_zone_wmarks(void)
  {
         unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+       unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
         unsigned long lowmem_pages = 0;
         struct zone *zone;
         unsigned long flags;
@@ -6930,11 +6953,14 @@ static void __setup_per_zone_wmarks(void)
         }
  
         for_each_zone(zone) {
-               u64 tmp;
+               u64 min, low;
  
                 spin_lock_irqsave(&zone->lock, flags);
-               tmp = (u64)pages_min * zone->managed_pages;
-               do_div(tmp, lowmem_pages);
+               min = (u64)pages_min * zone->managed_pages;
+               do_div(min, lowmem_pages);
+               low = (u64)pages_low * zone->managed_pages;
+               do_div(low, vm_total_pages);
+
                 if (is_highmem(zone)) {
                         /*
                          * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -6955,7 +6981,7 @@ static void __setup_per_zone_wmarks(void)
                          * If it's a lowmem zone, reserve a number of pages
                          * proportionate to the zone's size.
                          */
-                       zone->watermark[WMARK_MIN] = tmp;
+                       zone->watermark[WMARK_MIN] = min;
                 }
  
                 /*
@@ -6963,12 +6989,14 @@ static void __setup_per_zone_wmarks(void)
                  * scale factor in proportion to available memory, but
                  * ensure a minimum size on small systems.
                  */
-               tmp = max_t(u64, tmp >> 2,
+               min = max_t(u64, min >> 2,
                             mult_frac(zone->managed_pages,
                                       watermark_scale_factor, 10000));
  
-               zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + tmp;
-               zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
+               zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) +
+                                       low + min;
+               zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+                                       low + min * 2;
  
                 spin_unlock_irqrestore(&zone->lock, flags);
         }
@@ -7051,7 +7079,7 @@ core_initcall(init_per_zone_wmark_min)
  /*
   * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
   *     that we can call two helper functions whenever min_free_kbytes
- *     changes.
+ *     or extra_free_kbytes changes.
   */
  int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
@@ -7478,7 +7506,8 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
  
  /* [start, end) must belong to a single zone. */
  static int __alloc_contig_migrate_range(struct compact_control *cc,
-                                       unsigned long start, unsigned long end)
+                                       unsigned long start, unsigned long end,
+                                       bool drain)
  {
         /* This function is based on compact_zone() from compaction.c. */
         unsigned long nr_reclaimed;
@@ -7486,7 +7515,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
         unsigned int tries = 0;
         int ret = 0;
  
-       migrate_prep();
+       if (drain)
+               migrate_prep();
  
         while (pfn < end || !list_empty(&cc->migratepages)) {
                 if (fatal_signal_pending(current)) {
@@ -7512,7 +7542,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                 cc->nr_migratepages -= nr_reclaimed;
  
                 ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
-                                   NULL, 0, cc->mode, MR_CMA);
+                                   NULL, 0, cc->mode, drain ? MR_CMA : MR_HPA);
         }
         if (ret < 0) {
                 putback_movable_pages(&cc->migratepages);
@@ -7542,8 +7572,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
   * pages which PFN is in [start, end) are allocated for the caller and
   * need to be freed with free_contig_range().
   */
-int alloc_contig_range(unsigned long start, unsigned long end,
-                      unsigned migratetype, gfp_t gfp_mask)
+int __alloc_contig_range(unsigned long start, unsigned long end,
+                        unsigned migratetype, gfp_t gfp_mask, bool drain)
  {
         unsigned long outer_start, outer_end;
         unsigned int order;
@@ -7591,11 +7621,18 @@ int alloc_contig_range(unsigned long start, unsigned long end,
  
         /*
          * In case of -EBUSY, we'd like to know which page causes problem.
-        * So, just fall through. We will check it in test_pages_isolated().
+        * So, just fall through. test_pages_isolated() has a tracepoint
+        * which will report the busy page.
+        *
+        * It is possible that busy pages could become available before
+        * the call to test_pages_isolated, and the range will actually be
+        * allocated.  So, if we fall through be sure to clear ret so that
+        * -EBUSY is not accidentally used or returned to caller.
          */
-       ret = __alloc_contig_migrate_range(&cc, start, end);
+       ret = __alloc_contig_migrate_range(&cc, start, end, drain);
         if (ret && ret != -EBUSY)
                 goto done;
+       ret =0;
  
         /*
          * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
@@ -7614,38 +7651,41 @@ int alloc_contig_range(unsigned long start, unsigned long end,
          * isolated thus they won't get removed from buddy.
          */
  
-       lru_add_drain_all();
-       drain_all_pages(cc.zone);
-
         order = 0;
         outer_start = start;
-       while (!PageBuddy(pfn_to_page(outer_start))) {
-               if (++order >= MAX_ORDER) {
-                       outer_start = start;
-                       break;
+
+       if (drain) {
+               lru_add_drain_all();
+               drain_all_pages(cc.zone);
+
+               while (!PageBuddy(pfn_to_page(outer_start))) {
+                       if (++order >= MAX_ORDER) {
+                               outer_start = start;
+                               break;
+                       }
+                       outer_start &= ~0UL << order;
                 }
-               outer_start &= ~0UL << order;
-       }
  
-       if (outer_start != start) {
-               order = page_order(pfn_to_page(outer_start));
+               if (outer_start != start) {
+                       order = page_order(pfn_to_page(outer_start));
  
-               /*
-                * outer_start page could be small order buddy page and
-                * it doesn't include start page. Adjust outer_start
-                * in this case to report failed page properly
-                * on tracepoint in test_pages_isolated()
-                */
-               if (outer_start + (1UL << order) <= start)
-                       outer_start = start;
-       }
+                       /*
+                        * outer_start page could be small order buddy page and
+                        * it doesn't include start page. Adjust outer_start
+                        * in this case to report failed page properly
+                        * on tracepoint in test_pages_isolated()
+                        */
+                       if (outer_start + (1UL << order) <= start)
+                               outer_start = start;
+               }
  
-       /* Make sure the range is really isolated. */
-       if (test_pages_isolated(outer_start, end, false)) {
-               pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
-                       __func__, outer_start, end);
-               ret = -EBUSY;
-               goto done;
+               /* Make sure the range is really isolated. */
+               if (test_pages_isolated(outer_start, end, false)) {
+                       pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
+                               __func__, outer_start, end);
+                       ret = -EBUSY;
+                       goto done;
+               }
         }
  
         /* Grab isolated pages from freelists. */
@@ -7667,6 +7707,18 @@ done:
         return ret;
  }
  
+int alloc_contig_range(unsigned long start, unsigned long end,
+                      unsigned migratetype, gfp_t gfp_mask)
+{
+       return __alloc_contig_range(start, end, migratetype, gfp_mask, true);
+}
+
+int alloc_contig_range_fast(unsigned long start, unsigned long end,
+                           unsigned migratetype)
+{
+       return __alloc_contig_range(start, end, migratetype, GFP_KERNEL, false);
+}
+
  void free_contig_range(unsigned long pfn, unsigned nr_pages)
  {
         unsigned int count = 0;