#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <linux/nmi.h>
+#include <linux/psi.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
-/*
- * A cached value of the page's pageblock's migratetype, used when the page is
- * put on a pcplist. Used to avoid the pageblock migratetype lookup when
- * freeing from pcplists in most cases, at the cost of possibly becoming stale.
- * Also the migratetype set in the page does not necessarily match the pcplist
- * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
- * other index - this ensures that it will be put on the correct CMA freelist.
- */
-static inline int get_pcppage_migratetype(struct page *page)
-{
- return page->index;
-}
-
-static inline void set_pcppage_migratetype(struct page *page, int migratetype)
-{
- page->index = migratetype;
-}
-
#ifdef CONFIG_PM_SLEEP
/*
* The following functions are used by the suspend/hibernate code to temporarily
arch_alloc_page(page, order);
kernel_map_pages(page, 1 << order, 1);
- kernel_poison_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
+ kernel_poison_pages(page, 1 << order, 1);
set_page_owner(page, order, gfp_flags);
}
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
+ * If the gfp mask of the page allocation has all of GFP_HIGHUSER_MOVABLE set,
+ * @migratetype is changed from MIGRATE_MOVABLE to MIGRATE_CMA in rmqueue() so
+ * that the MIGRATE_CMA free list is tried first. This helps deplete CMA free
+ * pages early, so that the watermark evaluation for unmovable page allocations
+ * does not differ too much from that for movable page allocations.
+ * If @migratetype is MIGRATE_CMA, it must be reset to MIGRATE_MOVABLE once the
+ * MIGRATE_CMA free list has been searched, so that the MIGRATE_MOVABLE free
+ * list gets a chance as well. This also makes the tracepoint record the
+ * migratetype the allocation actually asked for.
*/
static struct page *__rmqueue(struct zone *zone, unsigned int order,
int migratetype)
{
- struct page *page;
+ struct page *page = NULL;
-retry:
- page = __rmqueue_smallest(zone, order, migratetype);
- if (unlikely(!page)) {
- if (migratetype == MIGRATE_MOVABLE)
- page = __rmqueue_cma_fallback(zone, order);
+#ifdef CONFIG_CMA
+ if (migratetype == MIGRATE_CMA) {
+#else
+ if (migratetype == MIGRATE_MOVABLE) {
+#endif
+ page = __rmqueue_cma_fallback(zone, order);
+ migratetype = MIGRATE_MOVABLE;
+ }
- if (!page && __rmqueue_fallback(zone, order, migratetype))
- goto retry;
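+
+	/*
+	 * Start with the requested migratetype; whenever its free lists are
+	 * empty, try to steal pages from other migratetypes via
+	 * __rmqueue_fallback() and retry.
+	 */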
+ while (!page) {
+ page = __rmqueue_smallest(zone, order, migratetype);
+ if (unlikely(!page) &&
+ !__rmqueue_fallback(zone, order, migratetype))
+ break;
}
trace_mm_page_alloc_zone_locked(page, order, migratetype);
* exists.
*/
watermark = min_wmark_pages(zone) + (1UL << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+ if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
return 0;
__mod_zone_freepage_state(zone, -(1UL << order), mt);
else
page = list_first_entry(list, struct page, lru);
+	/*
+	 * If the page at the head or tail of the pcp list is a CMA page
+	 * and the gfp flags do not contain all of GFP_HIGHUSER_MOVABLE,
+	 * do not allocate it from the pcp list. The MIGRATE_CMA free
+	 * list is a special case of the MIGRATE_MOVABLE free list, and
+	 * pages freed from it are pushed onto the MIGRATE_MOVABLE pcp
+	 * list. Since the MIGRATE_MOVABLE pcp list is selected whenever
+	 * the gfp flags have __GFP_MOVABLE set, a CMA page sitting on
+	 * that pcp list must not be handed out to a movable allocation
+	 * that lacks GFP_HIGHUSER_MOVABLE. In that case, return NULL so
+	 * that a movable page is allocated from the MIGRATE_MOVABLE
+	 * buddy free list instead.
+	 */
+#ifdef CONFIG_CMA
+ if (is_migrate_cma_page(page) && (migratetype != MIGRATE_CMA))
+ return NULL;
+#endif
list_del(&page->lru);
pcp->count--;
} while (check_new_pcp(page));
/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int migratetype)
+ gfp_t gfp_flags, int migratetype,
+ int migratetype_rmqueue)
{
struct per_cpu_pages *pcp;
struct list_head *list;
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
- page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+ page = __rmqueue_pcplist(zone, migratetype_rmqueue, cold, pcp, list);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
{
unsigned long flags;
struct page *page;
+ int migratetype_rmqueue = migratetype;
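+
+	/*
+	 * Steer allocations carrying the full GFP_HIGHUSER_MOVABLE mask to
+	 * the MIGRATE_CMA free list first; __rmqueue() falls back to
+	 * MIGRATE_MOVABLE when no CMA page is available.
+	 */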
+#ifdef CONFIG_CMA
+ if ((migratetype_rmqueue == MIGRATE_MOVABLE) &&
+ ((gfp_flags & GFP_HIGHUSER_MOVABLE) == GFP_HIGHUSER_MOVABLE))
+ migratetype_rmqueue = MIGRATE_CMA;
+#endif
if (likely(order == 0)) {
page = rmqueue_pcplist(preferred_zone, zone, order,
- gfp_flags, migratetype);
- goto out;
+ gfp_flags, migratetype, migratetype_rmqueue);
+	/*
+	 * A movable allocation without the full GFP_HIGHUSER_MOVABLE
+	 * mask may have been refused a CMA page sitting on the pcp
+	 * list; give it another chance to allocate from the buddy
+	 * free list. See the comment in __rmqueue_pcplist().
+	 */
+#ifdef CONFIG_CMA
+ if (likely(page) || (migratetype_rmqueue != MIGRATE_MOVABLE))
+#endif
+ goto out;
}
/*
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
if (!page)
- page = __rmqueue(zone, order, migratetype);
+ page = __rmqueue(zone, order, migratetype_rmqueue);
} while (page && check_new_pages(page, order));
spin_unlock(&zone->lock);
if (!page)
enum compact_priority prio, enum compact_result *compact_result)
{
struct page *page;
+ unsigned long pflags;
unsigned int noreclaim_flag;
if (!order)
return NULL;
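+	/* Treat time spent in compaction as a memory stall for PSI. */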
+ psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();
+
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
prio);
+
memalloc_noreclaim_restore(noreclaim_flag);
+ psi_memstall_leave(&pflags);
if (*compact_result <= COMPACT_INACTIVE)
return NULL;
struct reclaim_state reclaim_state;
int progress;
unsigned int noreclaim_flag;
+ unsigned long pflags;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
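+	/* Direct reclaim time is accounted as a memory stall for PSI. */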
+ psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();
fs_reclaim_acquire(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = NULL;
fs_reclaim_release(gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
+ psi_memstall_leave(&pflags);
cond_resched();
alloc_flags |= ALLOC_HARDER;
#ifdef CONFIG_CMA
- if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ if ((gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) ||
+ ((gfp_mask & GFP_HIGHUSER_MOVABLE) == GFP_HIGHUSER_MOVABLE))
alloc_flags |= ALLOC_CMA;
#endif
return alloc_flags;
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
- page_ref_add(page, size - 1);
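+	/*
+	 * Even after every fragment has been handed out, the cache itself
+	 * still references the page, so size + 1 references are needed in
+	 * total: bias the refcount by size on top of the reference we
+	 * already own.
+	 */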
+ page_ref_add(page, size);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
- nc->pagecnt_bias = size;
+ nc->pagecnt_bias = size + 1;
nc->offset = size;
}
size = nc->size;
#endif
/* OK, page count is 0, we can safely set it */
- set_page_count(page, size);
+ set_page_count(page, size + 1);
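+	/* size + 1 for the same reason as above: the cache keeps one ref. */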
/* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
+ nc->pagecnt_bias = size + 1;
offset = size - fragsz;
}
/* [start, end) must belong to a single zone. */
static int __alloc_contig_migrate_range(struct compact_control *cc,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool drain)
{
/* This function is based on compact_zone() from compaction.c. */
unsigned long nr_reclaimed;
unsigned int tries = 0;
int ret = 0;
- migrate_prep();
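+	/*
+	 * The fast path (!drain) skips migrate_prep()'s lru_add_drain_all()
+	 * to reduce latency; pages still sitting on per-cpu pagevecs may
+	 * then be missed, making migration more likely to fail with -EBUSY.
+	 */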
+ if (drain)
+ migrate_prep();
while (pfn < end || !list_empty(&cc->migratepages)) {
if (fatal_signal_pending(current)) {
cc->nr_migratepages -= nr_reclaimed;
ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
- NULL, 0, cc->mode, MR_CMA);
+ NULL, 0, cc->mode, drain ? MR_CMA : MR_HPA);
}
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
* pages which PFN is in [start, end) are allocated for the caller and
* need to be freed with free_contig_range().
*/
-int alloc_contig_range(unsigned long start, unsigned long end,
- unsigned migratetype, gfp_t gfp_mask)
+int __alloc_contig_range(unsigned long start, unsigned long end,
+ unsigned migratetype, gfp_t gfp_mask, bool drain)
{
unsigned long outer_start, outer_end;
unsigned int order;
* allocated. So, if we fall through be sure to clear ret so that
* -EBUSY is not accidentally used or returned to caller.
*/
- ret = __alloc_contig_migrate_range(&cc, start, end);
+ ret = __alloc_contig_migrate_range(&cc, start, end, drain);
if (ret && ret != -EBUSY)
goto done;
ret = 0;
* isolated thus they won't get removed from buddy.
*/
- lru_add_drain_all();
- drain_all_pages(cc.zone);
-
order = 0;
outer_start = start;
- while (!PageBuddy(pfn_to_page(outer_start))) {
- if (++order >= MAX_ORDER) {
- outer_start = start;
- break;
+
+ if (drain) {
+ lru_add_drain_all();
+ drain_all_pages(cc.zone);
+
+ while (!PageBuddy(pfn_to_page(outer_start))) {
+ if (++order >= MAX_ORDER) {
+ outer_start = start;
+ break;
+ }
+ outer_start &= ~0UL << order;
}
- outer_start &= ~0UL << order;
- }
- if (outer_start != start) {
- order = page_order(pfn_to_page(outer_start));
+ if (outer_start != start) {
+ order = page_order(pfn_to_page(outer_start));
- /*
- * outer_start page could be small order buddy page and
- * it doesn't include start page. Adjust outer_start
- * in this case to report failed page properly
- * on tracepoint in test_pages_isolated()
- */
- if (outer_start + (1UL << order) <= start)
- outer_start = start;
- }
+ /*
+ * outer_start page could be small order buddy page and
+ * it doesn't include start page. Adjust outer_start
+ * in this case to report failed page properly
+ * on tracepoint in test_pages_isolated()
+ */
+ if (outer_start + (1UL << order) <= start)
+ outer_start = start;
+ }
- /* Make sure the range is really isolated. */
- if (test_pages_isolated(outer_start, end, false)) {
- pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
- __func__, outer_start, end);
- ret = -EBUSY;
- goto done;
+ /* Make sure the range is really isolated. */
+ if (test_pages_isolated(outer_start, end, false)) {
+ pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
+ __func__, outer_start, end);
+ ret = -EBUSY;
+ goto done;
+ }
}
/* Grab isolated pages from freelists. */
return ret;
}
+int alloc_contig_range(unsigned long start, unsigned long end,
+ unsigned migratetype, gfp_t gfp_mask)
+{
+ return __alloc_contig_range(start, end, migratetype, gfp_mask, true);
+}
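+/*
+ * alloc_contig_range_fast - lower-latency variant of alloc_contig_range()
+ *
+ * Skips the lru/pcp draining, the buddy-alignment search and the final
+ * isolation re-check, trading reliability for speed: ranges whose pages
+ * are still sitting on per-cpu lists are more likely to fail with -EBUSY.
+ */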
+
+int alloc_contig_range_fast(unsigned long start, unsigned long end,
+ unsigned migratetype)
+{
+ return __alloc_contig_range(start, end, migratetype, GFP_KERNEL, false);
+}
+
void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
unsigned int count = 0;