 * Copyright (C) 2015 Samsung Electronics, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Makes a best effort to allocate the required high-order pages.
 */
#include <linux/list.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mm_types.h>
#include <linux/mmzone.h>
#include <linux/migrate.h>
#include <linux/memcontrol.h>
#include <linux/page-isolation.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/scatterlist.h>
#include <linux/debugfs.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
#include <linux/oom.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
#define MAX_SCAN_TRY		(2)

static unsigned long start_pfn, end_pfn;
static unsigned long cached_scan_pfn;

#define HPA_MIN_OOMADJ		100
static bool oom_unkillable_task(struct task_struct *p)
{
	/* the global init task must never be killed */
	if (is_global_init(p))
		return true;

	/* kernel threads own no user memory worth reclaiming */
	if (p->flags & PF_KTHREAD)
		return true;

	return false;
}
static bool oom_skip_task(struct task_struct *p, int selected_adj)
{
	/* never pick a victim from our own thread group */
	if (same_thread_group(p, current))
		return true;

	/* tasks at or below the minimum adj are considered too important */
	if (p->signal->oom_score_adj <= HPA_MIN_OOMADJ)
		return true;

	/* a victim with a higher oom_score_adj has already been selected */
	if ((p->signal->oom_score_adj < selected_adj) &&
	    (selected_adj <= OOM_SCORE_ADJ_MAX))
		return true;

	/* the OOM reaper has already reclaimed this task's memory */
	if (test_bit(MMF_OOM_SKIP, &p->mm->flags))
		return true;

	return false;
}
static int hpa_killer(void)
{
	struct task_struct *tsk, *p;
	struct task_struct *selected = NULL;
	unsigned long selected_tasksize = 0;
	int selected_adj = OOM_SCORE_ADJ_MAX + 1;

	rcu_read_lock();
	for_each_process(tsk) {
		unsigned long tasksize;
		int current_adj;

		if (oom_unkillable_task(tsk))
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (oom_skip_task(p, selected_adj)) {
			task_unlock(p);
			continue;
		}

		/* estimate the reclaimable footprint of the candidate */
		tasksize = get_mm_rss(p->mm);
		tasksize += get_mm_counter(p->mm, MM_SWAPENTS);
		tasksize += atomic_long_read(&p->mm->nr_ptes);
		tasksize += mm_nr_pmds(p->mm);
		current_adj = p->signal->oom_score_adj;
		task_unlock(p);

		/* among candidates with the same adj, prefer the largest one */
		if (selected && (current_adj == selected_adj) &&
		    (tasksize <= selected_tasksize))
			continue;

		if (selected)
			put_task_struct(selected);

		selected = p;
		selected_tasksize = tasksize;
		selected_adj = current_adj;
		get_task_struct(selected);
	}
	rcu_read_unlock();

	if (!selected) {
		pr_info("HPA: no killable task\n");
		return -ESRCH;
	}

	pr_info("HPA: Killing '%s' (%d), adj %d to free %lukB\n",
		selected->comm, task_pid_nr(selected), selected_adj,
		selected_tasksize * (PAGE_SIZE / SZ_1K));

	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, selected, true);
	put_task_struct(selected);

	return 0;
}
static bool is_movable_chunk(unsigned long pfn, unsigned int order)
{
	struct page *page = pfn_to_page(pfn);
	struct page *page_end = pfn_to_page(pfn + (1 << order));

	while (page != page_end) {
		if (PageCompound(page) || PageReserved(page))
			return false;
		/* free buddy pages are fine: they simply become part of the chunk */
		if (!PageLRU(page) && !__PageMovable(page) && !PageBuddy(page))
			return false;

		page += PageBuddy(page) ? 1 << page_order(page) : 1;
	}

	return true;
}
static int get_exception_of_page(phys_addr_t phys,
				 phys_addr_t exception_areas[][2],
				 int nr_exception)
{
	int i;

	/* return the index of the exception area containing @phys, or -1 */
	for (i = 0; i < nr_exception; i++)
		if ((exception_areas[i][0] <= phys) &&
		    (phys <= exception_areas[i][1]))
			return i;

	return -1;
}
static inline void expand(struct zone *zone, struct page *page,
			  int low, int high, struct free_area *area,
			  int migratetype)
{
	unsigned long size = 1 << high;

	/* return the unused halves of the high-order buddy to the free lists */
	while (high > low) {
		area--;
		high--;
		size >>= 1;

		list_add(&page[size].lru, &area->free_list[migratetype]);
		area->nr_free++;
		set_page_private(&page[size], high);
		__SetPageBuddy(&page[size]);
	}
}
static struct page *alloc_freepage_one(struct zone *zone, unsigned int order,
				       phys_addr_t exception_areas[][2],
				       int nr_exception)
{
	unsigned int current_order;
	struct free_area *area;
	struct page *page;
	int mt;

	for (mt = MIGRATE_UNMOVABLE; mt < MIGRATE_PCPTYPES; ++mt) {
		for (current_order = order;
		     current_order < MAX_ORDER; ++current_order) {
			area = &(zone->free_area[current_order]);

			list_for_each_entry(page, &area->free_list[mt], lru) {
				/* skip free pages that fall into an exception area */
				if (get_exception_of_page(page_to_phys(page),
							  exception_areas,
							  nr_exception) >= 0)
					continue;

				list_del(&page->lru);
				area->nr_free--;
				__ClearPageBuddy(page);
				set_page_private(page, 0);

				expand(zone, page, order,
				       current_order, area, mt);
				set_pcppage_migratetype(page, mt);

				return page;
			}
		}
	}

	return NULL;
}
static int alloc_freepages_range(struct zone *zone, unsigned int order,
				 struct page **pages, int required,
				 phys_addr_t exception_areas[][2],
				 int nr_exception)
{
	unsigned long wmark;
	unsigned long flags;
	struct page *page;
	int count = 0;

	spin_lock_irqsave(&zone->lock, flags);

	while (required > count) {
		/* stop before pushing the zone below its minimum watermark */
		wmark = min_wmark_pages(zone) + (1 << order);
		if (!zone_watermark_ok(zone, order, wmark, 0, 0))
			break;

		page = alloc_freepage_one(zone, order, exception_areas,
					  nr_exception);
		if (!page)
			break;

		post_alloc_hook(page, order, GFP_KERNEL);
		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
		pages[count++] = page;
		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
	}

	spin_unlock_irqrestore(&zone->lock, flags);

	return count;
}
static void prep_highorder_pages(unsigned long base_pfn, int order)
{
	int nr_pages = 1 << order;
	unsigned long pfn;

	/* the head page keeps its reference; reset the tail pages' refcounts */
	for (pfn = base_pfn + 1; pfn < base_pfn + nr_pages; pfn++)
		set_page_count(pfn_to_page(pfn), 0);
}
/**
 * alloc_pages_highorder_except() - allocate large order pages
 * @order: required page order
 * @pages: array to store the allocated @order order pages
 * @nents: number of @order order pages to allocate
 * @exception_areas: memory areas that must not contain any page in @pages
 * @nr_exception: number of memory areas in @exception_areas
 *
 * Returns 0 on allocation success, -error otherwise.
 *
 * Allocates @nents chunks, each consisting of 1 << @order physically
 * consecutive pages, and stores the page descriptor of the first page of
 * each chunk in @pages. Every chunk in @pages is also aligned to its own
 * size, i.e. (1 << @order) << PAGE_SHIFT bytes.
 *
 * If @nr_exception is larger than 0, alloc_pages_highorder_except() does not
 * allocate pages within the areas described in @exception_areas.
 * @exception_areas is an array of two-element arrays: the first element is
 * the start address of an area and the second element is its end address.
 * The end address is the last byte address in the area, that is,
 * "[start address] + [size] - 1". See the usage sketch after
 * free_pages_highorder() below.
 */
int alloc_pages_highorder_except(int order, struct page **pages, int nents,
				 phys_addr_t exception_areas[][2],
				 int nr_exception)
{
	struct zone *zone;
	unsigned int nr_pages = 1 << order;
	unsigned long total_scanned = 0;
	unsigned long pfn, tmp;
	int remained = nents;
	int retry_count = 0;
	int allocated;
	int ret;
	int mt;

retry:
	/* first, try to take already-free chunks from the buddy free lists */
	for_each_zone(zone) {
		if (zone->spanned_pages == 0)
			continue;

		allocated = alloc_freepages_range(zone, order,
				pages + nents - remained, remained,
				exception_areas, nr_exception);
		remained -= allocated;

		if (remained == 0)
			return 0;
	}

	/* then scan the movable range and migrate pages out of candidate chunks */
	for (pfn = ALIGN(cached_scan_pfn, nr_pages);
	     (total_scanned < (end_pfn - start_pfn) * MAX_SCAN_TRY)
	     && (remained > 0);
	     pfn += nr_pages, total_scanned += nr_pages) {

		if (pfn + nr_pages > end_pfn) {
			/* wrap around to the start of the scan range */
			pfn = ALIGN(start_pfn, nr_pages) - nr_pages;
			continue;
		}

		/* pfn validation check in the range */
		tmp = pfn;
		do {
			if (!pfn_valid(tmp))
				break;
		} while (++tmp < (pfn + nr_pages));

		if (tmp < (pfn + nr_pages))
			continue;

		mt = get_pageblock_migratetype(pfn_to_page(pfn));
		/*
		 * CMA pages should not be reclaimed.
		 * Isolated page blocks should not be tried again because that
		 * causes isolated page blocks to remain in the isolated state
		 * forever.
		 */
		if (is_migrate_cma(mt) || is_migrate_isolate(mt)) {
			/* nr_pages is added before next iteration */
			pfn = ALIGN(pfn + 1, pageblock_nr_pages) - nr_pages;
			continue;
		}

		ret = get_exception_of_page(pfn << PAGE_SHIFT,
					    exception_areas, nr_exception);
		if (ret >= 0) {
			/* jump past the matched exception area, keeping alignment */
			pfn = (exception_areas[ret][1] + 1) >> PAGE_SHIFT;
			pfn = ALIGN(pfn, nr_pages) - nr_pages;
			continue;
		}

		if (!is_movable_chunk(pfn, order))
			continue;

		ret = alloc_contig_range_fast(pfn, pfn + nr_pages, mt);
		if (ret)
			continue;

		prep_highorder_pages(pfn, order);

		pages[nents - remained] = pfn_to_page(pfn);
		remained--;

		/* save latest scanned pfn */
		cached_scan_pfn = pfn;
	}

	if (remained) {
		int i;

		/* reclaim slab caches and, as a last resort, kill a task and retry */
		drop_slab();
		count_vm_event(DROP_SLAB);

		if (hpa_killer() == 0) {
			total_scanned = 0;
			pr_info("HPA: drop_slab and killer retry %d count\n",
				++retry_count);
			goto retry;
		}

		for (i = 0; i < (nents - remained); i++)
			__free_pages(pages[i], order);

		pr_info("%s: remained=%d / %d, not enough memory in order %d\n",
			__func__, remained, nents, order);

		return -ENOMEM;
	}

	return 0;
}
int free_pages_highorder(int order, struct page **pages, int nents)
{
	int i;

	for (i = 0; i < nents; i++)
		__free_pages(pages[i], order);

	return 0;
}
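
/*
 * Usage sketch (illustration only, not part of the driver): a minimal,
 * hypothetical caller of the two exported helpers above. It allocates four
 * order-4 chunks (64KiB each with 4KiB pages) while excluding one made-up
 * physical window, then frees them again. The function name, the chunk
 * count and the exception range are all arbitrary examples.
 */
static int __maybe_unused hpa_usage_example(void)
{
	struct page *chunks[4];
	/* one exception area: { start address, last byte address } */
	phys_addr_t exception_areas[1][2] = {
		{ 0x80000000, 0x80100000 - 1 },
	};
	int ret;

	ret = alloc_pages_highorder_except(4, chunks, ARRAY_SIZE(chunks),
					   exception_areas,
					   ARRAY_SIZE(exception_areas));
	if (ret)
		return ret;	/* typically -ENOMEM */

	/* ... hand the size-aligned chunks to the device here ... */

	return free_pages_highorder(4, chunks, ARRAY_SIZE(chunks));
}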
static int __init init_highorder_pages_allocator(void)
{
	struct zone *zone;

	/* prefer scanning ZONE_MOVABLE, where migration is always possible */
	for_each_zone(zone) {
		if (zone->spanned_pages == 0)
			continue;
		if (zone_idx(zone) == ZONE_MOVABLE) {
			start_pfn = zone->zone_start_pfn;
			end_pfn = start_pfn + zone->present_pages;
			break;
		}
	}

	/* fall back to the whole DRAM range if there is no ZONE_MOVABLE */
	if (!end_pfn) {
		start_pfn = __phys_to_pfn(memblock_start_of_DRAM());
		end_pfn = max_pfn;
	}

	cached_scan_pfn = start_pfn;

	return 0;
}
late_initcall(init_highorder_pages_allocator);