Currently, to charge a non-slab allocation to kmemcg one has to use the
alloc_kmem_pages helper with the __GFP_ACCOUNT flag. A page allocated
with this helper must eventually be freed using free_kmem_pages,
otherwise it won't be uncharged.
This API suits its current users fine, but it turns out to be impossible
to use along with page reference counting, i.e. when an allocation is
supposed to be freed with put_page, as is the case with pipe or unix
socket buffers.
To overcome this limitation, this patch moves charging/uncharging to
generic page allocator paths, i.e. to __alloc_pages_nodemask and
free_pages_prepare, and zaps alloc/free_kmem_pages helpers. This way,
one can use any of the available page allocation functions to get the
allocated page charged to kmemcg - it's enough to pass __GFP_ACCOUNT,
just like in case of kmalloc and friends. A charged page will be
automatically uncharged on free.
To make it possible, we need to mark pages charged to kmemcg somehow.
To avoid introducing a new page flag, we make use of page->_mapcount for
marking such pages. Since pages charged to kmemcg are not supposed to
be mapped to userspace, it should work just fine. There are other
(ab)users of page->_mapcount - buddy and balloon pages - but we don't
conflict with them.
In case kmemcg is compiled out or not used at runtime, this patch
introduces no overhead to generic page allocator paths. If kmemcg is
used, it adds one gfp flags check on alloc and one page->_mapcount
check on free, which shouldn't hurt performance, because the data
accessed are hot.
Link: http://lkml.kernel.org/r/a9736d856f895bcb465d9f257b54efe32eda6f99.1464079538.git.vdavydov@virtuozzo.com
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
* __GFP_THISNODE forces the allocation to be satisfied from the requested
* node with no fallbacks or placement policy enforcements.
*
- * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant
- * to kmem allocations).
+ * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
*/
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
-extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
-extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
- unsigned int order);
-
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
unsigned int fragsz, gfp_t gfp_mask);
extern void __free_page_frag(void *addr);
-extern void __free_kmem_pages(struct page *page, unsigned int order);
-extern void free_kmem_pages(unsigned long addr, unsigned int order);
-
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
PAGE_MAPCOUNT_OPS(Balloon, BALLOON)
+/*
+ * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
+ * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
+ */
+#define PAGE_KMEMCG_MAPCOUNT_VALUE (-512)
+PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG)
+
extern bool is_free_buddy_page(struct page *page);
__PAGEFLAG(Isolated, isolated, PF_ANY);
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
if (page)
memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
-(1 << THREAD_SIZE_ORDER));
- __free_kmem_pages(page, THREAD_SIZE_ORDER);
+ __free_pages(page, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_stack_cache;
#include <linux/sched/rt.h>
#include <linux/page_owner.h>
#include <linux/kthread.h>
+#include <linux/memcontrol.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
}
if (PageMappingFlags(page))
page->mapping = NULL;
+ if (memcg_kmem_enabled() && PageKmemcg(page)) {
+ memcg_kmem_uncharge(page, order);
+ __ClearPageKmemcg(page);
+ }
if (check_free)
bad += free_pages_check(page);
if (bad)
}
out:
+ if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
+ if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
+ __free_pages(page, order);
+ page = NULL;
+ } else
+ __SetPageKmemcg(page);
+ }
+
if (kmemcheck_enabled && page)
kmemcheck_pagealloc_alloc(page, order, gfp_mask);
}
EXPORT_SYMBOL(__free_page_frag);
-/*
- * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
- * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is
- * equivalent to alloc_pages.
- *
- * It should be used when the caller would like to use kmalloc, but since the
- * allocation is large, it has to fall back to the page allocator.
- */
-struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
-{
- struct page *page;
-
- page = alloc_pages(gfp_mask, order);
- if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
- page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
- __free_pages(page, order);
- page = NULL;
- }
- return page;
-}
-
-struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
-{
- struct page *page;
-
- page = alloc_pages_node(nid, gfp_mask, order);
- if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
- page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
- __free_pages(page, order);
- page = NULL;
- }
- return page;
-}
-
-/*
- * __free_kmem_pages and free_kmem_pages will free pages allocated with
- * alloc_kmem_pages.
- */
-void __free_kmem_pages(struct page *page, unsigned int order)
-{
- if (memcg_kmem_enabled())
- memcg_kmem_uncharge(page, order);
- __free_pages(page, order);
-}
-
-void free_kmem_pages(unsigned long addr, unsigned int order)
-{
- if (addr != 0) {
- VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_kmem_pages(virt_to_page((void *)addr), order);
- }
-}
-
static void *make_alloc_exact(unsigned long addr, unsigned int order,
size_t size)
{
struct page *page;
flags |= __GFP_COMP;
- page = alloc_kmem_pages(flags, order);
+ page = alloc_pages(flags, order);
ret = page ? page_address(page) : NULL;
kmemleak_alloc(ret, size, 1, flags);
kasan_kmalloc_large(ret, size, flags);
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(object);
- __free_kmem_pages(page, compound_order(page));
+ __free_pages(page, compound_order(page));
p[size] = NULL; /* mark object processed */
return size;
}
void *ptr = NULL;
flags |= __GFP_COMP | __GFP_NOTRACK;
- page = alloc_kmem_pages_node(node, flags, get_order(size));
+ page = alloc_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_kmem_pages(page, compound_order(page));
+ __free_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
struct page *page = area->pages[i];
BUG_ON(!page);
- __free_kmem_pages(page, 0);
+ __free_pages(page, 0);
}
kvfree(area->pages);
struct page *page;
if (node == NUMA_NO_NODE)
- page = alloc_kmem_pages(alloc_mask, order);
+ page = alloc_pages(alloc_mask, order);
else
- page = alloc_kmem_pages_node(node, alloc_mask, order);
+ page = alloc_pages_node(node, alloc_mask, order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */