hugetlb/cgroup: add charge/uncharge routines for hugetlb cgroup
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Tue, 31 Jul 2012 23:42:18 +0000 (16:42 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Aug 2012 01:42:41 +0000 (18:42 -0700)
Add the charge and uncharge routines for hugetlb cgroup.  We do cgroup
charging in page alloc and uncharge in compound page destructor.
Assigning page's hugetlb cgroup is protected by hugetlb_lock.

[liwp@linux.vnet.ibm.com: add huge_page_order check to avoid incorrect uncharge]
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Wanpeng Li <liwp.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/hugetlb_cgroup.h
mm/hugetlb.c
mm/hugetlb_cgroup.c

index e5451a3b4ebc07156c829bc79ad6b8f9795aeceb..7d3fde996be32576c4a9e7e8334fadcc678d4e18 100644 (file)
@@ -53,6 +53,16 @@ static inline bool hugetlb_cgroup_disabled(void)
        return false;
 }
 
+extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+                                       struct hugetlb_cgroup **ptr);
+extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+                                        struct hugetlb_cgroup *h_cg,
+                                        struct page *page);
+extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
+                                        struct page *page);
+extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+                                          struct hugetlb_cgroup *h_cg);
+
 #else
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
 {
@@ -70,5 +80,33 @@ static inline bool hugetlb_cgroup_disabled(void)
        return true;
 }
 
+static inline int
+hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+                            struct hugetlb_cgroup **ptr)
+{
+       return 0;       /* !CONFIG_MEM_RES_CTLR_HUGETLB stub: *ptr is not written */
+}
+
+static inline void
+hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+                            struct hugetlb_cgroup *h_cg,
+                            struct page *page)
+{
+       return;         /* !CONFIG_MEM_RES_CTLR_HUGETLB stub: page is left untouched */
+}
+
+static inline void
+hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page)
+{
+       return;         /* !CONFIG_MEM_RES_CTLR_HUGETLB stub: nothing to uncharge */
+}
+
+static inline void
+hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+                              struct hugetlb_cgroup *h_cg)
+{
+       return;         /* !CONFIG_MEM_RES_CTLR_HUGETLB stub: h_cg is ignored */
+}
+
 #endif  /* CONFIG_MEM_RES_CTLR_HUGETLB */
 #endif
index efe29b53daff85620bd6fa3c40d3964cd721642a..16a0f32c4820d7332792150ef013cb3cb070ff88 100644 (file)
@@ -627,6 +627,8 @@ static void free_huge_page(struct page *page)
        BUG_ON(page_mapcount(page));
 
        spin_lock(&hugetlb_lock);
+       hugetlb_cgroup_uncharge_page(hstate_index(h),
+                                    pages_per_huge_page(h), page);
        if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
                /* remove the page from active list */
                list_del(&page->lru);
@@ -1115,7 +1117,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
        struct hstate *h = hstate_vma(vma);
        struct page *page;
        long chg;
+       int ret, idx;
+       struct hugetlb_cgroup *h_cg;
 
+       idx = hstate_index(h);
        /*
         * Processes that did not create the mapping will have no
         * reserves and will not have accounted against subpool
@@ -1131,6 +1136,11 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
                if (hugepage_subpool_get_pages(spool, chg))
                        return ERR_PTR(-ENOSPC);
 
+       ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
+       if (ret) {
+               hugepage_subpool_put_pages(spool, chg);
+               return ERR_PTR(-ENOSPC);
+       }
        spin_lock(&hugetlb_lock);
        page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
        spin_unlock(&hugetlb_lock);
@@ -1138,6 +1148,9 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
        if (!page) {
                page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
                if (!page) {
+                       hugetlb_cgroup_uncharge_cgroup(idx,
+                                                      pages_per_huge_page(h),
+                                                      h_cg);
                        hugepage_subpool_put_pages(spool, chg);
                        return ERR_PTR(-ENOSPC);
                }
@@ -1146,7 +1159,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
        set_page_private(page, (unsigned long)spool);
 
        vma_commit_reservation(h, vma, addr);
-
+       /* update page cgroup details */
+       hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
        return page;
 }
 
index 0d1a66e9039bc7500d5e936b8dde718c22d2bdfe..63e04cfa437dc1e754f324a363e2bdcd98c41142 100644 (file)
@@ -111,6 +111,86 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
           return -EBUSY;
 }
 
+int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+                                struct hugetlb_cgroup **ptr)
+{      /*
+        * Charge nr_pages * PAGE_SIZE to the hugepage[idx] counter of the
+        * current task's hugetlb cgroup. Returns the res_counter_charge()
+        * result, or 0 if no charge was attempted. *ptr receives the
+        * cgroup that was looked up, or NULL if no charge was attempted.
+        */
+       int ret = 0;
+       struct res_counter *fail_res;
+       struct hugetlb_cgroup *h_cg = NULL;
+       unsigned long csize = nr_pages * PAGE_SIZE;
+
+       if (hugetlb_cgroup_disabled())
+               goto done;
+       /*
+        * We don't charge any cgroup if the compound page has fewer
+        * than 3 pages.
+        */
+       if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
+               goto done;
+again:
+       rcu_read_lock();
+       h_cg = hugetlb_cgroup_from_task(current);
+       if (!css_tryget(&h_cg->css)) {
+               rcu_read_unlock();
+               goto again;     /* redo the lookup from the task */
+       }
+       rcu_read_unlock();
+
+       ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
+       css_put(&h_cg->css);
+done:
+       *ptr = h_cg;    /* written on every path, whatever ret is */
+       return ret;
+}
+
+void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+                                 struct hugetlb_cgroup *h_cg,
+                                 struct page *page)
+{      /* Record h_cg as page's hugetlb cgroup; idx and nr_pages are unused. */
+       if (hugetlb_cgroup_disabled() || !h_cg)
+               return;
+
+       spin_lock(&hugetlb_lock); /* protects the page's cgroup assignment */
+       set_hugetlb_cgroup(page, h_cg);
+       spin_unlock(&hugetlb_lock);
+       return;
+}
+
+/*
+ * Drop the charge recorded on @page: clear the page's hugetlb cgroup and
+ * uncharge nr_pages * PAGE_SIZE from that cgroup's hugepage[idx] counter.
+ * Does nothing when the controller is disabled or the page has no cgroup.
+ *
+ * Should be called with hugetlb_lock held
+ */
+void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
+                                 struct page *page)
+{
+       struct hugetlb_cgroup *h_cg;
+       unsigned long csize = nr_pages * PAGE_SIZE;
+
+       if (hugetlb_cgroup_disabled())
+               return;
+       VM_BUG_ON(!spin_is_locked(&hugetlb_lock));
+       h_cg = hugetlb_cgroup_from_page(page);
+       if (unlikely(!h_cg))
+               return;
+       set_hugetlb_cgroup(page, NULL);
+       res_counter_uncharge(&h_cg->hugepage[idx], csize);
+       return;
+}
+
+void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
+                                   struct hugetlb_cgroup *h_cg)
+{      /* Uncharge nr_pages * PAGE_SIZE from h_cg's hugepage[idx] counter. */
+       unsigned long csize = nr_pages * PAGE_SIZE;
+
+       if (hugetlb_cgroup_disabled() || !h_cg)
+               return;
+
+       if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
+               return; /* orders this small are never charged */
+
+       res_counter_uncharge(&h_cg->hugepage[idx], csize);
+       return;
+}
+
 struct cgroup_subsys hugetlb_subsys = {
        .name = "hugetlb",
        .create     = hugetlb_cgroup_create,