mm/hugetlb: add new HugeTLB cgroup
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Tue, 31 Jul 2012 23:42:12 +0000 (16:42 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Aug 2012 01:42:40 +0000 (18:42 -0700)
Implement a new controller that allows us to control HugeTLB allocations.
The extension allows limiting the HugeTLB usage per control group and
enforces the controller limit during page fault.  Since HugeTLB doesn't
support page reclaim, enforcing the limit at page fault time implies that
the application will get a SIGBUS signal if it tries to access HugeTLB
pages beyond its limit.  This requires the application to know beforehand
how many HugeTLB pages it would require for its use.

The charge/uncharge calls will be added to the HugeTLB code in a later
patch.  Support for cgroup removal will be added in later patches.

[akpm@linux-foundation.org: s/CONFIG_CGROUP_HUGETLB_RES_CTLR/CONFIG_MEMCG_HUGETLB/g]
[akpm@linux-foundation.org: s/CONFIG_MEMCG_HUGETLB/CONFIG_CGROUP_HUGETLB/g]
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/cgroup_subsys.h
include/linux/hugetlb_cgroup.h [new file with mode: 0644]
init/Kconfig
mm/Makefile
mm/hugetlb_cgroup.c [new file with mode: 0644]

index 0bd390ce98b2a9e9fd5fa69a697bb3d02f252194..5b41ce07902499f070b2eaabbfb1674acdfa6253 100644 (file)
@@ -72,3 +72,9 @@ SUBSYS(net_prio)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_HUGETLB
+SUBSYS(hugetlb)
+#endif
+
+/* */
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
new file mode 100644 (file)
index 0000000..f19889e
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright IBM Corporation, 2012
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#ifndef _LINUX_HUGETLB_CGROUP_H
+#define _LINUX_HUGETLB_CGROUP_H
+
+#include <linux/res_counter.h>
+
+struct hugetlb_cgroup;
+
+#ifdef CONFIG_CGROUP_HUGETLB
+/*
+ * Report whether the hugetlb controller is switched off, based on the
+ * "disabled" field of hugetlb_subsys.
+ */
+static inline bool hugetlb_cgroup_disabled(void)
+{
+       return !!hugetlb_subsys.disabled;
+}
+
+#else
+/* Controller support is not configured: always report it as disabled. */
+static inline bool hugetlb_cgroup_disabled(void)
+{
+       return true;
+}
+
+#endif  /* CONFIG_CGROUP_HUGETLB */
+#endif
index b3f55f15e1074ca90bfd1cacbba5cd0c68be9ad7..72437760e90e49abdf992d50fb4f283e8105f278 100644 (file)
@@ -751,6 +751,21 @@ config CGROUP_MEM_RES_CTLR_KMEM
          the kmem extension can use it to guarantee that no group of processes
          will ever exhaust kernel resources alone.
 
+config CGROUP_HUGETLB
+       bool "HugeTLB Resource Controller for Control Groups"
+       depends on RESOURCE_COUNTERS && HUGETLB_PAGE && EXPERIMENTAL
+       default n
+       help
+         Provides a cgroup Resource Controller for HugeTLB pages.
+         When you enable this, you can put a per cgroup limit on HugeTLB usage.
+         The limit is enforced during page fault. Since HugeTLB doesn't
+         support page reclaim, enforcing the limit at page fault time implies
+         that the application will get a SIGBUS signal if it tries to access
+         HugeTLB pages beyond its limit. This requires the application to know
+         beforehand how many HugeTLB pages it would require for its use. The
+         control group is tracked in the third page's lru pointer. This means
+         that we cannot use the controller with huge pages smaller than 3 pages.
+
 config CGROUP_PERF
        bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
        depends on PERF_EVENTS && CGROUPS
index 8e81fe263c94f63dcf3700fd63828a58d0c963af..fd6fc1c1966c25a1d6dfe09c5d60f4ec9650a64c 100644 (file)
@@ -50,6 +50,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
new file mode 100644 (file)
index 0000000..0d1a66e
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ *
+ * Copyright IBM Corporation, 2012
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/hugetlb_cgroup.h>
+
+struct hugetlb_cgroup {
+       struct cgroup_subsys_state css; /* recovered via container_of() */
+       /*
+        * Resource counters accounting for hugepages from hugetlb.
+        * The array holds HUGE_MAX_HSTATE counters; each one is
+        * initialised in hugetlb_cgroup_create() with the counter at
+        * the same index in the parent cgroup as its parent (NULL when
+        * the cgroup has no parent).
+        * NOTE(review): presumably one counter per huge page size
+        * (hstate) -- confirm against the hugetlb core.
+        */
+       struct res_counter hugepage[HUGE_MAX_HSTATE];
+};
+
+struct cgroup_subsys hugetlb_subsys __read_mostly;
+static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
+
+/* Map an embedded css back to the hugetlb_cgroup that contains it. */
+static inline
+struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+       struct hugetlb_cgroup *h_cg;
+
+       h_cg = container_of(s, struct hugetlb_cgroup, css);
+       return h_cg;
+}
+
+/* Return the hugetlb_cgroup behind @cgroup's hugetlb subsystem state. */
+static inline
+struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
+{
+       struct cgroup_subsys_state *css;
+
+       css = cgroup_subsys_state(cgroup, hugetlb_subsys_id);
+       return hugetlb_cgroup_from_css(css);
+}
+
+/* Return the hugetlb_cgroup behind @task's hugetlb subsystem state. */
+static inline
+struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
+{
+       struct cgroup_subsys_state *css;
+
+       css = task_subsys_state(task, hugetlb_subsys_id);
+       return hugetlb_cgroup_from_css(css);
+}
+
+/* True when @h_cg is the hugetlb_cgroup recorded in root_h_cgroup. */
+static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
+{
+       return root_h_cgroup == h_cg;
+}
+
+/* Return the hugetlb_cgroup of @cg's parent, or NULL if @cg has no parent. */
+static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
+{
+       struct cgroup *parent = cg->parent;
+
+       return parent ? hugetlb_cgroup_from_cgroup(parent) : NULL;
+}
+
+/*
+ * Check whether @cg has any hugepage usage recorded.
+ *
+ * Scans the first hugetlb_max_hstate counters and returns true as soon as
+ * one reports a RES_USAGE value above zero; returns false otherwise.
+ */
+static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
+{
+       struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
+       int i = 0;
+
+       while (i < hugetlb_max_hstate) {
+               if (res_counter_read_u64(&h_cg->hugepage[i], RES_USAGE) > 0)
+                       return true;
+               i++;
+       }
+       return false;
+}
+
+/*
+ * Allocate and set up the hugetlb_cgroup for a newly created @cgroup.
+ *
+ * Each of the HUGE_MAX_HSTATE counters is initialised with the counter at
+ * the same index in the parent cgroup as its parent.  A cgroup without a
+ * parent is remembered in root_h_cgroup and gets parentless counters.
+ *
+ * Returns the embedded css, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
+{
+       struct cgroup *parent = cgroup->parent;
+       struct hugetlb_cgroup *new_h_cg;
+       struct hugetlb_cgroup *parent_h_cg = NULL;
+       int i;
+
+       new_h_cg = kzalloc(sizeof(*new_h_cg), GFP_KERNEL);
+       if (!new_h_cg)
+               return ERR_PTR(-ENOMEM);
+
+       if (parent)
+               parent_h_cg = hugetlb_cgroup_from_cgroup(parent);
+       else
+               root_h_cgroup = new_h_cg;
+
+       /* One pass covers both cases; only the parent counter differs. */
+       for (i = 0; i < HUGE_MAX_HSTATE; i++)
+               res_counter_init(&new_h_cg->hugepage[i],
+                                parent ? &parent_h_cg->hugepage[i] : NULL);
+
+       return &new_h_cg->css;
+}
+
+/* Free the hugetlb_cgroup that backs @cgroup. */
+static void hugetlb_cgroup_destroy(struct cgroup *cgroup)
+{
+       kfree(hugetlb_cgroup_from_cgroup(cgroup));
+}
+
+/*
+ * Removal is not supported yet, so every attempt is refused with -EBUSY.
+ * We will add the cgroup removal support in later patches.
+ */
+static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
+{
+       return -EBUSY;
+}
+
+/* cgroup subsystem descriptor wiring up the hugetlb controller callbacks. */
+struct cgroup_subsys hugetlb_subsys = {
+       .name           = "hugetlb",
+       .subsys_id      = hugetlb_subsys_id,
+       .create         = hugetlb_cgroup_create,
+       .pre_destroy    = hugetlb_cgroup_pre_destroy,
+       .destroy        = hugetlb_cgroup_destroy,
+};