cgroup: RCU protect each cgroup_subsys_state release

author Tejun Heo <tj@kernel.org>

Wed, 14 Aug 2013 00:22:51 +0000 (20:22 -0400)

committer Tejun Heo <tj@kernel.org>

Wed, 14 Aug 2013 00:22:51 +0000 (20:22 -0400)
author Tejun Heo <tj@kernel.org>
Wed, 14 Aug 2013 00:22:51 +0000 (20:22 -0400)
committer Tejun Heo <tj@kernel.org>
Wed, 14 Aug 2013 00:22:51 +0000 (20:22 -0400)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index 71e77e7cdb6fb2872727245948eb0c080ec4c8b8..c24bd0b9f93a11138b8cab60c0a686196ed2e75b 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -82,7 +82,8 @@ struct cgroup_subsys_state {
         /* ID for this css, if possible */
         struct css_id __rcu *id;
  
-       /* percpu_ref killing and putting dentry on the last css_put() */
+       /* percpu_ref killing and RCU release */
+       struct rcu_head rcu_head;
         struct work_struct destroy_work;
  };
  
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 3137e38995b03c8e871506eb0a9bce235f3c1334..66d01078eebe3569f2f3dafdf3e9c3e8fd2d449d 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -869,18 +869,8 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
  static void cgroup_free_fn(struct work_struct *work)
  {
         struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
-       struct cgroup_subsys *ss;
  
         mutex_lock(&cgroup_mutex);
-       /*
-        * Release the subsystem state objects.
-        */
-       for_each_root_subsys(cgrp->root, ss) {
-               struct cgroup_subsys_state *css = cgroup_css(cgrp, ss->subsys_id);
-
-               ss->css_free(css);
-       }
-
         cgrp->root->number_of_cgroups--;
         mutex_unlock(&cgroup_mutex);
  
@@ -4281,32 +4271,62 @@ err:
         return ret;
  }
  
+/*
+ * css destruction is a four-stage process.
+ *
+ * 1. Destruction starts.  Killing of the percpu_ref is initiated.
+ *    Implemented in kill_css().
+ *
+ * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
+ *    and thus css_tryget() is guaranteed to fail, the css can be offlined
+ *    by invoking offline_css().  After offlining, the base ref is put.
+ *    Implemented in css_killed_work_fn().
+ *
+ * 3. When the percpu_ref reaches zero, the only possible remaining
+ *    accessors are inside RCU read sections.  css_release() schedules the
+ *    RCU callback.
+ *
+ * 4. After the grace period, the css can be freed.  Implemented in
+ *    css_free_work_fn().
+ *
+ * It is actually hairier because both steps 2 and 4 require process context
+ * and thus involve punting to css->destroy_work, adding two additional
+ * steps to the already complex sequence.
+ */
  static void css_free_work_fn(struct work_struct *work)
  {
         struct cgroup_subsys_state *css =
                 container_of(work, struct cgroup_subsys_state, destroy_work);
+       struct cgroup *cgrp = css->cgroup;
  
         if (css->parent)
                 css_put(css->parent);
  
-       cgroup_dput(css->cgroup);
+       css->ss->css_free(css);
+       cgroup_dput(cgrp);
  }
  
-static void css_release(struct percpu_ref *ref)
+static void css_free_rcu_fn(struct rcu_head *rcu_head)
  {
         struct cgroup_subsys_state *css =
-               container_of(ref, struct cgroup_subsys_state, refcnt);
+               container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
  
         /*
          * css holds an extra ref to @cgrp->dentry which is put on the last
-        * css_put().  dput() requires process context, which css_put() may
-        * be called without.  @css->destroy_work will be used to invoke
-        * dput() asynchronously from css_put().
+        * css_put().  dput() requires process context which we don't have.
          */
         INIT_WORK(&css->destroy_work, css_free_work_fn);
         schedule_work(&css->destroy_work);
  }
  
+static void css_release(struct percpu_ref *ref)
+{
+       struct cgroup_subsys_state *css =
+               container_of(ref, struct cgroup_subsys_state, refcnt);
+
+       call_rcu(&css->rcu_head, css_free_rcu_fn);
+}
+
  static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
                      struct cgroup *cgrp)
  {
@@ -4356,6 +4376,7 @@ static void offline_css(struct cgroup_subsys_state *css)
  
         css->flags &= ~CSS_ONLINE;
         css->cgroup->nr_css--;
+       RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
  }
  
  /*
author	Tejun Heo <tj@kernel.org>
	Wed, 14 Aug 2013 00:22:51 +0000 (20:22 -0400)
committer	Tejun Heo <tj@kernel.org>
	Wed, 14 Aug 2013 00:22:51 +0000 (20:22 -0400)
include/linux/cgroup.h		patch \| blob \| blame \| history
kernel/cgroup.c		patch \| blob \| blame \| history