memcg: fix css_is_ancestor() RCU locking
author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tue, 11 May 2010 21:06:59 +0000 (14:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 12 May 2010 00:33:42 +0000 (17:33 -0700)
Some callers (in memcontrol.c) call css_is_ancestor() without holding
rcu_read_lock().  Because css_is_ancestor() has to access RCU-protected
data, it should be called under rcu_read_lock().

This patch makes css_is_ancestor() itself access the RCU-protected area
safely.  (At least, "root" can have refcnt==0 if it's not an ancestor of
"child".  So, we need rcu_read_lock().)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
kernel/cgroup.c
mm/memcontrol.c

index 6db8b7f297a1c44834d875947a15e29bc28caf81..6d870f2d1228945a94344409012cb3b332a3b0f2 100644 (file)
@@ -4464,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
 }
 EXPORT_SYMBOL_GPL(css_depth);
 
+/**
+ *  css_is_ancestor - test "root" css is an ancestor of "child"
+ * @child: the css to be tested.
+ * @root: the css supposed to be an ancestor of the child.
+ *
+ * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
+ * this function reads css->id, it uses rcu_dereference() and rcu_read_lock().
+ * But, considering usual usage, the csses should be valid objects after test.
+ * Assuming that the caller will do some action to the child if this
+ * returns true, the caller must take "child"'s reference count.
+ * If "child" is valid object and this returns true, "root" is valid, too.
+ */
+
 bool css_is_ancestor(struct cgroup_subsys_state *child,
                    const struct cgroup_subsys_state *root)
 {
-       struct css_id *child_id = rcu_dereference(child->id);
-       struct css_id *root_id = rcu_dereference(root->id);
+       struct css_id *child_id;
+       struct css_id *root_id;
+       bool ret = true;
 
-       if (!child_id || !root_id || (child_id->depth < root_id->depth))
-               return false;
-       return child_id->stack[root_id->depth] == root_id->id;
+       rcu_read_lock();
+       child_id  = rcu_dereference(child->id);
+       root_id = rcu_dereference(root->id);
+       if (!child_id
+           || !root_id
+           || (child_id->depth < root_id->depth)
+           || (child_id->stack[root_id->depth] != root_id->id))
+               ret = false;
+       rcu_read_unlock();
+       return ret;
 }
 
 static void __free_css_id_cb(struct rcu_head *head)
index 595d03f33b2c014aa31def364e2f3df24a4d2727..8a79a6f0f029842860fae7cd9b15c8c90600d56c 100644 (file)
@@ -811,12 +811,10 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
         * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
         * hierarchy(even if use_hierarchy is disabled in "mem").
         */
-       rcu_read_lock();
        if (mem->use_hierarchy)
                ret = css_is_ancestor(&curr->css, &mem->css);
        else
                ret = (curr == mem);
-       rcu_read_unlock();
        css_put(&curr->css);
        return ret;
 }
@@ -1603,7 +1601,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                         * There is a small race that "from" or "to" can be
                         * freed by rmdir, so we use css_tryget().
                         */
-                       rcu_read_lock();
                        from = mc.from;
                        to = mc.to;
                        if (from && css_tryget(&from->css)) {
@@ -1624,7 +1621,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                                        do_continue = (to == mem_over_limit);
                                css_put(&to->css);
                        }
-                       rcu_read_unlock();
                        if (do_continue) {
                                DEFINE_WAIT(wait);
                                prepare_to_wait(&mc.waitq, &wait,