revert "memcg: enhance memcg iterator to support predicates"
author    Andrew Morton <akpm@linux-foundation.org>
          Tue, 24 Sep 2013 22:27:37 +0000 (15:27 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 25 Sep 2013 00:00:26 +0000 (17:00 -0700)
Revert commit de57780dc659 ("memcg: enhance memcg iterator to support
predicates")

I merged this prematurely - Michal and Johannes still disagree about the
overall design direction and the future remains unclear.
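
For reference, the interface being removed pushed a predicate into the
iterator itself, while the restored code makes filtering the caller's job
again.  A condensed sketch of the two caller patterns, based on the
mm/vmscan.c hunk below (shrink_lruvec() stands in for the per-memcg work;
soft_reclaim, reclaim and sc are taken from __shrink_zone()):

Removed (predicate passed to the iterator):

	struct mem_cgroup *memcg = NULL;
	mem_cgroup_iter_filter filter = soft_reclaim ?
		mem_cgroup_soft_reclaim_eligible : NULL;

	/* the iterator skips nodes/subtrees the filter rejects */
	while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter)))
		shrink_lruvec(mem_cgroup_zone_lruvec(zone, memcg), sc);

Restored (plain walk, caller open-codes the eligibility check):

	struct mem_cgroup *memcg;

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		if (soft_reclaim &&
		    !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
			memcg = mem_cgroup_iter(root, memcg, &reclaim);
			continue;
		}
		shrink_lruvec(mem_cgroup_zone_lruvec(zone, memcg), sc);
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);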

Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/memcontrol.h
mm/memcontrol.c
mm/vmscan.c

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 60e95872da2983365300f1abb193b6e81974b5de..ef2b9bd7fafa7af1b6e9d94d5ba148283782a505 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
        unsigned int generation;
 };
 
-enum mem_cgroup_filter_t {
-       VISIT,          /* visit current node */
-       SKIP,           /* skip the current node and continue traversal */
-       SKIP_TREE,      /* skip the whole subtree and continue traversal */
-};
-
-/*
- * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
- * iterate through the hierarchy tree. Each tree element is checked by the
- * predicate before it is returned by the iterator. If a filter returns
- * SKIP or SKIP_TREE then the iterator code continues traversal (with the
- * next node down the hierarchy or the next node that doesn't belong under the
- * memcg's subtree).
- */
-typedef enum mem_cgroup_filter_t
-(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
-
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
-                                  struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim,
-                                  mem_cgroup_iter_filter cond);
-
-static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
-                                  struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim)
-{
-       return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
-}
-
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+                                  struct mem_cgroup *,
+                                  struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*
@@ -260,8 +234,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
        mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
                struct mem_cgroup *root);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
@@ -376,15 +349,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
                struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
-static inline struct mem_cgroup *
-mem_cgroup_iter_cond(struct mem_cgroup *root,
-               struct mem_cgroup *prev,
-               struct mem_cgroup_reclaim_cookie *reclaim,
-               mem_cgroup_iter_filter cond)
-{
-       /* first call must return non-NULL, second return NULL */
-       return (struct mem_cgroup *)(unsigned long)!prev;
-}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
@@ -471,11 +435,10 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
                struct mem_cgroup *root)
 {
-       return VISIT;
+       return false;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 916892c2b8e007bf1b69dacd090c2751c5baa0c6..65e7bec4b0f0b8fdce3eac1dc9281c413a7ed47a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -862,15 +862,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
        return memcg;
 }
 
-static enum mem_cgroup_filter_t
-mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
-               mem_cgroup_iter_filter cond)
-{
-       if (!cond)
-               return VISIT;
-       return cond(memcg, root);
-}
-
 /*
  * Returns a next (in a pre-order walk) alive memcg (with elevated css
  * ref. count) or NULL if the whole root's subtree has been visited.
@@ -878,7 +869,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
  * helper function to be used by mem_cgroup_iter
  */
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
-               struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
+               struct mem_cgroup *last_visited)
 {
        struct cgroup_subsys_state *prev_css, *next_css;
 
@@ -896,31 +887,11 @@ skip_node:
        if (next_css) {
                struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
 
-               switch (mem_cgroup_filter(mem, root, cond)) {
-               case SKIP:
+               if (css_tryget(&mem->css))
+                       return mem;
+               else {
                        prev_css = next_css;
                        goto skip_node;
-               case SKIP_TREE:
-                       if (mem == root)
-                               return NULL;
-                       /*
-                        * css_rightmost_descendant is not an optimal way to
-                        * skip through a subtree (especially for imbalanced
-                        * trees leaning to right) but that's what we have right
-                        * now. More effective solution would be traversing
-                        * right-up for first non-NULL without calling
-                        * css_next_descendant_pre afterwards.
-                        */
-                       prev_css = css_rightmost_descendant(next_css);
-                       goto skip_node;
-               case VISIT:
-                       if (css_tryget(&mem->css))
-                               return mem;
-                       else {
-                               prev_css = next_css;
-                               goto skip_node;
-                       }
-                       break;
                }
        }
 
@@ -984,7 +955,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
- * @cond: filter for visited nodes, NULL for no filter
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -997,18 +967,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * divide up the memcgs in the hierarchy among all concurrent
  * reclaimers operating on the same zone and priority.
  */
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                   struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim,
-                                  mem_cgroup_iter_filter cond)
+                                  struct mem_cgroup_reclaim_cookie *reclaim)
 {
        struct mem_cgroup *memcg = NULL;
        struct mem_cgroup *last_visited = NULL;
 
-       if (mem_cgroup_disabled()) {
-               /* first call must return non-NULL, second return NULL */
-               return (struct mem_cgroup *)(unsigned long)!prev;
-       }
+       if (mem_cgroup_disabled())
+               return NULL;
 
        if (!root)
                root = root_mem_cgroup;
@@ -1019,9 +986,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
        if (!root->use_hierarchy && root != root_mem_cgroup) {
                if (prev)
                        goto out_css_put;
-               if (mem_cgroup_filter(root, root, cond) == VISIT)
-                       return root;
-               return NULL;
+               return root;
        }
 
        rcu_read_lock();
@@ -1044,7 +1009,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
                        last_visited = mem_cgroup_iter_load(iter, root, &seq);
                }
 
-               memcg = __mem_cgroup_iter_next(root, last_visited, cond);
+               memcg = __mem_cgroup_iter_next(root, last_visited);
 
                if (reclaim) {
                        mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1055,11 +1020,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
                                reclaim->generation = iter->generation;
                }
 
-               /*
-                * We have finished the whole tree walk or no group has been
-                * visited because filter told us to skip the root node.
-                */
-               if (!memcg && (prev || (cond && !last_visited)))
+               if (prev && !memcg)
                        goto out_unlock;
        }
 out_unlock:
@@ -1804,14 +1765,13 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  *     a) it is over its soft limit
  *     b) any parent up the hierarchy is over its soft limit
  */
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
                struct mem_cgroup *root)
 {
        struct mem_cgroup *parent = memcg;
 
        if (res_counter_soft_limit_excess(&memcg->res))
-               return VISIT;
+               return true;
 
        /*
         * If any parent up to the root in the hierarchy is over its soft limit
@@ -1819,12 +1779,12 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
         */
        while ((parent = parent_mem_cgroup(parent))) {
                if (res_counter_soft_limit_excess(&parent->res))
-                       return VISIT;
+                       return true;
                if (parent == root)
                        break;
        }
 
-       return SKIP;
+       return false;
 }
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cdd300b8148564cda113da33d1606af010be37da..17a7134de4d78d04bb0082476722df59a9187b1c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2185,16 +2185,21 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
                        .zone = zone,
                        .priority = sc->priority,
                };
-               struct mem_cgroup *memcg = NULL;
-               mem_cgroup_iter_filter filter = (soft_reclaim) ?
-                       mem_cgroup_soft_reclaim_eligible : NULL;
+               struct mem_cgroup *memcg;
 
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
+               memcg = mem_cgroup_iter(root, NULL, &reclaim);
+               do {
                        struct lruvec *lruvec;
 
+                       if (soft_reclaim &&
+                           !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
+                               memcg = mem_cgroup_iter(root, memcg, &reclaim);
+                               continue;
+                       }
+
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
                        shrink_lruvec(lruvec, sc);
@@ -2214,7 +2219,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
                                mem_cgroup_iter_break(root, memcg);
                                break;
                        }
-               }
+                       memcg = mem_cgroup_iter(root, memcg, &reclaim);
+               } while (memcg);
 
                vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                           sc->nr_scanned - nr_scanned,
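
A caller-side note on the restored pattern (a sketch, not part of the
patch): every memcg returned by mem_cgroup_iter() carries an elevated css
reference, so a walk that stops early must hand its position back through
mem_cgroup_iter_break(), as the hunk above does.  The should_stop condition
here is illustrative, e.g. "enough pages reclaimed":

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		if (should_stop) {
			/* drops the css reference held on the current memcg */
			mem_cgroup_iter_break(root, memcg);
			break;
		}
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);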