memcg: skip scanning active lists based on individual size
author: Johannes Weiner <jweiner@redhat.com>
Wed, 2 Nov 2011 20:38:23 +0000 (13:38 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Nov 2011 23:07:00 +0000 (16:07 -0700)
Reclaim decides to skip scanning an active list when the corresponding
inactive list is large enough in comparison, so that the assumed working
set is left alone while there are still enough reclaim candidates around.

The memcg implementation of comparing those lists instead reports whether
the whole memcg is low on the requested type of inactive pages,
considering all nodes and zones.

This can lead to an oversized active list not being scanned because of the
state of the other lists in the memcg, as well as an active list being
scanned while its corresponding inactive list has enough pages.

Not only is this wrong, it's also a scalability hazard, because the global
memory state over all nodes and zones has to be gathered for each memcg
and zone scanned.

Make these calculations purely based on the size of the two LRU lists
that are actually affected by the outcome of the decision.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/cgroups/memory.txt
include/linux/memcontrol.h
mm/memcontrol.c
mm/vmscan.c

index 06eb6d957c83097b85fd15e87e94b8ed7edfe1cf..cc0ebc5241b39f2e9513d89a39d880771cab9c67 100644 (file)
@@ -418,7 +418,6 @@ total_unevictable   - sum of all children's "unevictable"
 
 # The following additional stats are dependent on CONFIG_DEBUG_VM.
 
-inactive_ratio         - VM internal parameter. (see mm/page_alloc.c)
 recent_rotated_anon    - VM internal parameter. (see mm/vmscan.c)
 recent_rotated_file    - VM internal parameter. (see mm/vmscan.c)
 recent_scanned_anon    - VM internal parameter. (see mm/vmscan.c)
index 05206aac59652cbc4a95fa8cb6e05581f26d0e09..b87068a1a09ef84ece916a4a722ebf70e921d1c6 100644 (file)
@@ -106,8 +106,10 @@ extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 /*
  * For memory reclaim.
  */
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
+                                   struct zone *zone);
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
+                                   struct zone *zone);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
                                        int nid, int zid, unsigned int lrumask);
@@ -295,13 +297,13 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 static inline int
-mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        return 1;
 }
 
 static inline int
-mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        return 1;
 }
index f6c4beb4db5655b6153ced40894cd16be5e6e218..ce7b35d024e9e8701c44f1d6f22a44fc62084fa4 100644 (file)
@@ -1104,15 +1104,19 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
        return ret;
 }
 
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
-       unsigned long active;
+       unsigned long inactive_ratio;
+       int nid = zone_to_nid(zone);
+       int zid = zone_idx(zone);
        unsigned long inactive;
+       unsigned long active;
        unsigned long gb;
-       unsigned long inactive_ratio;
 
-       inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
-       active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                               BIT(LRU_INACTIVE_ANON));
+       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                             BIT(LRU_ACTIVE_ANON));
 
        gb = (inactive + active) >> (30 - PAGE_SHIFT);
        if (gb)
@@ -1120,39 +1124,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_
        else
                inactive_ratio = 1;
 
-       if (present_pages) {
-               present_pages[0] = inactive;
-               present_pages[1] = active;
-       }
-
-       return inactive_ratio;
+       return inactive * inactive_ratio < active;
 }
 
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
-{
-       unsigned long active;
-       unsigned long inactive;
-       unsigned long present_pages[2];
-       unsigned long inactive_ratio;
-
-       inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
-       inactive = present_pages[0];
-       active = present_pages[1];
-
-       if (inactive * inactive_ratio < active)
-               return 1;
-
-       return 0;
-}
-
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        unsigned long active;
        unsigned long inactive;
+       int zid = zone_idx(zone);
+       int nid = zone_to_nid(zone);
 
-       inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
-       active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                               BIT(LRU_INACTIVE_FILE));
+       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                             BIT(LRU_ACTIVE_FILE));
 
        return (active > inactive);
 }
@@ -4192,8 +4177,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        }
 
 #ifdef CONFIG_DEBUG_VM
-       cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
        {
                int nid, zid;
                struct mem_cgroup_per_zone *mz;
index a90c603a8d02937fd41bac6a7e72d2891f86bfaa..132d1ddb2238179466f45a599b1ccd687f9ee3f8 100644 (file)
@@ -1767,7 +1767,7 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
        if (scanning_global_lru(sc))
                low = inactive_anon_is_low_global(zone);
        else
-               low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+               low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
        return low;
 }
 #else
@@ -1810,7 +1810,7 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
        if (scanning_global_lru(sc))
                low = inactive_file_is_low_global(zone);
        else
-               low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+               low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
        return low;
 }