mm: vmscan: convert global reclaim to per-memcg LRU lists
author Johannes Weiner <jweiner@redhat.com>
Fri, 13 Jan 2012 01:18:06 +0000 (17:18 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 13 Jan 2012 04:13:05 +0000 (20:13 -0800)
The global per-zone LRU lists are about to go away on memcg-enabled
kernels, so global reclaim must be able to find its pages on the
per-memcg LRU lists.

Since the LRU pages of a zone are distributed over all existing memory
cgroups, a scan target for a zone is complete when all memory cgroups
are scanned for their proportional share of a zone's memory.

The forced scanning of small scan targets from kswapd is limited to
zones marked unreclaimable; otherwise, kswapd can quickly overreclaim by
force-scanning the LRU lists of multiple memory cgroups.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/vmscan.c

index 136c7eb0ad88ed60d3a4a2f2907c2280f4e1aa8f..024168cfdcb0a6af3390547f4c96a5571ba0d95c 100644 (file)
@@ -1886,7 +1886,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
         * latencies, so it's better to scan a minimum amount there as
         * well.
         */
-       if (current_is_kswapd())
+       if (current_is_kswapd() && mz->zone->all_unreclaimable)
                force_scan = true;
        if (!global_reclaim(sc))
                force_scan = true;
@@ -2111,16 +2111,6 @@ static void shrink_zone(int priority, struct zone *zone,
        };
        struct mem_cgroup *memcg;
 
-       if (global_reclaim(sc)) {
-               struct mem_cgroup_zone mz = {
-                       .mem_cgroup = NULL,
-                       .zone = zone,
-               };
-
-               shrink_mem_cgroup_zone(priority, &mz, sc);
-               return;
-       }
-
        memcg = mem_cgroup_iter(root, NULL, &reclaim);
        do {
                struct mem_cgroup_zone mz = {
@@ -2134,6 +2124,10 @@ static void shrink_zone(int priority, struct zone *zone,
                 * scanned it with decreasing priority levels until
                 * nr_to_reclaim had been reclaimed.  This priority
                 * cycle is thus over after a single memcg.
+                *
+                * Direct reclaim and kswapd, on the other hand, have
+                * to scan all memory cgroups to fulfill the overall
+                * scan target for the zone.
                 */
                if (!global_reclaim(sc)) {
                        mem_cgroup_iter_break(root, memcg);
@@ -2478,13 +2472,24 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 static void age_active_anon(struct zone *zone, struct scan_control *sc,
                            int priority)
 {
-       struct mem_cgroup_zone mz = {
-               .mem_cgroup = NULL,
-               .zone = zone,
-       };
+       struct mem_cgroup *memcg;
 
-       if (inactive_anon_is_low(&mz))
-               shrink_active_list(SWAP_CLUSTER_MAX, &mz, sc, priority, 0);
+       if (!total_swap_pages)
+               return;
+
+       memcg = mem_cgroup_iter(NULL, NULL, NULL);
+       do {
+               struct mem_cgroup_zone mz = {
+                       .mem_cgroup = memcg,
+                       .zone = zone,
+               };
+
+               if (inactive_anon_is_low(&mz))
+                       shrink_active_list(SWAP_CLUSTER_MAX, &mz,
+                                          sc, priority, 0);
+
+               memcg = mem_cgroup_iter(NULL, memcg, NULL);
+       } while (memcg);
 }
 
 /*