vmscan: have kswapd sleep for a short interval and double check it should be asleep
authorMel Gorman <mel@csn.ul.ie>
Tue, 15 Dec 2009 01:58:53 +0000 (17:58 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Dec 2009 16:53:16 +0000 (08:53 -0800)
After kswapd balances all zones in a pgdat, it goes to sleep.  In the
event of no IO congestion, kswapd can go to sleep very shortly after the
high watermark was reached.  If there are a constant stream of allocations
from parallel processes, it can mean that kswapd went to sleep too quickly
and the high watermark is not being maintained for sufficient length time.

This patch makes kswapd go to sleep as a two-stage process.  It first
tries to sleep for HZ/10.  If it is woken up by another process or the
high watermark is no longer met, it's considered a premature sleep and
kswapd continues work.  Otherwise it goes fully to sleep.

This adds more counters to distinguish between fast and slow breaches of
watermarks.  A "fast" premature sleep is one where the low watermark was
hit in a very short time after kswapd going to sleep.  A "slow" premature
sleep indicates that the high watermark was breached after a very short
interval.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Frans Pop <elendil@planet.nl>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/vmstat.h
mm/vmscan.c
mm/vmstat.c

index d85889710f9b6a4fbf5040fd941c1cfca7847428..fd5be240c0b72f075d135d2bc0e00c7d7dd9aca8 100644 (file)
@@ -40,6 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                PGSCAN_ZONE_RECLAIM_FAILED,
 #endif
                PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+               KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW,
                PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_HUGETLB_PAGE
                HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
index 61d3a9a0d96f87ec0ff29fdb68908ca13a810e7b..e176bd3936dad2a353e1747de62d128e79f0587b 100644 (file)
@@ -1904,6 +1904,24 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 }
 #endif
 
+/* is kswapd sleeping prematurely? */
+static int sleeping_prematurely(int order, long remaining)
+{
+       struct zone *zone;
+
+       /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
+       if (remaining)
+               return 1;
+
+       /* If after HZ/10, a zone is below the high mark, it's premature */
+       for_each_populated_zone(zone)
+               if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
+                                                               0, 0))
+                       return 1;
+
+       return 0;
+}
+
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
@@ -2185,8 +2203,30 @@ static int kswapd(void *p)
                         */
                        order = new_order;
                } else {
-                       if (!freezing(current) && !kthread_should_stop())
-                               schedule();
+                       if (!freezing(current) && !kthread_should_stop()) {
+                               long remaining = 0;
+
+                               /* Try to sleep for a short interval */
+                               if (!sleeping_prematurely(order, remaining)) {
+                                       remaining = schedule_timeout(HZ/10);
+                                       finish_wait(&pgdat->kswapd_wait, &wait);
+                                       prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+                               }
+
+                               /*
+                                * After a short sleep, check if it was a
+                                * premature sleep. If not, then go fully
+                                * to sleep until explicitly woken up
+                                */
+                               if (!sleeping_prematurely(order, remaining))
+                                       schedule();
+                               else {
+                                       if (remaining)
+                                               count_vm_event(KSWAPD_PREMATURE_FAST);
+                                       else
+                                               count_vm_event(KSWAPD_PREMATURE_SLOW);
+                               }
+                       }
 
                        order = pgdat->kswapd_max_order;
                }
index dad2327e45804e16a8ff07564a80eb0cac79f482..63ab71455c5bcd1e6ca46b77ee9c939654ae3fcd 100644 (file)
@@ -683,6 +683,8 @@ static const char * const vmstat_text[] = {
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
+       "kswapd_slept_prematurely_fast",
+       "kswapd_slept_prematurely_slow",
        "pageoutrun",
        "allocstall",