sched/numa: Resist moving tasks towards nodes with fewer hinting faults
author Mel Gorman <mgorman@suse.de>
Mon, 7 Oct 2013 10:29:01 +0000 (11:29 +0100)
committer Ingo Molnar <mingo@kernel.org>
Wed, 9 Oct 2013 10:40:27 +0000 (12:40 +0200)
Just as "sched: Favour moving tasks towards the preferred node" favours
moving tasks towards nodes with a higher number of recorded NUMA hinting
faults, this patch resists moving tasks towards nodes with fewer recorded
faults.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-24-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/fair.c
kernel/sched/features.h

index 6ffddca687feafb005250200857212c9a48f2d8c..89431248d33ddc12cd6ef00c540fec58813bc56b 100644 (file)
@@ -4107,12 +4107,43 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
 
        return false;
 }
+
+
+static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
+{
+       int src_nid, dst_nid;
+
+       if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
+               return false;
+
+       if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
+               return false;
+
+       src_nid = cpu_to_node(env->src_cpu);
+       dst_nid = cpu_to_node(env->dst_cpu);
+
+       if (src_nid == dst_nid ||
+           p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
+               return false;
+
+       if (p->numa_faults[dst_nid] < p->numa_faults[src_nid])
+               return true;
+
+       return false;
+}
+
 #else
 static inline bool migrate_improves_locality(struct task_struct *p,
                                             struct lb_env *env)
 {
        return false;
 }
+
+static inline bool migrate_degrades_locality(struct task_struct *p,
+                                            struct lb_env *env)
+{
+       return false;
+}
 #endif
 
 /*
@@ -4177,6 +4208,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
         * 3) too many balance attempts have failed.
         */
        tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd);
+       if (!tsk_cache_hot)
+               tsk_cache_hot = migrate_degrades_locality(p, env);
 
        if (migrate_improves_locality(p, env)) {
 #ifdef CONFIG_SCHEDSTATS
index d9278ce2c4b458d765871814c122c08b3c9faa8c..5716929a2e3a4ad2d492ad2de0c3431291ea81a1 100644 (file)
@@ -74,4 +74,12 @@ SCHED_FEAT(NUMA,     false)
  * balancing.
  */
 SCHED_FEAT(NUMA_FAVOUR_HIGHER, true)
+
+/*
+ * NUMA_RESIST_LOWER will resist moving tasks towards nodes where
+ * fewer hinting faults have been recorded. As this has the
+ * potential to prevent a task from ever migrating to a new node
+ * due to CPU overload, it is disabled by default.
+ */
+SCHED_FEAT(NUMA_RESIST_LOWER, false)
 #endif