sched/numa: Do not migrate memory immediately after switching node

author Rik van Riel <riel@redhat.com>

Mon, 7 Oct 2013 10:29:08 +0000 (11:29 +0100)

committer Ingo Molnar <mingo@kernel.org>

Wed, 9 Oct 2013 10:40:36 +0000 (12:40 +0200)
author Rik van Riel <riel@redhat.com>
Mon, 7 Oct 2013 10:29:08 +0000 (11:29 +0100)
committer Ingo Molnar <mingo@kernel.org>
Wed, 9 Oct 2013 10:40:36 +0000 (12:40 +0200)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 66b878e945548d1415e545489efcb603f6d389e1..9060a7f4e9ed33f3cbdc4048144f0a78122e6c19 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1631,7 +1631,7 @@ static void __sched_fork(struct task_struct *p)
  
         p->node_stamp = 0ULL;
         p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
-       p->numa_migrate_seq = 0;
+       p->numa_migrate_seq = 1;
         p->numa_scan_period = sysctl_numa_balancing_scan_delay;
         p->numa_preferred_nid = -1;
         p->numa_work.next = &p->numa_work;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index b1de7c55e9f74825d85347655a6f010ee39f3830..61ec0d4765b95bbdedf43f77254df9d3099fe3f3 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -884,7 +884,7 @@ static unsigned int task_scan_max(struct task_struct *p)
   * the preferred node but still allow the scheduler to move the task again if
   * the nodes CPUs are overloaded.
   */
-unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
+unsigned int sysctl_numa_balancing_settle_count __read_mostly = 4;
  
  static inline int task_faults_idx(int nid, int priv)
  {
@@ -980,7 +980,7 @@ static void task_numa_placement(struct task_struct *p)
  
                 /* Update the preferred nid and migrate task if possible */
                 p->numa_preferred_nid = max_nid;
-               p->numa_migrate_seq = 0;
+               p->numa_migrate_seq = 1;
                 migrate_task_to(p, preferred_cpu);
         }
  }
@@ -4121,6 +4121,20 @@ static void move_task(struct task_struct *p, struct lb_env *env)
         set_task_cpu(p, env->dst_cpu);
         activate_task(env->dst_rq, p, 0);
         check_preempt_curr(env->dst_rq, p, 0);
+#ifdef CONFIG_NUMA_BALANCING
+       if (p->numa_preferred_nid != -1) {
+               int src_nid = cpu_to_node(env->src_cpu);
+               int dst_nid = cpu_to_node(env->dst_cpu);
+
+               /*
+                * If the load balancer has moved the task then limit
+                * migrations from taking place in the short term in
+                * case this is a short-lived migration.
+                */
+               if (src_nid != dst_nid && dst_nid != p->numa_preferred_nid)
+                       p->numa_migrate_seq = 0;
+       }
+#endif
  }
  
  /*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index aff1f1ed3dc544b8f336c4ddb8f112dbc7128699..196d8da2b657f84a662d3caa6dbd85f670e09d60 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2378,6 +2378,18 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
                 last_nidpid = page_nidpid_xchg_last(page, this_nidpid);
                 if (!nidpid_pid_unset(last_nidpid) && nidpid_to_nid(last_nidpid) != polnid)
                         goto out;
+
+#ifdef CONFIG_NUMA_BALANCING
+               /*
+                * If the scheduler has just moved us away from our
+                * preferred node, do not bother migrating pages yet.
+                * This way a short and temporary process migration will
+                * not cause excessive memory migration.
+                */
+               if (polnid != current->numa_preferred_nid &&
+                               !current->numa_migrate_seq)
+                       goto out;
+#endif
         }
  
         if (curnid != polnid)
author	Rik van Riel <riel@redhat.com>
	Mon, 7 Oct 2013 10:29:08 +0000 (11:29 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Wed, 9 Oct 2013 10:40:36 +0000 (12:40 +0200)
kernel/sched/core.c		patch | blob | blame | history
kernel/sched/fair.c		patch | blob | blame | history
mm/mempolicy.c		patch | blob | blame | history