sched: Weaken SD_POWERSAVINGS_BALANCE
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 10 Sep 2009 12:40:57 +0000 (14:40 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 15 Sep 2009 14:01:06 +0000 (16:01 +0200)
One of the problems of power-saving balancing is that under certain
scenarios it is too slow and allows tons of real work to pile up.

Avoid this by ignoring the powersave stuff when there's real work
to be done.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched.c
kernel/sched_fair.c

index 6c819f338b11a6aeadf1a67e5edaf40dc05c08cf..f0ccb8b926c806985b346a790f8965294abe78b4 100644 (file)
@@ -1538,6 +1538,26 @@ static unsigned long target_load(int cpu, int type)
        return max(rq->cpu_load[type-1], total);
 }
 
+static struct sched_group *group_of(int cpu)
+{
+       struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+       if (!sd)
+               return NULL;
+
+       return sd->groups;
+}
+
+static unsigned long power_of(int cpu)
+{
+       struct sched_group *group = group_of(cpu);
+
+       if (!group)
+               return SCHED_LOAD_SCALE;
+
+       return group->cpu_power;
+}
+
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 
 static unsigned long cpu_avg_load_per_task(int cpu)
@@ -3982,26 +4002,6 @@ ret:
        return NULL;
 }
 
-static struct sched_group *group_of(int cpu)
-{
-       struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
-
-       if (!sd)
-               return NULL;
-
-       return sd->groups;
-}
-
-static unsigned long power_of(int cpu)
-{
-       struct sched_group *group = group_of(cpu);
-
-       if (!group)
-               return SCHED_LOAD_SCALE;
-
-       return group->cpu_power;
-}
-
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
index 09d19f77eb3a849f421a05127421a7778424b53d..eaa00014b4990ac242508cfcdbb60dbcdf17e30a 100644 (file)
@@ -1333,10 +1333,25 @@ static int select_task_rq_fair(struct task_struct *p, int flag, int sync)
 
        for_each_domain(cpu, tmp) {
                /*
-                * If power savings logic is enabled for a domain, stop there.
+                * If power savings logic is enabled for a domain, see if we
+                * are not overloaded, if so, don't balance wider.
                 */
-               if (tmp->flags & SD_POWERSAVINGS_BALANCE)
-                       break;
+               if (tmp->flags & SD_POWERSAVINGS_BALANCE) {
+                       unsigned long power = 0;
+                       unsigned long nr_running = 0;
+                       unsigned long capacity;
+                       int i;
+
+                       for_each_cpu(i, sched_domain_span(tmp)) {
+                               power += power_of(i);
+                               nr_running += cpu_rq(i)->cfs.nr_running;
+                       }
+
+                       capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
+
+                       if (nr_running/2 < capacity)
+                               break;
+               }
 
                switch (flag) {
                case SD_BALANCE_WAKE: