sched: Fix SD_POWERSAVING_BALANCE|SD_PREFER_LOCAL vs SD_WAKE_AFFINE
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 17 Sep 2009 07:01:14 +0000 (09:01 +0200)
committer Ingo Molnar <mingo@elte.hu>
Thu, 17 Sep 2009 08:40:31 +0000 (10:40 +0200)
The SD_POWERSAVING_BALANCE|SD_PREFER_LOCAL code can break out of
the domain iteration early, making us miss the SD_WAKE_AFFINE bits.

Fix this by continuing iteration until there is no need for a
larger domain.

This also cleans up the cgroup stuff a bit, by not having two
update_shares() invocations.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched_fair.c

index ffee827fa22fb4b19d67c9d5f6c2bff5bcebcaa4..10d218ab69f2ba4eac39a2d07461c0b2b256de34 100644 (file)
@@ -1333,11 +1333,12 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  */
 static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
-       struct sched_domain *tmp, *shares = NULL, *sd = NULL;
+       struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
        int cpu = smp_processor_id();
        int prev_cpu = task_cpu(p);
        int new_cpu = cpu;
        int want_affine = 0;
+       int want_sd = 1;
        int sync = wake_flags & WF_SYNC;
 
        if (sd_flag & SD_BALANCE_WAKE) {
@@ -1369,33 +1370,44 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
                                nr_running /= 2;
 
                        if (nr_running < capacity)
-                               break;
+                               want_sd = 0;
                }
 
                if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
                    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
 
-                       if (sched_feat(LB_SHARES_UPDATE)) {
-                               update_shares(tmp);
-                               shares = tmp;
-                       }
-
-                       if (wake_affine(tmp, p, sync)) {
-                               new_cpu = cpu;
-                               goto out;
-                       }
-
+                       affine_sd = tmp;
                        want_affine = 0;
                }
 
+               if (!want_sd && !want_affine)
+                       break;
+
                if (!(tmp->flags & sd_flag))
                        continue;
 
-               sd = tmp;
+               if (want_sd)
+                       sd = tmp;
+       }
+
+       if (sched_feat(LB_SHARES_UPDATE)) {
+               /*
+                * Pick the largest domain to update shares over
+                */
+               tmp = sd;
+               if (affine_sd && (!tmp ||
+                                 cpumask_weight(sched_domain_span(affine_sd)) >
+                                 cpumask_weight(sched_domain_span(sd))))
+                       tmp = affine_sd;
+
+               if (tmp)
+                       update_shares(tmp);
        }
 
-       if (sd && sd != shares && sched_feat(LB_SHARES_UPDATE))
-               update_shares(sd);
+       if (affine_sd && wake_affine(affine_sd, p, sync)) {
+               new_cpu = cpu;
+               goto out;
+       }
 
        while (sd) {
                int load_idx = sd->forkexec_idx;