sched/fair: Try to estimate possible idle states.
authorChris Redpath <chris.redpath@arm.com>
Tue, 4 Jul 2017 09:23:03 +0000 (10:23 +0100)
committerChris Redpath <chris.redpath@arm.com>
Tue, 25 Jul 2017 15:31:00 +0000 (16:31 +0100)
In the current EAS group energy calculations, we only use
the idle state of the group as it is right now. This means
that there are times when EAS cannot see that we are about
to remove all utilization from a group, which is likely to
result in us being able to idle that entire group.

This is an attempt to detect that situation and at least
allow the energy calculation to include savings in that
scenario, regardless of what we might be able to actually
achieve in the real world. If a cluster or CPU looks like
it will have some idle time available to it, we try to
map the utilization onto an idle state.

Change-Id: I8fcb1e507f65ae6a2c5647eeef75a4bf28c7a0c0
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
kernel/sched/fair.c

index cd72ce14297485870998c8a257b546b9322abfbe..d1445e7b298a0690d58055232bdaf0615a6160db 100644 (file)
@@ -5396,9 +5396,11 @@ static int find_new_capacity(struct energy_env *eenv,
        return idx;
 }
 
-static int group_idle_state(struct sched_group *sg)
+static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
 {
        int i, state = INT_MAX;
+       int src_in_grp, dst_in_grp;
+       long grp_util = 0;
 
        /* Find the shallowest idle state in the sched group. */
        for_each_cpu(i, sched_group_cpus(sg))
@@ -5407,6 +5409,54 @@ static int group_idle_state(struct sched_group *sg)
        /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
        state++;
 
+       /*
+        * Try to estimate if a deeper idle state is
+        * achievable when we move the task.
+        */
+       for_each_cpu(i, sched_group_cpus(sg))
+               grp_util += cpu_util(i);
+
+       src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg));
+       dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg));
+       if (src_in_grp == dst_in_grp) {
+               /* both CPUs under consideration are in the same group or not in
+                * either group, migration should leave idle state the same.
+                */
+               goto end;
+       }
+       /* add or remove util as appropriate to indicate what group util
+        * will be (worst case - no concurrent execution) after moving the task
+        */
+       grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta;
+
+       /*
+        * NOTE(review): the mapping below assumes sg->sgc->max_capacity is
+        * non-zero and sg->sge->nr_idle_states >= 2 — confirm this holds for
+        * every topology that enables the energy model.
+        */
+       if (grp_util <=
+               ((long)sg->sgc->max_capacity * (int)sg->group_weight)) {
+               /* after moving, this group is at most partly
+                * occupied, so it should have some idle time.
+                */
+               int max_idle_state_idx = sg->sge->nr_idle_states - 2;
+               int new_state = grp_util * max_idle_state_idx;
+               if (grp_util <= 0)
+                       /* group will have no util, use lowest state */
+                       new_state = max_idle_state_idx + 1;
+               else {
+                       /* for partially idle, linearly map util to idle
+                        * states, excluding the lowest one. This does not
+                        * correspond to the state we expect to enter in
+                        * reality, but an indication of what might happen.
+                        */
+                       new_state = min(max_idle_state_idx, (int)
+                                       (new_state / sg->sgc->max_capacity));
+                       new_state = max_idle_state_idx - new_state;
+               }
+               /*
+                * NOTE(review): this estimated state may be deeper than the
+                * shallowest state observed above; it is an energy-accounting
+                * hint only, not a state the CPUs are guaranteed to enter.
+                */
+               state = new_state;
+       } else {
+               /* After moving, the group will be fully occupied
+                * so assume it will not be idle at all.
+                */
+               state = 0;
+       }
+end:
        return state;
 }
 
@@ -5479,8 +5529,9 @@ static int sched_group_energy(struct energy_env *eenv)
                                        }
                                }
 
-                               idle_idx = group_idle_state(sg);
+                               idle_idx = group_idle_state(eenv, sg);
                                group_util = group_norm_util(eenv, sg);
+
                                sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
                                                                >> SCHED_CAPACITY_SHIFT;
                                sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)