ANDROID: sched/fair: prevent possible infinite loop in sched_group_energy
author Chris Redpath <chris.redpath@arm.com>
Mon, 2 Apr 2018 18:37:51 +0000 (11:37 -0700)
committer Todd Kjos <tkjos@google.com>
Thu, 23 Aug 2018 22:47:08 +0000 (22:47 +0000)
There is a race between hotplug and energy_diff which might result
in an endless loop in sched_group_energy. When this happens, the
loop's end condition can never be detected.

We can store how many CPUs we need to visit at the beginning, and
bail out of the energy calculation if we visit more CPUs than expected.
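
The guard pattern itself is simple enough to show outside the kernel.
A minimal userspace sketch, assuming GCC/Clang builtins; visit(),
visit_all() and the flat bitmask are illustrative stand-ins for the
scheduler's cpumask and group walk, not the actual code:

    #include <stdio.h>

    /* Toy stand-in for the per-group energy work. */
    static void visit(int cpu)
    {
            printf("visiting cpu %d\n", cpu);
    }

    /*
     * Walk every set bit in *mask, bounded by the population counted
     * up front. In this single-threaded toy the mask cannot grow, but
     * in the scheduler hotplug can add CPUs mid-walk; the budget then
     * runs out and we bail instead of looping forever.
     */
    static int visit_all(unsigned long *mask)
    {
            int budget = __builtin_popcountl(*mask);

            while (*mask) {
                    int cpu;

                    if (!budget)
                            return -1;      /* raced: bail out */
                    cpu = __builtin_ctzl(*mask);
                    visit(cpu);
                    *mask &= ~(1UL << cpu);
                    budget--;
            }
            return 0;
    }

    int main(void)
    {
            unsigned long mask = 0xB;       /* CPUs 0, 1 and 3 */

            return visit_all(&mask);
    }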

Bug: 72311797 72202633
Change-Id: I01c5a5ee9b9712903cb321d3b5ccf2539205d55d
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Signed-off-by: Puja Gupta <pujag@codeaurora.org>
kernel/sched/fair.c

index d10b39326d3084a308332c76b75cb21a67763dff..6d792b4cd645a0c58b771655836f64d24761f8d3 100644 (file)
@@ -6236,11 +6236,21 @@ static int compute_energy(struct energy_env *eenv)
        int cpu;
        struct cpumask visit_cpus;
        struct sched_group *sg;
+       int cpu_count;
 
        WARN_ON(!eenv->sg_top->sge);
 
        cpumask_copy(&visit_cpus, sched_group_span(eenv->sg_top));
-
+       /* If a cpu is hotplugged in while we are in this function, it does
+        * not appear in the existing visit_cpus mask which came from the
+        * sched_group pointer of the sched_domain pointed at by sd_ea for
+        * either the prev or next cpu and was dereferenced in
+        * select_energy_cpu_idx.
+        * Since we will dereference sd_scs later as we iterate through the
+        * CPUs we expect to visit, new CPUs can be present which are not in
+        * the visit_cpus mask. Guard this with cpu_count.
+        */
+       cpu_count = cpumask_weight(&visit_cpus);
        while (!cpumask_empty(&visit_cpus)) {
                struct sched_group *sg_shared_cap = NULL;
 
@@ -6249,6 +6259,8 @@ static int compute_energy(struct energy_env *eenv)
                /*
                 * Is the group utilization affected by cpus outside this
                 * sched_group?
+                * This sd may have groups with cpus which were not present
+                * when we took visit_cpus.
                 */
                sd = rcu_dereference(per_cpu(sd_scs, cpu));
                if (sd && sd->parent)
@@ -6274,8 +6286,24 @@ static int compute_energy(struct energy_env *eenv)
                                calc_sg_energy(eenv);
 
                                /* remove CPUs we have just visited */
-                               if (!sd->child)
+                               if (!sd->child) {
+                                       /*
+                                        * cpu_count here is the number of
+                                        * cpus we expect to visit in this
+                                        * calculation. If we race against
+                                        * hotplug, we can have extra cpus
+                                        * added to the groups we are
+                                        * iterating which do not appear in
+                                        * the visit_cpus mask. In that case
+                                        * we are not able to calculate energy
+                                        * without restarting so we will bail
+                                        * out and use prev_cpu this time.
+                                        */
+                                       if (!cpu_count)
+                                               return -EINVAL;
                                        cpumask_xor(&visit_cpus, &visit_cpus, sched_group_span(sg));
+                                       cpu_count--;
+                               }
 
                                if (cpumask_equal(sched_group_span(sg), sched_group_span(eenv->sg_top)))
                                        goto next_cpu;
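
With the guard in place the walk fails cleanly instead of spinning;
per the comment above, the caller then keeps prev_cpu for this wakeup.
Continuing the userspace sketch from the commit message, a caller could
honor the bail-out like this (prev_cpu and best_cpu are illustrative
stand-ins for the scheduler's selection logic):

    /* Keep the previously used CPU when the mask walk raced. */
    static int select_cpu(unsigned long mask, int prev_cpu, int best_cpu)
    {
            return visit_all(&mask) == 0 ? best_cpu : prev_cpu;
    }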