int cpu_util_wake(int cpu, struct task_struct *p)
{
- unsigned long util, capacity;
+ struct cfs_rq *cfs_rq;
+ unsigned int util;
/* Task has no contribution or is new */
- if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+ if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
return cpu_util(cpu);
- capacity = capacity_orig_of(cpu);
- util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0);
+ cfs_rq = &cpu_rq(cpu)->cfs;
+ util = READ_ONCE(cfs_rq->avg.util_avg);
- return (util >= capacity) ? capacity : util;
+ /* Discount task's blocked util from CPU's util */
+ util -= min_t(unsigned int, util, task_util(p));
+
+ /*
+ * Covered cases:
+ *
+ * a) if *p is the only task sleeping on this CPU, then:
+ * cpu_util (== task_util) > util_est (== 0)
+ * and thus we return:
+ * cpu_util_wake = (cpu_util - task_util) = 0
+ *
+ * b) if other tasks are SLEEPING on this CPU, which is now exiting
+ * IDLE, then:
+ * cpu_util >= task_util
+ * cpu_util > util_est (== 0)
+ * and thus we discount *p's blocked utilization to return:
+ * cpu_util_wake = (cpu_util - task_util) >= 0
+ *
+ * c) if other tasks are RUNNABLE on that CPU and
+ * util_est > cpu_util
+ * then we use util_est since it returns a more restrictive
+ * estimation of the spare capacity on that CPU, by just
+ * considering the expected utilization of tasks already
+ * runnable on that CPU.
+ *
+ * Cases a) and b) are covered by the above code, while case c) is
+ * covered by the following code when estimated utilization is
+ * enabled.
+ */
+ if (sched_feat(UTIL_EST))
+ util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
+
+ /*
+ * Utilization (estimated) can exceed the CPU capacity, thus let's
+ * clamp to the maximum CPU capacity to ensure consistency with
+ * the cpu_util call.
+ */
+ return min_t(unsigned long, util, capacity_orig_of(cpu));
}
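
To make cases a) to c) concrete, here is a minimal user-space sketch of the same discount/max/clamp arithmetic. cpu_util_wake_sketch and all numbers are illustrative stand-ins, not kernel APIs:

#include <stdio.h>

/* Hypothetical stand-in mirroring the discount/max/clamp steps above */
static unsigned int cpu_util_wake_sketch(unsigned int cpu_util,
					 unsigned int task_util,
					 unsigned int cfs_util_est,
					 unsigned int capacity_orig)
{
	unsigned int util = cpu_util;

	/* Discount the waking task's blocked contribution (cases a and b) */
	util -= (util < task_util) ? util : task_util;

	/* Prefer the more restrictive estimated utilization (case c) */
	if (cfs_util_est > util)
		util = cfs_util_est;

	/* Clamp: the estimate may exceed the CPU's original capacity */
	return (util < capacity_orig) ? util : capacity_orig;
}

int main(void)
{
	printf("a) %u\n", cpu_util_wake_sketch(300, 300,   0, 1024)); /* 0   */
	printf("b) %u\n", cpu_util_wake_sketch(500, 300,   0, 1024)); /* 200 */
	printf("c) %u\n", cpu_util_wake_sketch(500, 300, 600, 1024)); /* 600 */
	return 0;
}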
static inline int
unsigned long wake_util, new_util;
wake_util = cpu_util_wake(i, p);
- new_util = wake_util + task_util(p);
+ new_util = wake_util + task_util_est(p);
/* skip over-capacity cpu */
if (new_util > capacity_orig)
extern int task_util(struct task_struct *p);
extern int cpu_util_wake(int cpu, struct task_struct *p);
+extern unsigned long task_util_est(struct task_struct *p);
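
The declaration of task_util_est() above is assumed to follow the mainline util_est helpers, where the estimate never falls below the instantaneous PELT signal. For reference, a kernel-style sketch of that expected shape (names per the upstream util_est patches; the exact backport may differ):

static inline unsigned long _task_util_est(struct task_struct *p)
{
	struct util_est ue = READ_ONCE(p->se.avg.util_est);

	/* The larger of the decaying EWMA and the last enqueued sample */
	return max(ue.ewma, ue.enqueued);
}

static inline unsigned long task_util_est(struct task_struct *p)
{
	/* Never report less than the instantaneous PELT utilization */
	return max(task_util(p), _task_util_est(p));
}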
unsigned long wake_util, new_util;
wake_util = cpu_util_wake(cpu, p);
- new_util = wake_util + task_util(p);
+ new_util = wake_util + task_util_est(p);
/* checking prev cpu is meaningless */
if (eenv->prev_cpu == cpu)
util[cpu] = cpu_util_wake(cpu, p);
if (unlikely(cpu == target_cpu))
- util[cpu] += task_util(p);
+ util[cpu] += task_util_est(p);
}
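
Charging the target CPU with task_util_est(p) rather than task_util(p) keeps this estimate consistent with cpu_util_wake(). As an illustrative example: a task whose PELT util_avg has decayed to 150 while sleeping but whose util_est is still 400 makes the target CPU evaluate at wake_util + 400, so a recently heavy task is not placed as if it only needed its decayed signal.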
for_each_possible_cpu(cpu) {
#include <trace/events/ems.h>
+#include "ems.h"
#include "../sched.h"
-static int cpu_util_wake(int cpu, struct task_struct *p)
-{
- unsigned long util, capacity;
-
- /* Task has no contribution or is new */
- if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
- return cpu_util(cpu);
-
- capacity = capacity_orig_of(cpu);
- util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - p->se.avg.util_avg, 0);
-
- return (util >= capacity) ? capacity : util;
-}
-
/*
* Currently, PCF is composed of a selection algorithm based on distributed
* processing, for example, selecting idle cpu or cpu with biggest spare
continue;
wake_util = cpu_util_wake(i, p);
- new_util = wake_util + task_util(p);
+ new_util = wake_util + task_util_est(p);
- trace_ems_prefer_idle(p, task_cpu(p), i, capacity_orig, task_util(p),
+ trace_ems_prefer_idle(p, task_cpu(p), i, capacity_orig, task_util_est(p),
new_util, idle_cpu(i));
if (new_util > capacity_orig)
util_init_done:
if (entity_is_task(se)) {
struct task_struct *p = task_of(se);
+ struct sched_avg *sa = &se->avg;
if (p->sched_class != &fair_sched_class) {
/*
* For !fair tasks do:
se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
return;
}
+
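+ /* A new task has no demand history, so its estimated utilization starts at zero */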
+ sa->util_est.ewma = 0;
+ sa->util_est.enqueued = 0;
}
attach_entity_cfs_rq(se);
*/
ue.enqueued = (task_util(p) | UTIL_AVG_UNCHANGED);
last_ewma_diff = ue.enqueued - ue.ewma;
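+ /*
+ * Example: capacity_orig = 1024 preserves the old ~1% margin of 10
+ * units, while a little CPU with capacity_orig = 460 (an illustrative
+ * value) shrinks it to ~4, so smaller utilization deltas there still
+ * refresh the EWMA instead of being filtered out.
+ */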
- if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
+ if (within_margin(last_ewma_diff, capacity_orig_of(task_cpu(p)) / 100))
return;
/*