sched: ems: introduce task band
LineageOS/android_kernel_motorola_exynos9610.git: kernel/sched/ems/core.c
/*
 * Core Exynos Mobile Scheduler
 *
 * Copyright (C) 2018 Samsung Electronics Co., Ltd
 * Park Bumgyu <bumgyu.park@samsung.com>
 */

#include <linux/ems.h>

#define CREATE_TRACE_POINTS
#include <trace/events/ems.h>

#include "ems.h"
#include "../sched.h"

int task_util(struct task_struct *p)
{
        if (rt_task(p))
                return p->rt.avg.util_avg;
        else
                return p->se.avg.util_avg;
}

int cpu_util_wake(int cpu, struct task_struct *p)
{
        struct cfs_rq *cfs_rq;
        unsigned int util;

        /* Task has no contribution or is new */
        if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
                return cpu_util(cpu);

        cfs_rq = &cpu_rq(cpu)->cfs;
        util = READ_ONCE(cfs_rq->avg.util_avg);

        /* Discount task's blocked util from CPU's util */
        util -= min_t(unsigned int, util, task_util_est(p));

        /*
         * Covered cases:
         *
         * a) if *p is the only task sleeping on this CPU, then:
         *      cpu_util (== task_util) > util_est (== 0)
         *    and thus we return:
         *      cpu_util_wake = (cpu_util - task_util) = 0
         *
         * b) if other tasks are SLEEPING on this CPU, which is now exiting
         *    IDLE, then:
         *      cpu_util >= task_util
         *      cpu_util > util_est (== 0)
         *    and thus we discount *p's blocked utilization to return:
         *      cpu_util_wake = (cpu_util - task_util) >= 0
         *
         * c) if other tasks are RUNNABLE on that CPU and
         *      util_est > cpu_util
         *    then we use util_est since it returns a more restrictive
         *    estimation of the spare capacity on that CPU, by just
         *    considering the expected utilization of tasks already
         *    runnable on that CPU.
         *
         * Cases a) and b) are covered by the above code, while case c) is
         * covered by the following code when estimated utilization is
         * enabled.
         */
        if (sched_feat(UTIL_EST))
                util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));

        /*
         * Utilization (estimated) can exceed the CPU capacity, thus let's
         * clamp to the maximum CPU capacity to ensure consistency with
         * the cpu_util call.
         */
        return min_t(unsigned long, util, capacity_orig_of(cpu));
}
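/*
 * Worked example for cpu_util_wake() above. The numbers are illustrative
 * assumptions, not values taken from this file: suppose the cfs_rq's
 * util_avg is 600, the waking task's task_util_est() is 250 and the rq's
 * util_est.enqueued is 400. The discount step leaves 600 - 250 = 350; with
 * UTIL_EST enabled, the max() raises the estimate back to 400, the expected
 * utilization of the tasks already runnable on that cpu, and the result is
 * finally clamped to capacity_orig_of(cpu).
 */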

static inline int
check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
{
        return ((rq->cpu_capacity * sd->imbalance_pct) <
                                (rq->cpu_capacity_orig * 100));
}
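/*
 * Worked example for check_cpu_capacity() above. The numbers are
 * illustrative assumptions, not values defined in this file: with
 * sd->imbalance_pct == 117 and cpu_capacity_orig == 1024, the check reads
 *
 *      cpu_capacity * 117 < 1024 * 100, i.e. cpu_capacity < ~875
 *
 * so it reports true once pressure from other scheduling classes has
 * reduced the cpu's remaining capacity below roughly 85% of its original
 * capacity.
 */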

#define lb_sd_parent(sd) \
        (sd->parent && sd->parent->groups != sd->parent->groups->next)

int exynos_need_active_balance(enum cpu_idle_type idle, struct sched_domain *sd,
                                        int src_cpu, int dst_cpu)
{
        unsigned int src_imb_pct = lb_sd_parent(sd) ? sd->imbalance_pct : 1;
        unsigned int dst_imb_pct = lb_sd_parent(sd) ? 100 : 1;
        unsigned long src_cap = capacity_of(src_cpu);
        unsigned long dst_cap = capacity_of(dst_cpu);
        int level = sd->level;

        /* dst_cpu is idle */
        if ((idle != CPU_NOT_IDLE) &&
            (cpu_rq(src_cpu)->cfs.h_nr_running == 1)) {
                if ((check_cpu_capacity(cpu_rq(src_cpu), sd)) &&
                    (src_cap * sd->imbalance_pct < dst_cap * 100)) {
                        return 1;
                }

                /* This domain is the top level and dst_cpu is bigger than src_cpu */
                if (!lb_sd_parent(sd) && src_cap < dst_cap)
                        if (lbt_overutilized(src_cpu, level) || global_boosted())
                                return 1;
        }

        if ((src_cap * src_imb_pct < dst_cap * dst_imb_pct) &&
                        cpu_rq(src_cpu)->cfs.h_nr_running == 1 &&
                        lbt_overutilized(src_cpu, level) &&
                        !lbt_overutilized(dst_cpu, level)) {
                return 1;
        }

        return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2);
}
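/*
 * Illustrative call-site sketch for exynos_need_active_balance(). This is a
 * hypothetical fragment for documentation only; the real hook-up is done in
 * the fair-class load balancer, and the lb_env field names used here are
 * assumptions:
 *
 *      if (exynos_need_active_balance(env->idle, env->sd,
 *                                     env->src_cpu, env->dst_cpu))
 *              return 1;       (force an active migration attempt)
 */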

static int select_proper_cpu(struct task_struct *p, int prev_cpu)
{
        int cpu;
        unsigned long best_min_util = ULONG_MAX;
        int best_cpu = -1;

        for_each_possible_cpu(cpu) {
                int i;

                /* visit each coregroup only once */
                if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
                        continue;

                /* skip if task cannot be assigned to coregroup */
                if (!cpumask_intersects(&p->cpus_allowed, cpu_coregroup_mask(cpu)))
                        continue;

                for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_coregroup_mask(cpu)) {
                        unsigned long capacity_orig = capacity_orig_of(i);
                        unsigned long wake_util, new_util;

                        wake_util = cpu_util_wake(i, p);
                        new_util = wake_util + task_util_est(p);

                        /* skip over-capacity cpu */
                        if (new_util > capacity_orig)
                                continue;

                        /*
                         * According to the criteria determined by the LBT
                         * (Load Balance Trigger), skip any cpu that would
                         * become over-utilized once the task is assigned
                         * to it.
                         */
                        if (lbt_bring_overutilize(i, p))
                                continue;

                        /*
                         * Best target) lowest utilization among lowest-capacity cpus
                         *
                         * If the sequence reaches this function, the wakeup
                         * task does not require performance and the prev cpu
                         * is over-utilized, so load balancing should be done
                         * without considering the energy side. Therefore,
                         * select the cpu with the smallest capacity and the
                         * least utilization among the cpus that fit the task.
                         */
                        if (best_min_util < new_util)
                                continue;

                        best_min_util = new_util;
                        best_cpu = i;
                }

                /*
                 * If no best cpu has been found in this coregroup, visit the
                 * next coregroup.
                 */
                if (cpu_selected(best_cpu))
                        break;
        }

        trace_ems_select_proper_cpu(p, best_cpu, best_min_util);

        /*
         * If it fails to find the best cpu, choosing any cpu is meaningless.
         * Return the prev cpu.
         */
        return cpu_selected(best_cpu) ? best_cpu : prev_cpu;
}

extern void sync_entity_load_avg(struct sched_entity *se);

int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
{
        int target_cpu = -1;
        char state[30] = "fail";

        /*
         * A task's utilization was last accumulated before it went to sleep,
         * so update it here before deciding which cpu the task will be
         * assigned to. New tasks are excluded.
         */
        if (!(sd_flag & SD_BALANCE_FORK)) {
                unsigned long old_util = task_util(p);

                sync_entity_load_avg(&p->se);
                /* update the band if a large amount of task util has decayed */
                update_band(p, old_util);
        }

        /*
         * Priority 1 : ontime task
         *
         * If a task with more utilization than the threshold wakes up, the
         * task is classified as an "ontime task" and assigned to a
         * performance cpu. Conversely, if a heavy task that has been
         * classified as an ontime task sleeps for a long time and its
         * utilization becomes small, it is excluded from the ontime
         * classification and is no longer guaranteed to run on a
         * performance cpu.
         *
         * An ontime task is very performance sensitive because it is usually
         * the main task of an application. Therefore, it has the highest
         * priority.
         */
        target_cpu = ontime_task_wakeup(p);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "ontime migration");
                goto out;
        }

        /*
         * Priority 2 : prefer-perf
         *
         * Prefer-perf is a function that operates on a cgroup basis managed
         * by schedtune. When prefer-perf is set to 1, the tasks in the group
         * are preferentially assigned to a performance cpu.
         *
         * It has a high priority because it is a function that is turned on
         * temporarily in scenarios requiring responsiveness (touch, app
         * launching).
         */
        target_cpu = prefer_perf_cpu(p);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "prefer-perf");
                goto out;
        }

        /*
         * Priority 3 : task band
         *
         * The tasks in a process are likely to interact, and their operations
         * are sequential and share resources. Therefore, if these tasks are
         * packed and assigned to a specific cpu or cluster, the latency of
         * their interaction decreases and cache reusability increases,
         * thereby improving performance.
         *
         * The "task band" is a function that groups tasks on a per-process
         * basis and assigns them to a specific cpu or cluster. If the
         * schedtune cgroup attribute "band" is set to '1', task band
         * operates on that cgroup.
         */
        target_cpu = band_play_cpu(p);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "task band");
                goto out;
        }

        /*
         * Priority 4 : global boosting
         *
         * Global boost is a function that preferentially assigns all tasks
         * in the system to a performance cpu. Unlike prefer-perf, which
         * targets only group tasks, global boost targets all tasks, so it
         * maximizes performance cpu utilization.
         *
         * Typically, prefer-perf operates on groups that contain UX-related
         * tasks, such as "top-app" or "foreground", so that major tasks are
         * likely to be assigned to a performance cpu. On the other hand,
         * global boost assigns all tasks to a performance cpu, which is not
         * as effective as prefer-perf. For this reason, global boost has a
         * lower priority than prefer-perf.
         */
        target_cpu = global_boosting(p);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "global boosting");
                goto out;
        }

        /*
         * Priority 5 : group balancing
         */
        target_cpu = group_balancing(p);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "group balancing");
                goto out;
        }

        /*
         * Priority 6 : prefer-idle
         *
         * Prefer-idle is a function that operates on a cgroup basis managed
         * by schedtune. When prefer-idle is set to 1, the tasks in the group
         * are preferentially assigned to an idle cpu.
         *
         * Prefer-idle has a smaller performance impact than the functions
         * above. Therefore it has a relatively low priority.
         */
        target_cpu = prefer_idle_cpu(p);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "prefer-idle");
                goto out;
        }

        /*
         * Priority 7 : energy cpu
         *
         * A scheduling scheme based on cpu energy: when assigning a task, it
         * finds the cpu with the least power consumption by referring to the
         * energy table.
         */
        target_cpu = select_energy_cpu(p, prev_cpu, sd_flag, sync);
        if (cpu_selected(target_cpu)) {
                strcpy(state, "energy cpu");
                goto out;
        }

        /*
         * Priority 8 : proper cpu
         */
        target_cpu = select_proper_cpu(p, prev_cpu);
        if (cpu_selected(target_cpu))
                strcpy(state, "proper cpu");

out:
        trace_ems_wakeup_balance(p, target_cpu, state);
        return target_cpu;
}
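/*
 * Illustrative caller sketch for exynos_wakeup_balance(). This is a
 * hypothetical fragment for documentation only; the actual wiring lives in
 * the fair-class wakeup path, not in this file:
 *
 *      target = exynos_wakeup_balance(p, prev_cpu, sd_flag, sync);
 *      if (cpu_selected(target))
 *              new_cpu = target;       (otherwise keep the default choice)
 *
 * Note that select_proper_cpu() falls back to prev_cpu, so once priority 8
 * is reached a valid cpu is always returned.
 */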

struct kobject *ems_kobj;

static int __init init_sysfs(void)
{
        ems_kobj = kobject_create_and_add("ems", kernel_kobj);

        return 0;
}
core_initcall(init_sysfs);
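/*
 * Minimal sketch of how an EMS sub-module could expose a tunable under
 * /sys/kernel/ems through the ems_kobj created above. The attribute name
 * and the show callback are hypothetical, not part of this file:
 *
 *      static ssize_t foo_show(struct kobject *k, struct kobj_attribute *a,
 *                              char *buf)
 *      {
 *              return snprintf(buf, PAGE_SIZE, "%d\n", foo_value);
 *      }
 *
 *      static struct kobj_attribute foo_attr = __ATTR_RO(foo);
 *
 *      static int __init foo_sysfs_init(void)
 *      {
 *              return sysfs_create_file(ems_kobj, &foo_attr.attr);
 *      }
 *      late_initcall(foo_sysfs_init);
 */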