/*
 * Core Exynos Mobile Scheduler
 *
 * Copyright (C) 2018 Samsung Electronics Co., Ltd
 * Park Bumgyu <bumgyu.park@samsung.com>
 */

#include <linux/ems.h>

#define CREATE_TRACE_POINTS
#include <trace/events/ems.h>

#include "ems.h"
#include "../sched.h"
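
/*
 * Return the average utilization of @p: RT tasks use the utilization
 * signal tracked in p->rt.avg, CFS tasks the one in p->se.avg.
 */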
int task_util(struct task_struct *p)
{
	if (rt_task(p))
		return p->rt.avg.util_avg;
	else
		return p->se.avg.util_avg;
}

int cpu_util_wake(int cpu, struct task_struct *p)
{
	struct cfs_rq *cfs_rq;
	unsigned int util;

	/* Task has no contribution or is new */
	if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
		return cpu_util(cpu);

	cfs_rq = &cpu_rq(cpu)->cfs;
	util = READ_ONCE(cfs_rq->avg.util_avg);

	/* Discount task's blocked util from CPU's util */
	util -= min_t(unsigned int, util, task_util_est(p));

	/*
	 * Covered cases:
	 *
	 * a) if *p is the only task sleeping on this CPU, then:
	 *      cpu_util (== task_util) > util_est (== 0)
	 *    and thus we return:
	 *      cpu_util_wake = (cpu_util - task_util) = 0
	 *
	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
	 *    IDLE, then:
	 *      cpu_util >= task_util
	 *      cpu_util > util_est (== 0)
	 *    and thus we discount *p's blocked utilization to return:
	 *      cpu_util_wake = (cpu_util - task_util) >= 0
	 *
	 * c) if other tasks are RUNNABLE on that CPU and
	 *      util_est > cpu_util
	 *    then we use util_est since it returns a more restrictive
	 *    estimation of the spare capacity on that CPU, by just
	 *    considering the expected utilization of tasks already
	 *    runnable on that CPU.
	 *
	 * Cases a) and b) are covered by the above code, while case c) is
	 * covered by the following code when estimated utilization is
	 * enabled.
	 */
	if (sched_feat(UTIL_EST))
		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));

	/*
	 * Utilization (estimated) can exceed the CPU capacity, thus let's
	 * clamp to the maximum CPU capacity to ensure consistency with
	 * the cpu_util call.
	 */
	return min_t(unsigned long, util, capacity_orig_of(cpu));
}

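/*
 * Check whether the capacity of this rq has been noticeably reduced by
 * side activity such as RT or IRQ pressure. sd->imbalance_pct provides
 * the margin: e.g. with an imbalance_pct of 117, this returns true once
 * less than roughly 85% of the original capacity is left for CFS tasks.
 */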
static inline int
check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
{
	return ((rq->cpu_capacity * sd->imbalance_pct) <
		(rq->cpu_capacity_orig * 100));
}

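/*
 * True when @sd has a parent domain containing more than one group,
 * i.e. @sd is not the top-level load-balance domain.
 */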
#define lb_sd_parent(sd) \
	(sd->parent && sd->parent->groups != sd->parent->groups->next)

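/*
 * Decide whether active balance should be forced even though the regular
 * load-balance path found no imbalance: returns 1 when the single task
 * running on src_cpu would be better served by the idle or higher-capacity
 * dst_cpu, or when repeated balance attempts have already failed.
 */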
int exynos_need_active_balance(enum cpu_idle_type idle, struct sched_domain *sd,
					int src_cpu, int dst_cpu)
{
	unsigned int src_imb_pct = lb_sd_parent(sd) ? sd->imbalance_pct : 1;
	unsigned int dst_imb_pct = lb_sd_parent(sd) ? 100 : 1;
	unsigned long src_cap = capacity_of(src_cpu);
	unsigned long dst_cap = capacity_of(dst_cpu);
	int level = sd->level;

	/* dst_cpu is idle */
	if ((idle != CPU_NOT_IDLE) &&
	    (cpu_rq(src_cpu)->cfs.h_nr_running == 1)) {
		if ((check_cpu_capacity(cpu_rq(src_cpu), sd)) &&
		    (src_cap * sd->imbalance_pct < dst_cap * 100)) {
			return 1;
		}

		/* This domain is top and dst_cpu is bigger than src_cpu */
		if (!lb_sd_parent(sd) && src_cap < dst_cap)
			if (lbt_overutilized(src_cpu, level) || global_boosted())
				return 1;
	}

	if ((src_cap * src_imb_pct < dst_cap * dst_imb_pct) &&
			cpu_rq(src_cpu)->cfs.h_nr_running == 1 &&
			lbt_overutilized(src_cpu, level) &&
			!lbt_overutilized(dst_cpu, level)) {
		return 1;
	}

	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2);
}

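/*
 * Fallback cpu selection: visit each coregroup once and, among the allowed
 * cpus that can take the task without becoming overutilized, pick the one
 * with the lowest expected utilization. Coregroups are walked in cpu-id
 * order, which on this platform typically means smallest capacity first.
 */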
static int select_proper_cpu(struct task_struct *p, int prev_cpu)
{
	int cpu;
	unsigned long best_min_util = ULONG_MAX;
	int best_cpu = -1;

	for_each_possible_cpu(cpu) {
		int i;

		/* visit each coregroup only once */
		if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
			continue;

		/* skip if task cannot be assigned to coregroup */
		if (!cpumask_intersects(&p->cpus_allowed, cpu_coregroup_mask(cpu)))
			continue;

		for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_coregroup_mask(cpu)) {
			unsigned long capacity_orig = capacity_orig_of(i);
			unsigned long wake_util, new_util;

			wake_util = cpu_util_wake(i, p);
			new_util = wake_util + task_util_est(p);

			/* skip over-capacity cpu */
			if (new_util > capacity_orig)
				continue;

			/*
			 * According to the criteria determined by the LBT (Load
			 * Balance Trigger), skip any cpu that would become
			 * overutilized if the task were assigned to it.
			 */
			if (lbt_bring_overutilize(i, p))
				continue;

			/*
			 * Best target: lowest utilization among lowest-capacity cpus
			 *
			 * If the sequence reaches this function, the waking task
			 * does not require performance and the prev cpu is over-
			 * utilized, so load balancing should be done without
			 * considering the energy side. Therefore, select the cpu
			 * with the smallest capacity and the least utilization
			 * among the cpus that fit the task.
			 */
			if (best_min_util < new_util)
				continue;

			best_min_util = new_util;
			best_cpu = i;
		}

		/*
		 * If a best cpu was found in this coregroup, stop searching;
		 * otherwise visit the next coregroup.
		 */
		if (cpu_selected(best_cpu))
			break;
	}

	trace_ems_select_proper_cpu(p, best_cpu, best_min_util);

	/*
	 * If it fails to find the best cpu, choosing any other cpu is
	 * meaningless. Return the prev cpu.
	 */
	return cpu_selected(best_cpu) ? best_cpu : prev_cpu;
}

extern void sync_entity_load_avg(struct sched_entity *se);

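/*
 * Main EMS wakeup cpu selection. Candidate policies are tried in priority
 * order: ontime migration, prefer-perf, task band, global boosting, group
 * balancing, prefer-idle, energy-aware selection and finally the "proper
 * cpu" fallback. The first policy that selects a cpu wins.
 */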
int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
{
	int target_cpu = -1;
	char state[30] = "fail";

	/*
	 * The utilization of a task is accumulated before it sleeps, so update
	 * it here to determine which cpu the task should be assigned to.
	 * New (forked) tasks are excluded.
	 */
	if (!(sd_flag & SD_BALANCE_FORK)) {
		unsigned long old_util = task_util(p);

		sync_entity_load_avg(&p->se);
		/* update the band if a large amount of task util is decayed */
		update_band(p, old_util);
	}

	/*
	 * Priority 1 : ontime task
	 *
	 * If a task with more utilization than the threshold wakes up, it is
	 * classified as an "ontime task" and assigned to a performance cpu.
	 * Conversely, if a heavy task that has been classified as an ontime
	 * task sleeps for a long time and its utilization becomes small, it is
	 * excluded from the ontime tasks and is no longer guaranteed to run on
	 * a performance cpu.
	 *
	 * An ontime task is very performance-sensitive because it is usually
	 * the main task of an application. Therefore, it has the highest
	 * priority.
	 */
	target_cpu = ontime_task_wakeup(p);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "ontime migration");
		goto out;
	}

	/*
	 * Priority 2 : prefer-perf
	 *
	 * Prefer-perf is a function that operates on a cgroup basis managed by
	 * schedtune. When prefer-perf is set to 1, the tasks in the group are
	 * preferentially assigned to a performance cpu.
	 *
	 * It has a high priority because it is turned on temporarily in
	 * scenarios requiring responsiveness (touch, app launching).
	 */
	target_cpu = prefer_perf_cpu(p);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "prefer-perf");
		goto out;
	}

	/*
	 * Priority 3 : task band
	 *
	 * The tasks in a process are likely to interact; their operations are
	 * often sequential and they share resources. Therefore, if these tasks
	 * are packed and assigned to a specific cpu or cluster, the latency of
	 * their interactions decreases and cache reusability increases,
	 * thereby improving performance.
	 *
	 * The "task band" is a function that groups tasks on a per-process
	 * basis and assigns them to a specific cpu or cluster. If the "band"
	 * attribute of schedtune.cgroup is set to '1', task band operates on
	 * this cgroup.
	 */
	target_cpu = band_play_cpu(p);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "task band");
		goto out;
	}

	/*
	 * Priority 4 : global boosting
	 *
	 * Global boost is a function that preferentially assigns all tasks in
	 * the system to performance cpus. Unlike prefer-perf, which targets
	 * only group tasks, global boost targets all tasks, so it maximizes
	 * performance cpu utilization.
	 *
	 * Typically, prefer-perf operates on groups that contain UX-related
	 * tasks, such as "top-app" or "foreground", so that major tasks are
	 * likely to be assigned to performance cpus. On the other hand, global
	 * boost assigns all tasks to performance cpus, which is not as
	 * effective as prefer-perf. For this reason, global boost has a lower
	 * priority than prefer-perf.
	 */
	target_cpu = global_boosting(p);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "global boosting");
		goto out;
	}

	/*
	 * Priority 5 : group balancing
	 */
	target_cpu = group_balancing(p);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "group balancing");
		goto out;
	}

	/*
	 * Priority 6 : prefer-idle
	 *
	 * Prefer-idle is a function that operates on a cgroup basis managed by
	 * schedtune. When prefer-idle is set to 1, the tasks in the group are
	 * preferentially assigned to an idle cpu.
	 *
	 * Prefer-idle has a smaller performance impact than the policies
	 * above. Therefore it has a relatively low priority.
	 */
	target_cpu = prefer_idle_cpu(p);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "prefer-idle");
		goto out;
	}

	/*
	 * Priority 7 : energy cpu
	 *
	 * An energy-aware scheduling scheme: when assigning the task, find the
	 * cpu with the lowest power consumption by referring to the energy
	 * table.
	 */
	target_cpu = select_energy_cpu(p, prev_cpu, sd_flag, sync);
	if (cpu_selected(target_cpu)) {
		strcpy(state, "energy cpu");
		goto out;
	}

	/*
	 * Priority 8 : proper cpu
	 */
	target_cpu = select_proper_cpu(p, prev_cpu);
	if (cpu_selected(target_cpu))
		strcpy(state, "proper cpu");

out:
	trace_ems_wakeup_balance(p, target_cpu, state);
	return target_cpu;
}

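/* sysfs anchor for EMS tunables, exposed as /sys/kernel/ems */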
struct kobject *ems_kobj;

static int __init init_sysfs(void)
{
	ems_kobj = kobject_create_and_add("ems", kernel_kobj);

	return 0;
}
core_initcall(init_sysfs);