/*
 * Load balance - Exynos Mobile Scheduler
 *
 * Copyright (C) 2018 Samsung Electronics Co., Ltd
 * Lakkyung Jung <lakkyung.jung@samsung.com>
 */

#include <linux/sched.h>
#include <linux/cpuidle.h>
#include <linux/pm_qos.h>
#include <linux/sched/energy.h>
#include <linux/ems.h>

#include <trace/events/ems.h>
struct list_head *lb_cfs_tasks(struct rq *rq, int sse)
{
	return sse ? &rq->sse_cfs_tasks : &rq->uss_cfs_tasks;
}
void lb_add_cfs_task(struct rq *rq, struct sched_entity *se)
{
	struct list_head *tasks = lb_cfs_tasks(rq, task_of(se)->sse);

	list_add(&se->group_node, tasks);
}
int lb_check_priority(int src_cpu, int dst_cpu)
{
	/* dst_cpu has more USS capacity: prefer USS tasks first */
	if (capacity_orig_of_sse(dst_cpu, 0) > capacity_orig_of_sse(src_cpu, 0))
		return 0;
	/* dst_cpu has more SSE capacity: prefer SSE tasks first */
	else if (capacity_orig_of_sse(dst_cpu, 1) > capacity_orig_of_sse(src_cpu, 1))
		return 1;

	return 0;
}
struct list_head *lb_prefer_cfs_tasks(int src_cpu, int dst_cpu)
{
	struct rq *src_rq = cpu_rq(src_cpu);
	int sse = lb_check_priority(src_cpu, dst_cpu);
	struct list_head *tasks;

	tasks = lb_cfs_tasks(src_rq, sse);
	if (!list_empty(tasks))
		return tasks;

	/* Fall back to the other class if the preferred list is empty */
	return lb_cfs_tasks(src_rq, !sse);
}
static inline int
check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
{
	return ((rq->cpu_capacity * sd->imbalance_pct) <
				(rq->cpu_capacity_orig * 100));
}
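
/*
 * Illustrative numbers (not from this file): with the common kernel
 * default of imbalance_pct == 117, check_cpu_capacity() reports a
 * capacity-reduced CPU once cpu_capacity falls below 100/117 (~85%)
 * of cpu_capacity_orig, e.g. below ~875 when cpu_capacity_orig is 1024.
 */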
#define lb_sd_parent(sd) \
	(sd->parent && sd->parent->groups != sd->parent->groups->next)
int lb_need_active_balance(enum cpu_idle_type idle, struct sched_domain *sd,
				int src_cpu, int dst_cpu)
{
	struct task_struct *p = cpu_rq(src_cpu)->curr;
	unsigned int src_imb_pct = lb_sd_parent(sd) ? sd->imbalance_pct : 1;
	unsigned int dst_imb_pct = lb_sd_parent(sd) ? 100 : 1;
	unsigned long src_cap = capacity_orig_of_sse(src_cpu, p->sse);
	unsigned long dst_cap = capacity_orig_of_sse(dst_cpu, p->sse);
	int level = sd->level;

	/* dst_cpu is idle and src_cpu has only a single runnable CFS task */
	if ((idle != CPU_NOT_IDLE) &&
	    (cpu_rq(src_cpu)->cfs.h_nr_running == 1)) {
		if ((check_cpu_capacity(cpu_rq(src_cpu), sd)) &&
		    (src_cap * sd->imbalance_pct < dst_cap * 100)) {
			return 1;
		}

		/* This domain is top and dst_cpu is bigger than src_cpu */
		if (!lb_sd_parent(sd) && src_cap < dst_cap)
			if (lbt_overutilized(src_cpu, level) || global_boosted())
				return 1;
	}

	if ((src_cap * src_imb_pct < dst_cap * dst_imb_pct) &&
			cpu_rq(src_cpu)->cfs.h_nr_running == 1 &&
			lbt_overutilized(src_cpu, level) &&
			!lbt_overutilized(dst_cpu, level)) {
		return 1;
	}

	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2);
}
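
/*
 * Worked example for the idle-dst branch above (numbers hypothetical):
 * with imbalance_pct == 117, src_cap == 524 (little) and dst_cap == 1024
 * (big), src_cap * 117 == 61308 is less than dst_cap * 100 == 102400,
 * so a lone task on a capacity-reduced little CPU may be actively
 * pulled by an idle big CPU.
 */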
/****************************************************************/
/*			Load Balance Trigger			*/
/****************************************************************/
#define DISABLE_OU		-1
#define DEFAULT_OU_RATIO	80
struct lbt_overutil {
	bool			top;
	struct cpumask		cpus;
	unsigned long		capacity;
	int			ratio;
};
DEFINE_PER_CPU(struct lbt_overutil *, lbt_overutil);
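
/*
 * Threshold math used below (example values hypothetical): a CPU with
 * capacity 1024 and the default ratio of 80 gets ou[level].capacity =
 * 1024 * 80 / 100 = 819, so lbt_overutilized() returns true once
 * ml_cpu_util(cpu) exceeds 819.
 */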
static inline struct sched_domain *find_sd_by_level(int cpu, int level)
{
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {
		if (sd->level == level)
			return sd;
	}

	return NULL;
}
static inline int get_topology_depth(void)
{
	struct sched_domain *sd;

	for_each_domain(0, sd) {
		if (sd->parent == NULL)
			return sd->level;
	}

	return -1;
}
static inline int get_last_level(struct lbt_overutil *ou)
{
	int level, depth = get_topology_depth();

	/* Guard against an unallocated per-cpu table */
	if (!ou)
		return -1;

	for (level = 0; level <= depth; level++) {
		if (ou[level].top == true)
			return level;
	}

	return -1;
}
/****************************************************************/
/*			External APIs				*/
/****************************************************************/
bool lbt_overutilized(int cpu, int level)
{
	struct lbt_overutil *ou = per_cpu(lbt_overutil, cpu);
	bool overutilized;

	if (!ou)
		return false;

	overutilized = (ml_cpu_util(cpu) > ou[level].capacity) ? true : false;

	if (overutilized)
		trace_ems_lbt_overutilized(cpu, level, ml_cpu_util(cpu),
				ou[level].capacity, overutilized);

	return overutilized;
}
void update_lbt_overutil(int cpu, unsigned long capacity)
{
	struct lbt_overutil *ou = per_cpu(lbt_overutil, cpu);
	int level, last = get_last_level(ou);

	for (level = 0; level <= last; level++) {
		if (ou[level].ratio == DISABLE_OU)
			continue;

		ou[level].capacity = (capacity * ou[level].ratio) / 100;
	}
}
/****************************************************************/
/*			Sysfs Interface				*/
/****************************************************************/
#define lbt_attr_init(_attr, _name, _mode, _show, _store)	\
	sysfs_attr_init(&_attr.attr);				\
	_attr.attr.name = _name;				\
	_attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(_mode);	\
	_attr.show	= _show;				\
	_attr.store	= _store;
static struct kobject *lbt_kobj;
static struct attribute **lbt_attrs;
static struct kobj_attribute *lbt_kattrs;
static struct attribute_group lbt_group;
static ssize_t show_overutil_ratio(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct lbt_overutil *ou = per_cpu(lbt_overutil, 0);
	int level = attr - lbt_kattrs;
	int cpu, ret = 0;

	for_each_possible_cpu(cpu) {
		ou = per_cpu(lbt_overutil, cpu);

		if (ou[level].ratio == DISABLE_OU)
			continue;

		ret += sprintf(buf + ret, "cpu%d ratio:%3d capacity:%4lu\n",
				cpu, ou[level].ratio, ou[level].capacity);
	}

	return ret;
}
static ssize_t store_overutil_ratio(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf,
		size_t count)
{
	struct lbt_overutil *ou;
	unsigned long capacity;
	int level = attr - lbt_kattrs;
	int cpu, ratio;

	if (sscanf(buf, "%d %d", &cpu, &ratio) != 2)
		return -EINVAL;

	/* Check cpu is possible */
	if (!cpumask_test_cpu(cpu, cpu_possible_mask))
		return -EINVAL;
	ou = per_cpu(lbt_overutil, cpu);

	/* If ratio is out of range, fall back to the default ratio */
	if (ratio < 0 || ratio > 100)
		ratio = DEFAULT_OU_RATIO;

	for_each_cpu(cpu, &ou[level].cpus) {
		ou = per_cpu(lbt_overutil, cpu);
		if (ou[level].ratio == DISABLE_OU)
			continue;

		ou[level].ratio = ratio;
		capacity = capacity_orig_of(cpu);
		update_lbt_overutil(cpu, capacity);
	}

	return count;
}
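
/*
 * Usage sketch (the sysfs path is an assumption; ems_kobj typically
 * sits under /sys/kernel/ems): set a 90% overutil ratio for the
 * level-0 domain containing cpu4 with:
 *
 *	echo "4 90" > /sys/kernel/ems/lbt/overutil_ratio_level0
 */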
static int alloc_lbt_sysfs(int size)
{
	if (size < 0)
		return -EINVAL;

	lbt_attrs = kzalloc(sizeof(struct attribute *) * (size + 1),
			GFP_KERNEL);
	if (!lbt_attrs)
		goto fail_alloc;

	lbt_kattrs = kzalloc(sizeof(struct kobj_attribute) * (size),
			GFP_KERNEL);
	if (!lbt_kattrs)
		goto fail_alloc;

	return 0;

fail_alloc:
	kfree(lbt_attrs);
	pr_err("LBT(%s): failed to alloc sysfs attrs\n", __func__);
	return -ENOMEM;
}
static int __init lbt_sysfs_init(void)
{
	int depth = get_topology_depth();
	int i;

	if (alloc_lbt_sysfs(depth + 1))
		goto out;

	for (i = 0; i <= depth; i++) {
		char buf[32];
		char *name;

		scnprintf(buf, sizeof(buf), "overutil_ratio_level%d", i);
		name = kstrdup(buf, GFP_KERNEL);
		if (!name)
			goto out;

		lbt_attr_init(lbt_kattrs[i], name, 0644,
				show_overutil_ratio, store_overutil_ratio);
		lbt_attrs[i] = &lbt_kattrs[i].attr;
	}

	lbt_group.attrs = lbt_attrs;

	lbt_kobj = kobject_create_and_add("lbt", ems_kobj);
	if (!lbt_kobj)
		goto out;

	if (sysfs_create_group(lbt_kobj, &lbt_group))
		goto out;

	return 0;

out:
	pr_err("LBT(%s): failed to create sysfs node\n", __func__);
	return -EINVAL;
}
late_initcall(lbt_sysfs_init);
/****************************************************************/
/*			Initialization				*/
/****************************************************************/
static void free_lbt_overutil(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (per_cpu(lbt_overutil, cpu))
			kfree(per_cpu(lbt_overutil, cpu));
	}
}
static int alloc_lbt_overutil(void)
{
	int cpu, depth = get_topology_depth();

	for_each_possible_cpu(cpu) {
		struct lbt_overutil *ou = kzalloc(sizeof(struct lbt_overutil) *
				(depth + 1), GFP_KERNEL);

		if (!ou)
			goto fail_alloc;

		per_cpu(lbt_overutil, cpu) = ou;
	}

	return 0;

fail_alloc:
	free_lbt_overutil();
	return -ENOMEM;
}
static void default_lbt_overutil(int level)
{
	struct sched_domain *sd;
	struct lbt_overutil *ou;
	struct cpumask cpus;
	bool top;
	int cpu, c;

	/* If the current level equals the topology depth, it is the top level */
	top = !(get_topology_depth() - level);

	cpumask_clear(&cpus);

	for_each_possible_cpu(cpu) {
		if (cpumask_test_cpu(cpu, &cpus))
			continue;

		sd = find_sd_by_level(cpu, level);
		if (!sd) {
			/* No domain at this level: disable overutil for this cpu */
			ou = per_cpu(lbt_overutil, cpu);
			ou[level].ratio = DISABLE_OU;
			ou[level].top = top;
			continue;
		}

		cpumask_copy(&cpus, sched_domain_span(sd));
		for_each_cpu(c, &cpus) {
			ou = per_cpu(lbt_overutil, c);
			cpumask_copy(&ou[level].cpus, &cpus);
			ou[level].ratio = DEFAULT_OU_RATIO;
			ou[level].top = top;
		}
	}
}
static void set_lbt_overutil(int level, const char *mask, int ratio)
{
	struct lbt_overutil *ou;
	struct cpumask cpus;
	bool top, overlap = false;
	int cpu;

	cpulist_parse(mask, &cpus);
	cpumask_and(&cpus, &cpus, cpu_possible_mask);
	if (!cpumask_weight(&cpus))
		return;

	/* If the current level equals the topology depth, it is the top level */
	top = !(get_topology_depth() - level);

	/* If this level overlaps the previous level, disable this level */
	if (level > 0) {
		ou = per_cpu(lbt_overutil, cpumask_first(&cpus));
		overlap = cpumask_equal(&cpus, &ou[level - 1].cpus);
	}

	for_each_cpu(cpu, &cpus) {
		ou = per_cpu(lbt_overutil, cpu);
		cpumask_copy(&ou[level].cpus, &cpus);
		ou[level].ratio = overlap ? DISABLE_OU : ratio;
		ou[level].top = top;
	}
}
static void parse_lbt_overutil(struct device_node *dn)
{
	struct device_node *lbt, *ou;
	int level, depth = get_topology_depth();

	/* If there is no lbt node, set the default value (80%) */
	lbt = of_get_child_by_name(dn, "lbt");
	if (!lbt) {
		for (level = 0; level <= depth; level++)
			default_lbt_overutil(level);
		return;
	}

	/* If cpu_possible_mask is not the full mask, set the default value */
	if (!cpumask_equal(cpu_possible_mask, cpu_all_mask)) {
		for (level = 0; level <= depth; level++)
			default_lbt_overutil(level);
		return;
	}

	for (level = 0; level <= depth; level++) {
		char name[20];
		const char *mask[NR_CPUS];
		struct cpumask combi, each;
		u32 ratio[NR_CPUS];
		int i, proplen;

		snprintf(name, sizeof(name), "overutil-level%d", level);
		ou = of_get_child_by_name(lbt, name);
		if (!ou)
			goto default_setting;

		proplen = of_property_count_strings(ou, "cpus");
		if ((proplen < 0) ||
		    (proplen != of_property_count_u32_elems(ou, "ratio")))
			goto default_setting;

		of_property_read_string_array(ou, "cpus", mask, proplen);
		of_property_read_u32_array(ou, "ratio", ratio, proplen);

		/*
		 * If the combination of each cpus property does not correspond
		 * with cpu_possible_mask, do not use this property.
		 */
		cpumask_clear(&combi);
		for (i = 0; i < proplen; i++) {
			cpulist_parse(mask[i], &each);
			cpumask_or(&combi, &combi, &each);
		}
		if (!cpumask_equal(&combi, cpu_possible_mask))
			goto default_setting;

		for (i = 0; i < proplen; i++)
			set_lbt_overutil(level, mask[i], ratio[i]);
		continue;

default_setting:
		default_lbt_overutil(level);
	}
}
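
/*
 * Illustrative device tree layout for the parser above. Node and
 * property names match the code; the CPU lists and ratios are made up
 * for an assumed 8-CPU system:
 *
 *	cpus {
 *		ems {
 *			lbt {
 *				overutil-level0 {
 *					cpus = "0-3", "4-7";
 *					ratio = <80 80>;
 *				};
 *				overutil-level1 {
 *					cpus = "0-7";
 *					ratio = <80>;
 *				};
 *			};
 *		};
 *	};
 */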
static int __init init_lbt(void)
{
	struct device_node *dn = of_find_node_by_path("/cpus/ems");

	if (alloc_lbt_overutil()) {
		pr_err("LBT(%s): failed to allocate lbt_overutil\n", __func__);
		return -ENOMEM;
	}

	parse_lbt_overutil(dn);

	return 0;
}
pure_initcall(init_lbt);