From b2d32aca78535a334d5892b9e00c8be397f4b3de Mon Sep 17 00:00:00 2001
From: Park Bumgyu
Date: Tue, 27 Mar 2018 12:50:03 +0900
Subject: [PATCH] sched: ems: support find energy efficient cpu

Based on the task utilization, it is a function to find the cpu with the
best energy efficiency considering cpu capacity and power.

Change-Id: I28c333f8f6881ea4aaaf57361cc612975dc35c26
Signed-off-by: Park Bumgyu
---
Reviewer notes (everything between "---" and the first "diff --git" is
ignored when the patch is applied):
 - calculate_energy(): 'capacity' was read uninitialized when no capacity
   state could hold the coregroup utilization; it is now always taken from
   the selected state. Coregroups whose energy table is not populated are
   skipped instead of indexing states[-1].
 - cpu_selected(): macro argument is parenthesized.
 - select_eco_cpu(): 'margin' is unsigned, like the values it is compared to.
 - Function/struct comments added, comment typos fixed.
 - NOTE(review): the two '#include' context lines of the trace header hunk
   carry no header name in the copy this patch was reconstructed from;
   restore them before applying.

 include/trace/events/ems.h |  42 +++++
 kernel/sched/ems/core.c    | 360 ++++++++++++++++++++++++++++++++++++-
 kernel/sched/ems/ems.h     |   4 +-
 kernel/sched/fair.c        |   2 +-
 4 files changed, 399 insertions(+), 9 deletions(-)

diff --git a/include/trace/events/ems.h b/include/trace/events/ems.h
index af380165f450..866b1843e236 100644
--- a/include/trace/events/ems.h
+++ b/include/trace/events/ems.h
@@ -15,6 +15,48 @@
 #include
 #include
 
+/*
+ * Tracepoint for selecting eco cpu
+ */
+TRACE_EVENT(ems_select_eco_cpu,
+
+	TP_PROTO(struct task_struct *p, int eco_cpu, int prev_cpu, int best_cpu, int backup_cpu,
+		unsigned int prev_energy, unsigned int best_energy, unsigned int backup_energy),
+
+	TP_ARGS(p, eco_cpu, prev_cpu, best_cpu, backup_cpu,
+		prev_energy, best_energy, backup_energy),
+
+	TP_STRUCT__entry(
+		__array(	char,		comm,	TASK_COMM_LEN	)
+		__field(	pid_t,		pid			)
+		__field(	int,		eco_cpu			)
+		__field(	int,		prev_cpu		)
+		__field(	int,		best_cpu		)
+		__field(	int,		backup_cpu		)
+		__field(	unsigned int,	prev_energy		)
+		__field(	unsigned int,	best_energy		)
+		__field(	unsigned int,	backup_energy		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->eco_cpu	= eco_cpu;
+		__entry->prev_cpu	= prev_cpu;
+		__entry->best_cpu	= best_cpu;
+		__entry->backup_cpu	= backup_cpu;
+		__entry->prev_energy	= prev_energy;
+		__entry->best_energy	= best_energy;
+		__entry->backup_energy	= backup_energy;
+	),
+
+	TP_printk("comm=%s pid=%d eco_cpu=%d prev_cpu=%d best_cpu=%d backup_cpu=%d "
+		"prev_energy=%u best_energy=%u backup_energy=%u",
+		__entry->comm, __entry->pid,
+		__entry->eco_cpu, __entry->prev_cpu, __entry->best_cpu, __entry->backup_cpu,
+		__entry->prev_energy, __entry->best_energy, __entry->backup_energy)
+);
+
 /*
  * Tracepoint for wakeup balance
  */
diff --git a/kernel/sched/ems/core.c b/kernel/sched/ems/core.c
index 8ab7905f1c9a..26f2e65f800b 100644
--- a/kernel/sched/ems/core.c
+++ b/kernel/sched/ems/core.c
@@ -11,9 +11,359 @@
 #include "ems.h"
 #include "../sched.h"
 
-static int select_energy_cpu(struct task_struct *p)
+#define cpu_selected(cpu)	((cpu) >= 0)
+
+/* Return the tracked average utilization (se.avg.util_avg) of task @p. */
+static int task_util(struct task_struct *p)
 {
-	return -1;
+	return p->se.avg.util_avg;
+}
+
+/*
+ * Return the utilization of @cpu as it would be without waking task @p.
+ *
+ * When @cpu is not task_cpu(@p), or @p is a new task
+ * (last_update_time == 0), cpu_util(@cpu) is returned as is. Otherwise the
+ * task utilization is subtracted from the cfs utilization of @cpu and the
+ * result is clamped to the range [0, capacity_orig_of(@cpu)].
+ */
+static int cpu_util_wake(int cpu, struct task_struct *p)
+{
+	unsigned long util, capacity;
+
+	/* Task has no contribution or is new */
+	if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+		return cpu_util(cpu);
+
+	capacity = capacity_orig_of(cpu);
+	util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0);
+
+	return (util >= capacity) ? capacity : util;
+}
+
+/*
+ * Working set of one eco cpu selection:
+ * @p:          task to be placed
+ * @prev_cpu:   previous cpu of the task, as passed in by the caller
+ * @best_cpu:   non-idle candidate from find_eco_target(), -1 if none
+ * @backup_cpu: idle candidate from find_eco_target(), -1 if none
+ */
+struct eco_env {
+	struct task_struct *p;
+
+	int prev_cpu;
+	int best_cpu;
+	int backup_cpu;
+};
+
+/*
+ * Search the allowed and active cpus of eenv->p, excluding eenv->prev_cpu,
+ * for placement candidates. The results are stored in eenv->best_cpu and
+ * eenv->backup_cpu; each is -1 when no cpu qualifies.
+ */
+static void find_eco_target(struct eco_env *eenv)
+{
+	struct task_struct *p = eenv->p;
+	unsigned long best_min_cap_orig = ULONG_MAX;
+	unsigned long backup_min_cap_orig = ULONG_MAX;
+	unsigned long best_spare_cap = 0;
+	int backup_idle_cstate = INT_MAX;
+	int best_cpu = -1;
+	int backup_cpu = -1;
+	int cpu;
+
+	rcu_read_lock();
+
+	for_each_cpu_and(cpu, &p->cpus_allowed, cpu_active_mask) {
+		unsigned long capacity_orig = capacity_orig_of(cpu);
+		unsigned long wake_util, new_util;
+
+		wake_util = cpu_util_wake(cpu, p);
+		new_util = wake_util + task_util(p);
+
+		/* checking prev cpu is meaningless */
+		if (eenv->prev_cpu == cpu)
+			continue;
+
+		/* skip over-capacity cpu */
+		if (new_util > capacity_orig)
+			continue;
+
+		/*
+		 * According to the criteria determined by the LBT(Load
+		 * Balance trigger), the cpu that becomes overutilized when
+		 * the task is assigned is skipped.
+		 */
+		if (lbt_bring_overutilize(cpu, p))
+			continue;
+
+		/*
+		 * Backup target) shallowest idle cpu among min-cap cpu
+		 *
+		 * In general, assigning a task to an idle cpu is
+		 * disadvantageous in energy. To minimize the energy increase
+		 * associated with selecting idle cpu, choose a cpu that is
+		 * in the lowest performance and shallowest idle state.
+		 */
+		if (idle_cpu(cpu)) {
+			int idle_idx;
+
+			if (backup_min_cap_orig < capacity_orig)
+				continue;
+
+			idle_idx = idle_get_state_idx(cpu_rq(cpu));
+			if (backup_idle_cstate <= idle_idx)
+				continue;
+
+			backup_min_cap_orig = capacity_orig;
+			backup_idle_cstate = idle_idx;
+			backup_cpu = cpu;
+			continue;
+		}
+
+		/*
+		 * Best target) biggest spare cpu among min-cap cpu
+		 *
+		 * Select the cpu with the biggest spare capacity to maintain
+		 * frequency as possible without waking up idle cpu. Also, to
+		 * maximize the use of energy-efficient cpu, we choose the
+		 * lowest performance cpu.
+		 */
+		if (best_min_cap_orig < capacity_orig)
+			continue;
+
+		if (best_spare_cap > (capacity_orig - new_util))
+			continue;
+
+		best_spare_cap = capacity_orig - new_util;
+		best_min_cap_orig = capacity_orig;
+		best_cpu = cpu;
+	}
+
+	rcu_read_unlock();
+
+	eenv->best_cpu = best_cpu;
+	eenv->backup_cpu = backup_cpu;
+}
+
+/*
+ * Capacity states of a cpu used for energy estimation.
+ * NOTE(review): nothing in this patch fills the per-cpu table; it is assumed
+ * to be populated by a later change.
+ */
+struct energy_table {
+	struct capacity_state *states;
+	unsigned int nr_states;
+};
+
+DEFINE_PER_CPU(struct energy_table, energy_table);
+
+/*
+ * Estimate the energy consumed by all coregroups when @p runs on @target_cpu.
+ *
+ * For each coregroup the normalized utilization sum is multiplied by the
+ * power of the capacity state needed for its busiest cpu. The value has no
+ * unit; it is only meant to be compared with other results of this function.
+ * Coregroups without a populated energy table do not contribute.
+ */
+static unsigned int calculate_energy(struct task_struct *p, int target_cpu)
+{
+	unsigned long util[NR_CPUS] = {0, };
+	unsigned int total_energy = 0;
+	int cpu;
+
+	/*
+	 * 0. Calculate utilization of the entire active cpu when task
+	 *    is assigned to target cpu.
+	 */
+	for_each_cpu(cpu, cpu_active_mask) {
+		util[cpu] = cpu_util_wake(cpu, p);
+
+		if (unlikely(cpu == target_cpu))
+			util[cpu] += task_util(p);
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct energy_table *table;
+		unsigned long max_util = 0, util_sum = 0;
+		unsigned long capacity;
+		int i, cap_idx;
+
+		/* Compute coregroup energy with only one cpu per coregroup */
+		if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
+			continue;
+
+		/*
+		 * 1. The cpu in the coregroup has same capacity and the
+		 *    capacity depends on the cpu that has the biggest
+		 *    utilization. Find biggest utilization in the coregroup
+		 *    to know what capacity the cpu will have.
+		 */
+		for_each_cpu(i, cpu_coregroup_mask(cpu))
+			if (util[i] > max_util)
+				max_util = util[i];
+
+		/*
+		 * 2. Find the capacity according to biggest utilization in
+		 *    coregroup. If no state can hold the utilization, the
+		 *    last state of the table is used.
+		 */
+		table = &per_cpu(energy_table, cpu);
+		if (unlikely(!table->states || !table->nr_states))
+			continue;
+
+		cap_idx = table->nr_states - 1;
+		for (i = 0; i < table->nr_states; i++) {
+			if (table->states[i].cap >= max_util) {
+				cap_idx = i;
+				break;
+			}
+		}
+		capacity = table->states[cap_idx].cap;
+
+		/*
+		 * 3. Get the utilization sum of coregroup. Since cpu
+		 *    utilization of CFS reflects the performance of cpu,
+		 *    normalize the utilization to calculate the amount of
+		 *    cpu usage that excludes cpu performance.
+		 */
+		for_each_cpu(i, cpu_coregroup_mask(cpu)) {
+			/* utilization with task exceeds max capacity of cpu */
+			if (util[i] >= capacity) {
+				util_sum += SCHED_CAPACITY_SCALE;
+				continue;
+			}
+
+			/* normalize cpu utilization */
+			util_sum += (util[i] << SCHED_CAPACITY_SHIFT) / capacity;
+		}
+
+		/*
+		 * 4. compute active energy
+		 */
+		total_energy += util_sum * table->states[cap_idx].power;
+	}
+
+	return total_energy;
+}
+
+/*
+ * Decide between eenv->prev_cpu and the cheaper of the best/backup candidates.
+ *
+ * The candidate is returned only if it saves at least prev_energy/64 compared
+ * with keeping the task on prev_cpu; otherwise prev_cpu is returned.
+ */
+static int select_eco_cpu(struct eco_env *eenv)
+{
+	unsigned int prev_energy, best_energy, backup_energy;
+	unsigned int temp_energy;
+	int temp_cpu;
+	int eco_cpu = eenv->prev_cpu;
+	unsigned int margin;
+
+	prev_energy = calculate_energy(eenv->p, eenv->prev_cpu);
+
+	/*
+	 * find_eco_target() may not find best or backup cpu. Ignore unfound
+	 * cpu, and if both are found, select a cpu that consumes less energy
+	 * when assigning task.
+	 */
+	best_energy = backup_energy = UINT_MAX;
+
+	if (cpu_selected(eenv->best_cpu))
+		best_energy = calculate_energy(eenv->p, eenv->best_cpu);
+
+	if (cpu_selected(eenv->backup_cpu))
+		backup_energy = calculate_energy(eenv->p, eenv->backup_cpu);
+
+	if (best_energy < backup_energy) {
+		temp_energy = best_energy;
+		temp_cpu = eenv->best_cpu;
+	} else {
+		temp_energy = backup_energy;
+		temp_cpu = eenv->backup_cpu;
+	}
+
+	/*
+	 * Compare prev cpu to target cpu among best and backup cpu to determine
+	 * whether keeping the task on PREV CPU or sending the task to TARGET
+	 * CPU is beneficial for energy.
+	 */
+	if (temp_energy < prev_energy) {
+		/*
+		 * Compute the dead-zone margin used to prevent too many task
+		 * migrations with negligible energy savings.
+		 * An energy saving is considered meaningful if it reduces the
+		 * energy consumption of PREV CPU candidate by at least ~1.56%.
+		 */
+		margin = prev_energy >> 6;
+		if ((prev_energy - temp_energy) < margin)
+			goto out;
+
+		eco_cpu = temp_cpu;
+	}
+
+out:
+	trace_ems_select_eco_cpu(eenv->p, eco_cpu,
+			eenv->prev_cpu, eenv->best_cpu, eenv->backup_cpu,
+			prev_energy, best_energy, backup_energy);
+	return eco_cpu;
+}
+
+/*
+ * Energy-aware wakeup placement of task @p.
+ *
+ * Returns the chosen cpu, or -1 when energy-aware placement is not applicable
+ * (feature disabled, no or overutilized sched domain on @prev_cpu, task with
+ * zero utilization) so that the caller can try another strategy.
+ */
+static int
+select_energy_cpu(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
+{
+	struct sched_domain *sd = NULL;
+	int cpu = smp_processor_id();
+	struct eco_env eenv = {
+		.p = p,
+		.prev_cpu = prev_cpu,
+	};
+
+	if (!sched_feat(ENERGY_AWARE))
+		return -1;
+
+	/*
+	 * Energy-aware wakeup placement on overutilized cpu is hard to get
+	 * energy gain.
+	 */
+	rcu_read_lock();
+	sd = rcu_dereference_sched(cpu_rq(prev_cpu)->sd);
+	if (!sd || sd->shared->overutilized) {
+		rcu_read_unlock();
+		return -1;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We cannot do energy-aware wakeup placement sensibly for tasks
+	 * with 0 utilization, so let them be placed according to the normal
+	 * strategy.
+	 */
+	if (!task_util(p))
+		return -1;
+
+	if (sysctl_sched_sync_hint_enable && sync)
+		if (cpumask_test_cpu(cpu, &p->cpus_allowed))
+			return cpu;
+
+	/*
+	 * Find eco-friendly target.
+	 * After selecting the best and backup cpu according to strategy, we
+	 * choose a cpu that is energy efficient compared to prev cpu.
+	 */
+	find_eco_target(&eenv);
+	if (eenv.best_cpu < 0 && eenv.backup_cpu < 0)
+		return prev_cpu;
+
+	return select_eco_cpu(&eenv);
 }
 
 static int select_proper_cpu(struct task_struct *p)
@@ -21,11 +371,9 @@ static int select_proper_cpu(struct task_struct *p)
 	return -1;
 }
 
-#define cpu_selected(cpu)	(cpu >= 0)
-
 extern void sync_entity_load_avg(struct sched_entity *se);
 
-int exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync)
+int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
 {
 	int target_cpu = -1;
 	char state[30] = "fail";
@@ -123,7 +471,7 @@ int exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync)
 	 * A scheduling scheme based on cpu energy, find the least power consumption
 	 * cpu referring energy table when assigning task.
 	 */
-	target_cpu = select_energy_cpu(p);
+	target_cpu = select_energy_cpu(p, prev_cpu, sd_flag, sync);
 	if (cpu_selected(target_cpu)) {
 		strcpy(state, "energy cpu");
 		goto out;
diff --git a/kernel/sched/ems/ems.h b/kernel/sched/ems/ems.h
index 000d9219776e..8d2a02b5b4d4 100644
--- a/kernel/sched/ems/ems.h
+++ b/kernel/sched/ems/ems.h
@@ -35,10 +35,10 @@ static inline int group_balancing(struct task_struct *p) { return -1; }
 
 #ifdef CONFIG_SCHED_EMS
 extern int
-exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync);
+exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync);
 #else
 static inline int
-exynos_wakeup_balance(struct task_struct *p, int sd_flag, int sync)
+exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int sync)
 {
 	return -1;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8f74a808eadf..5a157c85c774 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7448,7 +7448,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	int target_cpu;
 
 	if (sched_feat(EXYNOS_MS)) {
-		target_cpu = exynos_wakeup_balance(p, sd_flag, sync);
+		target_cpu = exynos_wakeup_balance(p, prev_cpu, sd_flag, sync);
 		if (target_cpu >= 0)
 			return target_cpu;
 	}
-- 
2.20.1