From: Johnlay Park
Date: Thu, 22 Mar 2018 14:53:43 +0000 (+0900)
Subject: [COMMON] sched/rt: introduce FRT scheduler
X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=a30b1b3ce66ffa3cec824fd6f38d8129002f6824;p=GitHub%2FLineageOS%2Fandroid_kernel_motorola_exynos9610.git

[COMMON] sched/rt: introduce FRT scheduler

Change the CPU selection logic for fluid scheduling. The Fluid RT
scheduler (FRT) takes task load into account when selecting a core,
and it can victimize a higher-priority RT task, because it makes its
decision based on load weighted by priority.

Change-Id: I40c3c93d4fcf985b0002796748037cacd04e813e
Signed-off-by: Johnlay Park
---
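As a note on the core-selection arithmetic: the standalone sketch below (not part of the applied patch) mirrors the weighted-load comparison that find_victim_rt_rq() performs further down. Each candidate's RT utilization is scaled by its priority weight from rtprio_to_weight[], and CPU capacities are cross-multiplied so big and LITTLE CPUs are compared against their raw capacity rather than against capped utilization. The helper lighter_than() and the capacity/utilization numbers are illustrative assumptions only.

#include <stdio.h>

/* Two weights taken from rtprio_to_weight[] in this patch. */
static const long weight_rtprio_0  = 17222521;  /* RT prio 0, highest   */
static const long weight_rtprio_90 =   150318;  /* RT prio 90, much lower */

/*
 * Returns non-zero when task A is the "lighter" victim, i.e. when its
 * priority-weighted utilization, relative to its CPU capacity, is smaller
 * than task B's. This mirrors the patch's cross-multiplied comparison
 * victim_rtweight * min_cpu_cap < min_rtweight * victim_cpu_cap.
 */
static int lighter_than(unsigned long util_a, long weight_a, unsigned long cap_a,
                        unsigned long util_b, long weight_b, unsigned long cap_b)
{
        return (unsigned long long)util_a * weight_a * cap_b <
               (unsigned long long)util_b * weight_b * cap_a;
}

int main(void)
{
        /* Nearly idle high-priority RT task on a big CPU ... */
        unsigned long big_cap = 1024, big_util = 2;
        /* ... versus a busy low-priority RT task on a LITTLE CPU. */
        unsigned long little_cap = 462, little_util = 300;

        if (lighter_than(big_util, weight_rtprio_0, big_cap,
                         little_util, weight_rtprio_90, little_cap))
                printf("victim: the high-priority task (tiny utilization)\n");
        else
                printf("victim: the low-priority task\n");

        return 0;
}

Because a higher RT priority maps to a much larger weight, a higher-priority task only loses this comparison when its utilization is very small, which is exactly the case the commit message allows to be victimized.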
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f5e0cc02c845..104ba91ec97a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -713,6 +713,9 @@ struct task_struct {
 	u32 init_load_pct;
 	u64 last_sleep_ts;
 #endif
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+	int victim_flag;
+#endif
 
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group *sched_task_group;
diff --git a/init/Kconfig b/init/Kconfig
index da82b6662d97..3004ed5c7544 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1088,6 +1088,17 @@
 
 	  Say N if unsure.
 
+config SCHED_USE_FLUID_RT
+	bool "Enable Fluid RT scheduler feature"
+	depends on SMP
+	default n
+	help
+	  Basically, the Fluid RT scheduler selects a core by task priority,
+	  as usual. Beyond that basic behavior, FRT load-balances RT tasks
+	  during core selection by taking runqueue utilization into account.
+	  In some circumstances it also allows a lower-priority task to
+	  preempt a higher-priority one, based on weighted load.
+
 config SYSFS_DEPRECATED
 	bool "Enable deprecated sysfs features to support old userspace tools"
 	depends on SYSFS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ebdce0f3a397..625be6676047 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3981,7 +3981,7 @@ int idle_cpu(int cpu)
 	if (rq->curr != rq->idle)
 		return 0;
 
-	if (rq->nr_running)
+	if (rq->nr_running == 1)
 		return 0;
 
 #ifdef CONFIG_SMP
@@ -6849,3 +6849,20 @@ const u32 sched_prio_to_wmult[40] = {
 	/* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
 	/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
+
+/*
+ * RT extension of 'sched_prio_to_weight'
+ */
+const int rtprio_to_weight[51] = {
+	/* 0 */ 17222521, 15500269, 13950242, 12555218, 11299696,
+	/* 10 */ 10169726, 9152754, 8237478, 7413730, 6672357,
+	/* 20 */ 6005122, 5404609, 4864149, 4377734, 3939960,
+	/* 30 */ 3545964, 3191368, 2872231, 2585008, 2326507,
+	/* 40 */ 2093856, 1884471, 1696024, 1526421, 1373779,
+	/* 50 */ 1236401, 1112761, 1001485, 901337, 811203,
+	/* 60 */ 730083, 657074, 591367, 532230, 479007,
+	/* 70 */ 431106, 387996, 349196, 314277, 282849,
+	/* 80 */ 254564, 229108, 206197, 185577, 167019,
+	/* 90 */ 150318, 135286, 121757, 109581, 98623,
+	/* 100 for Fair class */ 88761,
+};
diff --git a/kernel/sched/ems/ehmp.c b/kernel/sched/ems/ehmp.c
index c90a09d6b372..8c3e10319352 100644
--- a/kernel/sched/ems/ehmp.c
+++ b/kernel/sched/ems/ehmp.c
@@ -28,9 +28,12 @@ extern int find_best_target(struct task_struct *p, int *backup_cpu,
 extern u64 decay_load(u64 val, u64 n);
 extern int start_cpu(bool boosted);
 
-static unsigned long task_util(struct task_struct *p)
+unsigned long task_util(struct task_struct *p)
 {
-	return p->se.avg.util_avg;
+	if (rt_task(p))
+		return p->rt.avg.util_avg;
+	else
+		return p->se.avg.util_avg;
 }
 
 static inline struct task_struct *task_of(struct sched_entity *se)
@@ -523,7 +526,7 @@ static int check_migration_task(struct task_struct *p)
 	return !p->se.avg.last_update_time;
 }
 
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
+unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 {
 	unsigned long util, capacity;
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9831cea4674b..53c292f95d2e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1580,6 +1580,21 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
 		     curr->prio <= p->prio)) {
 		int target = find_lowest_rq(p);
 
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+		/*
+		 * Even though the destination CPU is running a higher-priority
+		 * task, FluidRT may still bother moving p there when p's
+		 * utilization is very small and the other CPU is too busy to
+		 * accommodate p, in terms of both priority and utilization.
+		 *
+		 * If curr has a higher priority than p, FluidRT tries the
+		 * other CPUs first. In the worst case, curr itself can become
+		 * the victim, if its utilization is very small.
+		 */
+		if (likely(target != -1)) {
+			cpu = target;
+		}
+#else
 		/*
 		 * Don't bother moving it if the destination CPU is
 		 * not running a lower priority task.
@@ -1587,6 +1602,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
 		 */
 		if (target != -1 &&
 		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
 			cpu = target;
+#endif
 	}
 	rcu_read_unlock();
@@ -1831,6 +1847,28 @@ void init_rt_entity_runnable_average(struct sched_rt_entity *rt_se)
 void init_rt_entity_runnable_average(struct sched_rt_entity *rt_se) { }
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+static inline void set_victim_flag(struct task_struct *p)
+{
+	p->victim_flag = 1;
+}
+
+static inline void clear_victim_flag(struct task_struct *p)
+{
+	p->victim_flag = 0;
+}
+
+static inline bool test_victim_flag(struct task_struct *p)
+{
+	if (p->victim_flag)
+		return true;
+	else
+		return false;
+}
+#else
+static inline bool test_victim_flag(struct task_struct *p) { return false; }
+static inline void clear_victim_flag(struct task_struct *p) {}
+#endif
 
 /*
  * Preempt the current task with a newly woken task if needed:
@@ -1839,6 +1877,10 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
 	if (p->prio < rq->curr->prio) {
 		resched_curr(rq);
 		return;
+	} else if (test_victim_flag(p)) {
+		requeue_task_rt(rq, p, 1);
+		resched_curr(rq);
+		return;
 	}
 
 #ifdef CONFIG_SMP
@@ -1948,6 +1990,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), rt_rq,
 				rq->curr->sched_class == &rt_sched_class);
 
+	clear_victim_flag(p);
+
 	return p;
 }
 
@@ -2118,8 +2162,246 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+static unsigned int sched_rt_boost_threshold = 60;
+
+static inline struct cpumask *sched_group_cpus_rt(struct sched_group *sg)
+{
+	return to_cpumask(sg->cpumask);
+}
+
+static inline int weight_from_rtprio(int prio)
+{
+	int idx = (prio >> 1);
+
+	if (!rt_prio(prio))
+		return sched_prio_to_weight[prio - MAX_RT_PRIO];
+
+	if ((idx << 1) == prio)
+		return rtprio_to_weight[idx];
+	else
+		return ((rtprio_to_weight[idx] + rtprio_to_weight[idx+1]) >> 1);
+}
+
+static inline int affordable_cpu(int cpu, unsigned long task_util)
+{
+	/* HACK: needs restructuring, this check is too naive */
+	if (cpu_curr(cpu)->state != TASK_INTERRUPTIBLE)
+		return 0;
+
+	if (capacity_of(cpu) <= task_util)
+		return 0;
+
+	if ((capacity_orig_of(cpu) - capacity_of(cpu)) >= task_util)
+		return 0;
+
+	return 1;
+}
+
+extern unsigned long cpu_util_wake(int cpu, struct task_struct *p);
+extern unsigned long task_util(struct task_struct *p);
+
+/*
+ * Must find the victim or a recessive CPU (one not in lowest_mask)
+ *
+ */
+/* Future-safe accessor for struct task_struct's cpus_allowed. */
+#define rttsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+
+static int find_victim_rt_rq(struct task_struct *task, struct sched_group *sg, int *best_cpu) {
+	struct cpumask *sg_cpus = sched_group_cpus_rt(sg);
+	int i;
+	unsigned long victim_rtweight, target_rtweight, min_rtweight;
+	unsigned int victim_cpu_cap, min_cpu_cap = arch_scale_cpu_capacity(NULL, task_cpu(task));
+	bool victim_rt = true;
+
+	if (!rt_task(task))
+		return *best_cpu;
+
+	target_rtweight = task->rt.avg.util_avg * weight_from_rtprio(task->prio);
+	min_rtweight = target_rtweight;
+
+	for_each_cpu_and(i, sg_cpus, rttsk_cpus_allowed(task)) {
+		struct task_struct *victim = cpu_rq(i)->curr;
+
+		if (victim->nr_cpus_allowed < 2)
+			continue;
+
+		if (rt_task(victim)) {
+			victim_cpu_cap = arch_scale_cpu_capacity(NULL, i);
+			victim_rtweight = victim->rt.avg.util_avg * weight_from_rtprio(victim->prio);
+
+			if (min_cpu_cap == victim_cpu_cap) {
+				if (victim_rtweight < min_rtweight) {
+					min_rtweight = victim_rtweight;
+					*best_cpu = i;
+					min_cpu_cap = victim_cpu_cap;
+				}
+			} else {
+				/*
+				 * Compare the utilization with the CPU capacity un-capped
+				 * (cross-multiplied), so that Fluid RT gives a long-running
+				 * RT task the green light on a big CPU in accordance with
+				 * its priority.
+				 */
+				if (victim_rtweight * min_cpu_cap < min_rtweight * victim_cpu_cap) {
+					min_rtweight = victim_rtweight;
+					*best_cpu = i;
+					min_cpu_cap = victim_cpu_cap;
+				}
+			}
+		} else {
+			/* If a CPU running a non-RT task exists, select it first. */
+			*best_cpu = i;
+			victim_rt = false;
+			break;
+		}
+	}
+
+	if (*best_cpu >= 0 && victim_rt) {
+		set_victim_flag(cpu_rq(*best_cpu)->curr);
+	}
+
+	return *best_cpu;
+
+}
+
+static int find_lowest_rq_fluid(struct task_struct *task)
+{
+	int cpu, best_cpu = -1;
+	int prefer_cpu = smp_processor_id(); /* Cache-hot with itself or waker (default). */
+	int boosted = 0;
+	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
+	struct sched_domain *sd;
+	struct sched_group *sg;
+	u64 cpu_load = ULLONG_MAX, min_load = ULLONG_MAX, min_rt_load = ULLONG_MAX;
+	int min_cpu = -1, min_rt_cpu = -1;
+
+	/* Make sure the mask is initialized first */
+	if (unlikely(!lowest_mask))
+		goto out;
+
+	if (task->nr_cpus_allowed == 1)
+		goto out; /* No other targets possible */
+
+	/* update the per-cpu local_cpu_mask (lowest_mask) */
+	cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask);
+
+	/*
+	 *
+	 * Fluid Sched Core selection procedure:
+	 *
+	 * 1. Cache hot : this cpu (waker if wake_list is null)
+	 * 2. idle CPU selection (prev_cpu first)
+	 * 3. recessive task first (prev_cpu first)
+	 * 4. victim task first (prev_cpu first)
+	 */
+
+	/*
+	 * 1. Cache hot : pack the callee with the caller
+	 *    when there is nothing else to run but the callee.
+	 */
+	if (cpumask_test_cpu(prefer_cpu, lowest_mask) &&
+	    affordable_cpu(prefer_cpu, task_util(task))) {
+		best_cpu = prefer_cpu;
+		goto out;
+	}
+
+	prefer_cpu = task_cpu(task);
+
+	/*
+	 * 2. idle CPU selection
+	 */
+	boosted = (task->rt.avg.util_avg > sched_rt_boost_threshold) ? (1) : (0);
+
+	/* TODO: also consult the eHMP scheduling status */
+	for_each_cpu(cpu, cpu_online_mask) {
+		if (boosted && cpu < cpumask_first(cpu_coregroup_mask(prefer_cpu)))
+			continue;
+
+		if (idle_cpu(cpu)) {
+			best_cpu = cpu;
+			goto out;
+		}
+	}
+
+	rcu_read_lock();
+
+	sd = boosted ?
+		rcu_dereference(per_cpu(sd_ea, 0)) :
+		rcu_dereference(per_cpu(sd_ea, prefer_cpu));
+
+	if (!sd)
+		goto unlock;
+
+	sg = sd->groups;
+
+	/*
+	 * 3. recessive task first
+	 */
+	do {
+		for_each_cpu_and(cpu, sched_group_span(sg), lowest_mask) {
+
+			cpu_load = cpu_util_wake(cpu, task) + task_util(task);
+
+			if (rt_task(cpu_rq(cpu)->curr)) {
+				if (cpu_load < min_rt_load ||
+				    (cpu_load == min_rt_load && cpu == prefer_cpu)) {
+					min_rt_load = cpu_load;
+					min_rt_cpu = cpu;
+				}
+
+				continue;
+			}
+			if (cpu_load < min_load ||
+			    (cpu_load == min_load && cpu == prefer_cpu)) {
+				min_load = cpu_load;
+				min_cpu = cpu;
+			}
+
+		}
+
+		/* Fair recessive task: is there a suitable min-load non-RT CPU? */
+		if (min_cpu >= 0 &&
+		    ((capacity_of(min_cpu) >= min_load) || (min_cpu == prefer_cpu))) {
+			best_cpu = min_cpu;
+			goto unlock;
+		}
+
+		/* RT recessive task: is there a suitable min-load RT CPU? */
+		if (min_rt_cpu >= 0 &&
+		    ((capacity_of(min_rt_cpu) >= min_rt_load) || (min_rt_cpu == prefer_cpu))) {
+			best_cpu = min_rt_cpu;
+			goto unlock;
+		}
+
+	} while (sg = sg->next, sg != sd->groups);
+	/* TODO: double-check this way of traversing the sched groups */
+
+	sg = sd->groups;
+
+	/*
+	 * 4. victim task first
+	 */
+	do {
+		if (find_victim_rt_rq(task, sg, &best_cpu) != -1)
+			break;
+	} while (sg = sg->next, sg != sd->groups);
+
+	if (best_cpu < 0)
+		best_cpu = prefer_cpu;
+unlock:
+	rcu_read_unlock();
+out:
+	return best_cpu;
+}
+#endif /* CONFIG_SCHED_USE_FLUID_RT */
+
 static int find_lowest_rq(struct task_struct *task)
 {
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+	return find_lowest_rq_fluid(task);
+#else
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
 	int this_cpu = smp_processor_id();
@@ -2190,6 +2472,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (cpu < nr_cpu_ids)
 		return cpu;
 	return -1;
+#endif /* CONFIG_SCHED_USE_FLUID_RT */
 }
 
 /* Will lock the rq it finds */
@@ -2207,6 +2490,13 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
 		lowest_rq = cpu_rq(cpu);
 
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+		/*
+		 * Even though the lowest rq is running a higher-priority task,
+		 * FluidRT can still expel it (as a victim) if it has a small
+		 * utilization or is not the current task. Just keep trying.
+		 */
+#else
 		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
 			/*
 			 * Target rq has tasks of equal or higher priority,
@@ -2216,6 +2506,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			lowest_rq = NULL;
 			break;
 		}
+#endif
 
 		/* if the prio of this runqueue changed, try again */
 		if (double_lock_balance(rq, lowest_rq)) {
@@ -2237,6 +2528,11 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			}
 		}
 
+#ifdef CONFIG_SCHED_USE_FLUID_RT
+		/* the task is still an RT task */
+		if (likely(rt_task(task)))
+			break;
+#else
 		/* If this rq is still suitable use it. */
 		if (lowest_rq->rt.highest_prio.curr > task->prio)
 			break;
@@ -2244,6 +2540,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 		/* try again */
 		double_unlock_balance(rq, lowest_rq);
 		lowest_rq = NULL;
+#endif
 	}
 
 	return lowest_rq;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 27d8582df918..bb96dbe19bc9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1564,6 +1564,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 extern const int sched_prio_to_weight[40];
 extern const u32 sched_prio_to_wmult[40];
 
+extern const int rtprio_to_weight[51];
+
 /*
  * {de,en}queue flags:
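A note on how the 51-entry rtprio_to_weight[] table added in kernel/sched/core.c is consumed by weight_from_rtprio() above: one table entry covers two RT priority levels (index prio >> 1), even priorities index the table directly, and odd priorities take the midpoint of the two neighbouring entries; the final entry (88761) matches the fair-class weight for nice -20. The snippet below is a standalone userspace sketch, with only the first table entries reproduced, and is not code from this patch.

#include <stdio.h>

/*
 * First three entries of rtprio_to_weight[] from the patch, i.e. the
 * weights for RT priorities 0, 2 and 4 (one entry per two levels).
 */
static const int rtprio_to_weight_head[] = { 17222521, 15500269, 13950242 };

/*
 * Sketch of weight_from_rtprio() for RT priorities only: even priorities
 * hit the table directly at prio >> 1, odd priorities take the arithmetic
 * mean of the two neighbouring entries. The fallback to the fair-class
 * table for non-RT priorities is omitted here.
 */
static int weight_from_rtprio_sketch(int prio, const int *table)
{
        int idx = prio >> 1;

        if ((idx << 1) == prio)         /* even RT priority */
                return table[idx];

        /* odd RT priority: midpoint of the neighbouring even entries */
        return (table[idx] + table[idx + 1]) >> 1;
}

int main(void)
{
        int prio;

        for (prio = 0; prio <= 4; prio++)
                printf("rt prio %d -> weight %d\n",
                       prio, weight_from_rtprio_sketch(prio, rtprio_to_weight_head));

        return 0;
}

Halving the table this way keeps it at 51 entries while still giving every one of the 100 RT priority levels its own weight, so the weighted-load comparison in find_victim_rt_rq() can rank two RT tasks even when their priorities differ by one.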