From: Youngtae Lee
Date: Thu, 9 Aug 2018 02:11:05 +0000 (+0900)
Subject: sched: rt: Refactoring select idle cpu and recessive cpu
X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=1bfddfc469243c02a478059ca330ccfbccc6677f;p=GitHub%2FLineageOS%2Fandroid_kernel_motorola_exynos9610.git

sched: rt: Refactoring select idle cpu and recessive cpu

Implement sysfs and device tree init code to support a per-coregroup
min_thr used to detect heavy RT tasks, and change the loop and
conditions for find_idle_cpu()/find_recessive_cpu().

Change-Id: I91e5199c9a86849025af7271c0e6aeeee6b3f966
Signed-off-by: Youngtae Lee
---

diff --git a/include/linux/ems.h b/include/linux/ems.h
index c38d7f206b5c..b0f1252f877f 100644
--- a/include/linux/ems.h
+++ b/include/linux/ems.h
@@ -47,6 +47,9 @@ struct task_band {
 };
 
 #ifdef CONFIG_SCHED_EMS
+extern struct kobject *ems_kobj;
+extern unsigned int get_cpu_max_capacity(unsigned int cpu);
+
 /* task util initialization */
 extern void exynos_init_entity_util_avg(struct sched_entity *se);
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c9998fad577a..c4e4fbbf079f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -8,16 +8,241 @@
 #include 
 #include 
+#include 
 
 #include "tune.h"
 #include "walt.h"
-
 #include 
 
+struct frt_dom {
+        bool enabled;
+        unsigned int coverage_ratio;
+        int coregroup;
+        struct cpumask cpus;
+
+        struct list_head list;
+        /* kobject for sysfs group */
+        struct kobject kobj;
+};
+
+LIST_HEAD(frt_list);
+
+struct frt_rq {
+        unsigned int min_thr;
+};
+DEFINE_PER_CPU_SHARED_ALIGNED(struct frt_rq, frt_runqueues);
+
+static struct kobject *frt_kobj;
+
+static int frt_set_thr(int cpu);
+struct frt_attr {
+        struct attribute attr;
+        ssize_t (*show)(struct kobject *, char *);
+        ssize_t (*store)(struct kobject *, const char *, size_t count);
+};
+
+#define frt_attr_rw(_name)                                              \
+static struct frt_attr _name##_attr =                                   \
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+#define frt_show(_name)                                                 \
+static ssize_t show_##_name(struct kobject *k, char *buf)               \
+{                                                                       \
+        struct frt_dom *dom = container_of(k, struct frt_dom, kobj);    \
+                                                                        \
+        return sprintf(buf, "%u\n", (unsigned int)dom->_name);          \
+}
+
+#define frt_store(_name, _type, _max)                                   \
+static ssize_t store_##_name(struct kobject *k, const char *buf, size_t count) \
+{                                                                       \
+        unsigned int val;                                               \
+        struct frt_dom *dom = container_of(k, struct frt_dom, kobj);    \
+                                                                        \
+        if (!sscanf(buf, "%u", &val))                                   \
+                return -EINVAL;                                         \
+                                                                        \
+        val = val > _max ? _max : val;                                  \
+        dom->_name = (_type)val;                                        \
+        frt_set_thr(cpumask_first(&dom->cpus));                         \
+                                                                        \
+        return count;                                                   \
+}
+
+static ssize_t show_coverage_ratio(struct kobject *k, char *buf)
+{
+        struct frt_dom *dom = container_of(k, struct frt_dom, kobj);
+        unsigned int cov_ratio = dom->coverage_ratio;
+        unsigned int min_thr = per_cpu(frt_runqueues, cpumask_first(&dom->cpus)).min_thr;
+
+        return sprintf(buf, "%u(%d)\n", cov_ratio, min_thr);
+}
+
+frt_store(coverage_ratio, int, 100);
+frt_attr_rw(coverage_ratio);
+
+static ssize_t show(struct kobject *kobj, struct attribute *at, char *buf)
+{
+        struct frt_attr *frtattr = container_of(at, struct frt_attr, attr);
+
+        return frtattr->show(kobj, buf);
+}
+
+static ssize_t store(struct kobject *kobj, struct attribute *at,
+                     const char *buf, size_t count)
+{
+        struct frt_attr *frtattr = container_of(at, struct frt_attr, attr);
+
+        return frtattr->store(kobj, buf, count);
+}
+
+static const struct sysfs_ops frt_sysfs_ops = {
+        .show   = show,
+        .store  = store,
+};
+
+static struct attribute *frt_attrs[] = {
+        &coverage_ratio_attr.attr,
+        NULL
+};
+
+static struct kobj_type ktype_frt = {
+        .sysfs_ops      = &frt_sysfs_ops,
+        .default_attrs  = frt_attrs,
+};
+
+static struct frt_dom *frt_get_dom(int cpu)
+{
+        struct frt_dom *dom;
+
+        list_for_each_entry(dom, &frt_list, list)
+                if (cpumask_test_cpu(cpu, &dom->cpus))
+                        return dom;
+
+        return NULL;
+}
+
+static int frt_find_prefer_cpu(struct task_struct *task)
+{
+        int cpu, allowed_cpu = 0;
+        unsigned int min_thr;
+        struct frt_dom *dom;
+
+        list_for_each_entry(dom, &frt_list, list) {
+                min_thr = per_cpu(frt_runqueues, cpumask_first(&dom->cpus)).min_thr;
+                for_each_cpu_and(cpu, &task->cpus_allowed, &dom->cpus) {
+                        allowed_cpu = cpu;
+                        if (task->rt.avg.util_avg < min_thr)
+                                return allowed_cpu;
+                }
+        }
+        return allowed_cpu;
+}
+
+static int frt_set_thr(int cpu)
+{
+        unsigned long capacity;
+        struct frt_dom *dom = frt_get_dom(cpu);
+
+        capacity = get_cpu_max_capacity(cpumask_first(&dom->cpus));
+        for_each_cpu(cpu, &dom->cpus)
+                per_cpu(frt_runqueues, cpu).min_thr =
+                        (capacity * dom->coverage_ratio) / 100;
+
+        return 0;
+}
+
+static int __init frt_sysfs_init(void)
+{
+        struct frt_dom *dom;
+
+        if (list_empty(&frt_list))
+                return 0;
+
+        frt_kobj = kobject_create_and_add("frt", ems_kobj);
+        if (!frt_kobj)
+                goto out;
+
+        /* Add frt sysfs node for each coregroup */
+        list_for_each_entry(dom, &frt_list, list) {
+                int ret;
+
+                ret = kobject_init_and_add(&dom->kobj, &ktype_frt,
+                                frt_kobj, "coregroup%d", dom->coregroup);
+                if (ret)
+                        goto out;
+        }
+
+        return 0;
+
+out:
+        pr_err("FRT(%s): failed to create sysfs node\n", __func__);
+        return -EINVAL;
+}
+
+static void frt_parse_dt(struct device_node *dn, struct frt_dom *dom, int cnt)
+{
+        struct device_node *frt, *coregroup;
+        char name[15];
+
+        frt = of_get_child_by_name(dn, "frt");
+        if (!frt)
+                goto disable;
+
+        snprintf(name, sizeof(name), "coregroup%d", cnt);
+        coregroup = of_get_child_by_name(frt, name);
+        if (!coregroup)
+                goto disable;
+        dom->coregroup = cnt;
+
+        of_property_read_u32(coregroup, "coverage-ratio", &dom->coverage_ratio);
+        if (!dom->coverage_ratio)
+                dom->coverage_ratio = 100;
+
+        dom->enabled = true;
+        return;
+
+disable:
+        pr_err("FRT(%s): failed to parse frt node\n", __func__);
+}
+
+static int __init init_frt(void)
+{
+        struct frt_dom *dom;
+        struct device_node *dn;
+        int cpu, cnt = 0;
+
+        dn = of_find_node_by_path("/cpus/ems");
+        if (!dn)
+                return 0;
+
+        INIT_LIST_HEAD(&frt_list);
+
+        for_each_possible_cpu(cpu) {
+                if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
+                        continue;
+
+                dom = kzalloc(sizeof(struct frt_dom), GFP_KERNEL);
+
+                cpumask_copy(&dom->cpus, cpu_coregroup_mask(cpu));
+
+                frt_parse_dt(dn, dom, cnt++);
+
+                list_add_tail(&dom->list, &frt_list);
+
+                frt_set_thr(cpu);
+        }
+
+        frt_sysfs_init();
+
+        of_node_put(dn);
+        return 0;
+}
+late_initcall(init_frt);
+
 int sched_rr_timeslice = RR_TIMESLICE;
 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
-
 void update_rt_load_avg(u64 now, struct sched_rt_entity *rt_se);
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -2179,13 +2404,6 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 #ifdef CONFIG_SCHED_USE_FLUID_RT
-static unsigned int sched_rt_boost_threshold = 60;
-
-static inline struct cpumask *sched_group_cpus_rt(struct sched_group *sg)
-{
-        return to_cpumask(sg->cpumask);
-}
-
 static inline int weight_from_rtprio(int prio)
 {
         int idx = (prio >> 1);
@@ -2337,30 +2555,116 @@ static int find_victim_rt_rq(struct task_struct *task, const struct cpumask *sg_
 
 }
 
-static int find_lowest_rq_fluid(struct task_struct *task, int wake_flags)
+static int find_idle_cpu(struct task_struct *task)
 {
-        int cpu, icpu, best_cpu = -1;
-        int prefer_cpu = smp_processor_id();    /* Cache-hot with itself or waker (default). */
-        struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
+        int best_cpu = -1, cpu, cpu_prio, max_prio = -1, prefer_cpu;
+        u64 cpu_load = ULLONG_MAX, min_load = ULLONG_MAX;
+        struct cpumask candidate_cpus;
+
+        cpumask_and(&candidate_cpus, &task->cpus_allowed, cpu_active_mask);
+        prefer_cpu = frt_find_prefer_cpu(task);
+
+        while (!cpumask_empty(&candidate_cpus)) {
+                const struct cpumask* grp_mask = cpu_coregroup_mask(prefer_cpu);
+
+                for_each_cpu(cpu, grp_mask) {
+                        cpumask_clear_cpu(cpu, &candidate_cpus);
+
+                        if (!idle_cpu(cpu))
+                                continue;
+                        cpu_prio = cpu_rq(cpu)->rt.highest_prio.curr;
+                        if (cpu_prio < max_prio)
+                                continue;
+
+                        cpu_load = frt_cpu_util_wake(cpu, task) + task_util(task);
+                        if ((cpu_prio > max_prio) || (cpu_load < min_load) ||
+                            (cpu_load == min_load && task_cpu(task) == cpu)) {
+                                min_load = cpu_load;
+                                max_prio = cpu_prio;
+                                best_cpu = cpu;
+                        }
+                }
+
+                if (cpu_selected(best_cpu)) {
+                        trace_sched_fluid_stat(task, &task->rt.avg, best_cpu, "IDLE-FIRST");
+                        return best_cpu;
+                }
 
-        u64 cpu_load = ULLONG_MAX, min_load = ULLONG_MAX, min_rt_load = ULLONG_MAX;
-        u64 min_icl = ULLONG_MAX;
-        int min_cpu = -1, min_rt_cpu = -1;
+                /*
+                 * If the rt task has heavy utilization, search the higher
+                 * performance sched group; in the opposite case, search
+                 * the lower performance sched group.
+                 */
+                prefer_cpu = cpumask_first(grp_mask);
+                prefer_cpu += cpumask_weight(grp_mask);
+                if (prefer_cpu >= NR_CPUS)
+                        prefer_cpu = 0;
+        }
+        return best_cpu;
+}
+
+static int find_recessive_cpu(struct task_struct *task)
+{
+        int best_cpu = -1, cpu, prefer_cpu;
+        struct cpumask *lowest_mask;
+        u64 cpu_load = ULLONG_MAX, min_load = ULLONG_MAX;
+        struct cpumask candidate_cpus;
 
+        lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
         /* Make sure the mask is initialized first */
         if (unlikely(!lowest_mask)) {
                 trace_sched_fluid_stat(task, &task->rt.avg, best_cpu, "NA LOWESTMSK");
-                goto out;
+                return best_cpu;
         }
 
+        /* update the per-cpu local_cpu_mask (lowest_mask) */
+        cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask);
+
+        cpumask_and(&candidate_cpus, &task->cpus_allowed, lowest_mask);
+        prefer_cpu = frt_find_prefer_cpu(task);
+
+        while (!cpumask_empty(&candidate_cpus)) {
+                const struct cpumask* grp_mask = cpu_coregroup_mask(prefer_cpu);
+
+                for_each_cpu(cpu, grp_mask) {
+                        cpumask_clear_cpu(cpu, &candidate_cpus);
+                        cpu_load = frt_cpu_util_wake(cpu, task) + task_util(task);
+
+                        if (cpu_load < min_load ||
+                            (cpu_load == min_load && cpu == prefer_cpu)) {
+                                min_load = cpu_load;
+                                best_cpu = cpu;
+                        }
+                }
+
+                if (cpu_selected(best_cpu) &&
+                    ((capacity_orig_of(best_cpu) >= min_load) || (best_cpu == prefer_cpu))) {
+                        trace_sched_fluid_stat(task, &task->rt.avg, best_cpu,
+                                rt_task(cpu_rq(best_cpu)->curr) ? "RT-RECESS" : "FAIR-RECESS");
+                        return best_cpu;
+                }
+
+                /*
+                 * If the rt task has heavy utilization, search the higher
+                 * performance sched group; in the opposite case, search
+                 * the lower performance sched group.
+                 */
+                prefer_cpu = cpumask_first(grp_mask);
+                prefer_cpu += cpumask_weight(grp_mask);
+                if (prefer_cpu >= NR_CPUS)
+                        prefer_cpu = 0;
+        }
+
+        return best_cpu;
+}
+
+static int find_lowest_rq_fluid(struct task_struct *task, int wake_flags)
+{
+        int cpu, best_cpu = -1;
+        int prefer_cpu = smp_processor_id();    /* Cache-hot with itself or waker (default). */
+
         if (task->nr_cpus_allowed == 1) {
                 trace_sched_fluid_stat(task, &task->rt.avg, best_cpu, "NA ALLOWED");
                 goto out; /* No other targets possible */
         }
 
-        /* update the per-cpu local_cpu_mask (lowest_mask) */
-        cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask);
-
         /*
          *
          * Fluid Sched Core selection procedure:
@@ -2373,7 +2677,7 @@ static int find_lowest_rq_fluid(struct task_struct *task, int wake_flags)
 
         /*
          * 1. Cache hot : packing the callee and caller,
-         *      when there is nothing to run except callee, or
+         *      when there is nothing to run except callee, or
          *      wake_flags are set.
          */
         /* FUTURE WORK: Hierarchical cache hot */
@@ -2386,85 +2690,17 @@ static int find_lowest_rq_fluid(struct task_struct *task, int wake_flags)
         }
 
         /*
-         * 2. idle CPU selection
-         */
-        prefer_cpu = task_cpu(task);
-        prefer_cpu = (task->rt.avg.util_avg > sched_rt_boost_threshold) ?
-                        cpumask_first(cpu_coregroup_mask(prefer_cpu)) :
-                        cpumask_first(cpu_online_mask);
-
-        /* TODO: Need to refer the scheduling status of eHMP */
-        for_each_online_cpu(cpu){
-                const struct cpumask* traversingDom = cpu_coregroup_mask(cpu);
-                if (cpu != cpumask_first(traversingDom))
-                        continue;
-
-                if (cpumask_first(traversingDom) < prefer_cpu)
-                        continue;
-
-                for_each_cpu_and(icpu, rttsk_cpus_allowed(task), traversingDom) {
-                        if (idle_cpu(icpu)) {
-                                cpu_load = frt_cpu_util_wake(icpu, task) + task_util(task);
-                                if ((min_icl > cpu_load) ||
-                                    (min_icl == cpu_load && task_cpu(task) == icpu)) {
-                                        min_icl = cpu_load;
-                                        best_cpu = icpu;
-                                }
-                        }
-                }
-
-                if (cpu_selected(best_cpu)) {
-                        trace_sched_fluid_stat(task, &task->rt.avg, best_cpu, "IDLE-FIRST");
-                        goto out;
-                }
-        }
-
-        /*
-         * 3. recessive task first
+         * Find 2.IDLE CPU and 3.Recessive CPU
          */
-        prefer_cpu = task_cpu(task);
-
-        for_each_online_cpu(cpu) {
-                if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
-                        continue;
-
-                for_each_cpu_and(icpu, rttsk_cpus_allowed(task), cpu_coregroup_mask(cpu)) {
-                        if (!cpumask_test_cpu(icpu, lowest_mask))
-                                continue;
-
-                        cpu_load = frt_cpu_util_wake(icpu, task) + task_util(task);
-
-                        if (rt_task(cpu_rq(icpu)->curr)) {
-                                if (cpu_load < min_rt_load ||
-                                    (cpu_load == min_rt_load && icpu == prefer_cpu)) {
-                                        min_rt_load = cpu_load;
-                                        min_rt_cpu = icpu;
-                                }
-                                continue;
-                        }
-                        if (cpu_load < min_load ||
-                            (cpu_load == min_load && icpu == prefer_cpu)) {
-                                min_load = cpu_load;
-                                min_cpu = icpu;
-                        }
-
-                }
-                /* Fair recessive task : best min-load of non-rt cpu is exist? */
-                if (cpu_selected(min_cpu) &&
-                        ((capacity_of(min_cpu) >= min_load) || (min_cpu == prefer_cpu))) {
-                        best_cpu = min_cpu;
-                        trace_sched_fluid_stat(task, &task->rt.avg, best_cpu, "FAIR-RECESS");
-                        goto out;
-                }
+        /* 2. idle CPU selection */
+        best_cpu = find_idle_cpu(task);
+        if (cpu_selected(best_cpu))
+                goto out;
 
-                /* RT recessive task : best min-load of rt cpu is exist? */
-                if (cpu_selected(min_rt_cpu) &&
-                        ((capacity_of(min_rt_cpu) > min_rt_load) || (min_rt_cpu == prefer_cpu))) {
-                        best_cpu = min_rt_cpu;
-                        trace_sched_fluid_stat(task, &task->rt.avg, best_cpu, "RT-RECESS");
-                        goto out;
-                }
-        }
+        /* 3. recessive task first */
+        best_cpu = find_recessive_cpu(task);
+        if (cpu_selected(best_cpu))
+                goto out;
 
         /*
          * 4. victim task first