From 919856a53fb5ec1839d5a41b7b062e0a3bd85fd1 Mon Sep 17 00:00:00 2001
From: Johnlay Park
Date: Tue, 27 Feb 2018 15:57:52 +0900
Subject: [PATCH] [COMMON] sched/rt: migrate utilization in task migration

Remove a migrating task's utilization from the src rt_rq and add it to
the dst rt_rq.

Refer to Ic4757299f0561f13e58671b9a163c00d10c6a147.

Change-Id: I4be0b776c96d1871e246a50255e23d007dcee0e3
Signed-off-by: Johnlay Park
---
 kernel/sched/fair.c  |  13 +++++-
 kernel/sched/rt.c    | 100 +++++++++++++++++++++++++++++++++++++++----
 kernel/sched/sched.h |   1 +
 3 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a1ea8b00488..72cf9ee92a70 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3403,8 +3403,19 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running)
 {
 	int ret;
+	struct sched_avg *sa = &rt_rq->avg;
 
-	ret = ___update_load_avg(now, cpu, &rt_rq->avg, 0, running, NULL, rt_rq);
+	if (atomic_long_read(&rt_rq->removed_util_avg)) {
+		long r = atomic_long_xchg(&rt_rq->removed_util_avg, 0);
+		sub_positive(&sa->util_avg, r);
+		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
+	}
+
+	/* TODO:
+	 * Do something on removed_load_avg
+	 * Do propagate_avg for removed_load/util_avg
+	 */
+	ret = ___update_load_avg(now, cpu, sa, 0, running, NULL, rt_rq);
 
 	return ret;
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e3d6b2124ecc..17fb5acac3cd 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -98,6 +98,7 @@ void init_rt_rq(struct rt_rq *rt_rq)
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
 	plist_head_init(&rt_rq->pushable_tasks);
+	atomic_long_set(&rt_rq->removed_util_avg, 0);
 #endif /* CONFIG_SMP */
 	/* We start is dequeued state, because no RT tasks are queued */
 	rt_rq->rt_queued = 0;
@@ -1290,7 +1291,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 #ifdef CONFIG_SMP
 static void
-attach_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+attach_rt_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 {
 	rt_se->avg.last_update_time = rt_rq->avg.last_update_time;
 	rt_rq->avg.util_avg += rt_se->avg.util_avg;
@@ -1303,7 +1304,7 @@ attach_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 }
 
 static void
-detach_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+detach_rt_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 {
 	sub_positive(&rt_rq->avg.util_avg, rt_se->avg.util_avg);
 	sub_positive(&rt_rq->avg.util_sum, rt_se->avg.util_sum);
@@ -1315,9 +1316,9 @@ detach_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 }
 #else
 static inline void
-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+attach_rt_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) {}
 static inline void
-detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+detach_rt_entity_load_avg(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) {}
 #endif
 
 /*
@@ -1376,9 +1377,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 
 	update_rt_load_avg(rq_clock_task(rq_of_rt_rq(rt_rq)), rt_se);
 
-	/* TODO:
-	 * Need to attach entity load average for rt entity is task
-	 */
+	if (rt_entity_is_task(rt_se) && !rt_se->avg.last_update_time)
+		attach_rt_entity_load_avg(rt_rq, rt_se);
 
 	inc_rt_tasks(rt_se, rt_rq);
 }
@@ -1573,6 +1573,62 @@ out:
 	return cpu;
 }
 
+#ifndef CONFIG_64BIT
+static inline u64 rt_rq_last_update_time(struct rt_rq *rt_rq)
+{
+	u64 last_update_time_copy;
+	u64 last_update_time;
+
+	do {
+		last_update_time_copy = rt_rq->load_last_update_time_copy;
+		smp_rmb();
+		last_update_time = rt_rq->avg.last_update_time;
+	} while (last_update_time != last_update_time_copy);
+
+	return last_update_time;
+}
+#else
+static inline u64 rt_rq_last_update_time(struct rt_rq *rt_rq)
+{
+	return rt_rq->avg.last_update_time;
+}
+#endif
+
+/*
+ * Synchronize entity load avg of dequeued entity without locking
+ * the previous rq.
+ */
+void sync_rt_entity_load_avg(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+	u64 last_update_time;
+
+	last_update_time = rt_rq_last_update_time(rt_rq);
+	update_rt_load_avg(last_update_time, rt_se);
+}
+
+/*
+ * Task first catches up with rt_rq, and then subtract
+ * itself from the rt_rq (task must be off the queue now).
+ */
+static void remove_rt_entity_load_avg(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+	/*
+	 * tasks cannot exit without having gone through wake_up_new_task() ->
+	 * post_init_entity_util_avg() which will have added things to the
+	 * rt_rq, so we can remove unconditionally.
+	 *
+	 * Similarly for groups, they will have passed through
+	 * post_init_entity_util_avg() before unregister_sched_fair_group()
+	 * calls this.
+	 */
+
+	sync_rt_entity_load_avg(rt_se);
+	atomic_long_add(rt_se->avg.util_avg, &rt_rq->removed_util_avg);
+}
+
 static void attach_task_rt_rq(struct task_struct *p)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
@@ -1580,7 +1636,7 @@ static void attach_task_rt_rq(struct task_struct *p)
 	u64 now = rq_clock_task(rq_of_rt_rq(rt_rq));
 
 	update_rt_load_avg(now, rt_se);
-	attach_entity_load_avg(rt_rq, rt_se);
+	attach_rt_entity_load_avg(rt_rq, rt_se);
 }
 
 static void detach_task_rt_rq(struct task_struct *p)
@@ -1590,7 +1646,30 @@ static void detach_task_rt_rq(struct task_struct *p)
 	u64 now = rq_clock_task(rq_of_rt_rq(rt_rq));
 
 	update_rt_load_avg(now, rt_se);
-	detach_entity_load_avg(rt_rq, rt_se);
+	detach_rt_entity_load_avg(rt_rq, rt_se);
+}
+
+static void migrate_task_rq_rt(struct task_struct *p)
+{
+	/*
+	 * We are supposed to update the task to "current" time, then its up to date
+	 * and ready to go to new CPU/cfs_rq. But we have difficulty in getting
+	 * what current time is, so simply throw away the out-of-date time. This
+	 * will result in the wakee task is less decayed, but giving the wakee more
+	 * load sounds not bad.
+	 */
+	remove_rt_entity_load_avg(&p->rt);
+
+	/* Tell new CPU we are migrated */
+	p->rt.avg.last_update_time = 0;
+
+	/* We have migrated, no longer consider this task hot */
+	p->se.exec_start = 0;
+}
+
+static void task_dead_rt(struct task_struct *p)
+{
+	remove_rt_entity_load_avg(&p->rt);
 }
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -2436,6 +2515,7 @@ static void rq_offline_rt(struct rq *rq)
  */
 static void switched_from_rt(struct rq *rq, struct task_struct *p)
 {
+	detach_task_rt_rq(p);
 	/*
 	 * If there are other RT tasks then we will reschedule
 	 * and the scheduling of the other RT tasks will handle
@@ -2628,6 +2708,8 @@ const struct sched_class rt_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_rt,
+	.migrate_task_rq	= migrate_task_rq_rt,
+	.task_dead		= task_dead_rt,
 	.set_cpus_allowed       = set_cpus_allowed_common,
 	.rq_online              = rq_online_rt,
 	.rq_offline             = rq_offline_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 70f3c87f73b8..447abeba7f29 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -568,6 +568,7 @@ struct rt_rq {
 
 	struct sched_avg avg;
 	struct sched_rt_entity *curr;
+	atomic_long_t removed_util_avg;
 #endif /* CONFIG_SMP */
 	int rt_queued;
 
-- 
2.20.1
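
Note on the mechanism, for readers outside the kernel tree: the removal side is an
asynchronous handoff. migrate_task_rq_rt() adds the departing task's util_avg into the
source rt_rq's removed_util_avg without taking the remote rq lock, and the next
update_rt_rq_load_avg() on that CPU drains the counter and subtracts it from the
queue's util_avg/util_sum. The standalone C sketch below only models that handoff
with C11 atomics; it is not kernel code, and all names in it (rt_rq_model,
task_model, remove_util, update_rt_rq) are illustrative, not part of the patch.

/*
 * Userspace model of the removed_util_avg handoff implemented by this patch.
 * Build: cc -std=c11 -o removed_util removed_util.c
 */
#include <stdatomic.h>
#include <stdio.h>

#define LOAD_AVG_MAX 47742	/* PELT maximum, as used by sub_positive() above */

struct rt_rq_model {
	unsigned long util_avg;
	unsigned long util_sum;
	atomic_long removed_util_avg;	/* filled by tasks leaving this queue */
};

struct task_model {
	unsigned long util_avg;
	unsigned long long last_update_time;
};

/* The kernel's sub_positive() clamps at zero; model that behaviour. */
static void sub_positive(unsigned long *val, unsigned long sub)
{
	*val = (*val > sub) ? *val - sub : 0;
}

/* Models remove_rt_entity_load_avg(): runs without the source rq lock. */
static void remove_util(struct task_model *p, struct rt_rq_model *src)
{
	atomic_fetch_add(&src->removed_util_avg, (long)p->util_avg);
	p->last_update_time = 0;	/* "tell new CPU we are migrated" */
}

/* Models the update_rt_rq_load_avg() hunk: consume pending removals. */
static void update_rt_rq(struct rt_rq_model *rq)
{
	if (atomic_load(&rq->removed_util_avg)) {
		long r = atomic_exchange(&rq->removed_util_avg, 0);

		sub_positive(&rq->util_avg, (unsigned long)r);
		sub_positive(&rq->util_sum, (unsigned long)r * LOAD_AVG_MAX);
	}
}

int main(void)
{
	struct rt_rq_model src = { .util_avg = 300, .util_sum = 300UL * LOAD_AVG_MAX };
	struct task_model p = { .util_avg = 120, .last_update_time = 1 };

	atomic_init(&src.removed_util_avg, 0);

	remove_util(&p, &src);	/* task leaves the source CPU */
	update_rt_rq(&src);	/* next load update on the source CPU */

	printf("src util_avg after migration: %lu (expect 180)\n", src.util_avg);
	return 0;
}

The per-queue atomic is what lets the dequeue path skip the remote rq lock; the
trade-off is that the source queue's utilization stays stale until its next update,
the same compromise the CFS removed_util_avg path accepts.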