From 94219ee2a96c9c7444012b9960a6e0ff83d1dc8f Mon Sep 17 00:00:00 2001 From: Park Bumgyu Date: Tue, 16 Jan 2018 19:01:05 +0900 Subject: [PATCH] [COMMON] sched: ehmp: support EHMP(Exynos HMP) Change-Id: Ie7ee8a84ed0fdc3a62d10a5b55488477edcdba7f Signed-off-by: Park Bumgyu --- include/linux/ehmp.h | 88 ++ include/linux/sched.h | 22 + include/trace/events/ehmp.h | 340 +++++++ include/trace/events/sched.h | 61 ++ init/Kconfig | 12 + kernel/sched/Makefile | 1 + kernel/sched/ehmp.c | 1670 ++++++++++++++++++++++++++++++++++ kernel/sched/fair.c | 135 +-- kernel/sched/features.h | 5 + kernel/sched/sched.h | 82 ++ kernel/sched/tune.c | 468 ++++++++++ kernel/sched/tune.h | 12 + 12 files changed, 2811 insertions(+), 85 deletions(-) create mode 100644 include/linux/ehmp.h create mode 100644 include/trace/events/ehmp.h create mode 100644 kernel/sched/ehmp.c diff --git a/include/linux/ehmp.h b/include/linux/ehmp.h new file mode 100644 index 000000000000..24948551e089 --- /dev/null +++ b/include/linux/ehmp.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 Samsung Electronics Co., Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +#ifdef CONFIG_SCHED_TUNE +enum stune_group { + STUNE_ROOT, + STUNE_FOREGROUND, + STUNE_BACKGROUND, + STUNE_TOPAPP, + STUNE_GROUP_COUNT, +}; +#endif + +struct gb_qos_request { + struct plist_node node; + char *name; + bool active; +}; + +#ifdef CONFIG_SCHED_EHMP +extern void exynos_init_entity_util_avg(struct sched_entity *se); +extern int exynos_need_active_balance(enum cpu_idle_type idle, + struct sched_domain *sd, int src_cpu, int dst_cpu); + +extern unsigned long global_boost(void); +extern int find_second_max_cap(void); + +extern int exynos_select_cpu(struct task_struct *p, int prev_cpu, + int sync, int sd_flag); + +extern void ontime_migration(void); +extern int ontime_can_migration(struct task_struct *p, int cpu); +extern void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight, + struct sched_avg *sa); +extern void ontime_new_entity_load(struct task_struct *parent, + struct sched_entity *se); +extern void ontime_trace_task_info(struct task_struct *p); +extern void ehmp_update_max_cpu_capacity(int cpu, unsigned long val); + +extern void ehmp_update_overutilized(int cpu, unsigned long capacity); +extern bool ehmp_trigger_lb(int src_cpu, int dst_cpu); + +extern void gb_qos_update_request(struct gb_qos_request *req, u32 new_value); + +extern void request_kernel_prefer_perf(int grp_idx, int enable); +#else +static inline void exynos_init_entity_util_avg(struct sched_entity *se) { } +static inline int exynos_need_active_balance(enum cpu_idle_type idle, + struct sched_domain *sd, int src_cpu, int dst_cpu) { return 0; } + +static inline unsigned long global_boost(void) { return 0; } +static inline int find_second_max_cap(void) { return -EINVAL; } + +static inline int exynos_select_cpu(struct task_struct *p, + int prev_cpu) { return -EINVAL; } +static inline int exynos_select_cpu(struct task_struct *p, int prev_cpu, + int sync, int sd_flag) { return -EINVAL; } + +static inline void ontime_migration(void) { } +static inline int 
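+/* stub: with EHMP disabled, ontime never restricts task migration */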
ontime_can_migration(struct task_struct *p, int cpu) { return 1; } +static inline void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight, + struct sched_avg *sa) { } +static inline void ontime_new_entity_load(struct task_struct *p, + struct sched_entity *se) { } +static inline void ontime_trace_task_info(struct task_struct *p) { } + +static inline void ehmp_update_max_cpu_capacity(int cpu, unsigned long val) { } + +static inline void ehmp_update_overutilized(int cpu, unsigned long capacity) { } +static inline bool ehmp_trigger_lb(int src_cpu, int dst_cpu) { return false; } + +static inline void gb_qos_update_request(struct gb_qos_request *req, u32 new_value) { } + +static inline void request_kernel_prefer_perf(int grp_idx, int enable) { } +#endif /* CONFIG_SCHED_EHMP */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f56992ff5508..9e4757aa1704 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -417,6 +417,25 @@ struct sched_avg { struct util_est util_est; }; +#ifdef CONFIG_SCHED_EHMP +#define NOT_ONTIME 1 +#define ONTIME_MIGRATING 2 +#define ONTIME 4 + +struct ontime_avg { + u64 ontime_migration_time; + u64 load_sum; + u32 period_contrib; + unsigned long load_avg; +}; + +struct ontime_entity { + struct ontime_avg avg; + int flags; + int cpu; +}; +#endif + struct sched_statistics { #ifdef CONFIG_SCHEDSTATS u64 wait_start; @@ -487,6 +506,9 @@ struct sched_entity { */ struct sched_avg avg ____cacheline_aligned_in_smp; #endif +#ifdef CONFIG_SCHED_EHMP + struct ontime_entity ontime; +#endif }; #ifdef CONFIG_SCHED_WALT diff --git a/include/trace/events/ehmp.h b/include/trace/events/ehmp.h new file mode 100644 index 000000000000..cd99ba3e93e3 --- /dev/null +++ b/include/trace/events/ehmp.h @@ -0,0 +1,340 @@ +/* + * Copyright (C) 2017 Park Bumgyu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ehmp + +#if !defined(_TRACE_EHMP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EHMP_H + +#include +#include + +/* + * Tracepoint for selection of boost cpu + */ +TRACE_EVENT(ehmp_select_boost_cpu, + + TP_PROTO(struct task_struct *p, int cpu, int trigger, char *state), + + TP_ARGS(p, cpu, trigger, state), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( int, trigger ) + __array( char, state, 64 ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->cpu = cpu; + __entry->trigger = trigger; + memcpy(__entry->state, state, 64); + ), + + TP_printk("comm=%s pid=%d target_cpu=%d trigger=%d state=%s", + __entry->comm, __entry->pid, __entry->cpu, + __entry->trigger, __entry->state) +); + +/* + * Tracepoint for selection of group balancer + */ +TRACE_EVENT(ehmp_select_group_boost, + + TP_PROTO(struct task_struct *p, int cpu, char *state), + + TP_ARGS(p, cpu, state), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __array( char, state, 64 ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->cpu = cpu; + memcpy(__entry->state, state, 64); + ), + + TP_printk("comm=%s pid=%d target_cpu=%d state=%s", + __entry->comm, __entry->pid, __entry->cpu, __entry->state) +); + +TRACE_EVENT(ehmp_global_boost, + + TP_PROTO(char *name, unsigned long boost), + + TP_ARGS(name, boost), + + TP_STRUCT__entry( + __array( char, name, 64 ) + __field( unsigned long, boost ) + ), + + TP_fast_assign( + memcpy(__entry->name, name, 64); + __entry->boost = boost; + ), + + TP_printk("name=%s global_boost_value=%ld", __entry->name, __entry->boost) +); + +/* + * Tracepoint for prefer idle + */ +TRACE_EVENT(ehmp_prefer_idle, + + TP_PROTO(struct task_struct *p, int orig_cpu, int target_cpu, + unsigned long task_util, unsigned long new_util, int idle), + + TP_ARGS(p, orig_cpu, target_cpu, task_util, new_util, idle), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, orig_cpu ) + __field( int, target_cpu ) + __field( unsigned long, task_util ) + __field( unsigned long, new_util ) + __field( int, idle ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->orig_cpu = orig_cpu; + __entry->target_cpu = target_cpu; + __entry->task_util = task_util; + __entry->new_util = new_util; + __entry->idle = idle; + ), + + TP_printk("comm=%s pid=%d orig_cpu=%d target_cpu=%d task_util=%lu new_util=%lu idle=%d", + __entry->comm, __entry->pid, __entry->orig_cpu, __entry->target_cpu, + __entry->task_util, __entry->new_util, __entry->idle) +); + +TRACE_EVENT(ehmp_prefer_idle_cpu_select, + + TP_PROTO(struct task_struct *p, int cpu), + + TP_ARGS(p, cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->cpu = cpu; + ), + + TP_printk("comm=%s pid=%d target_cpu=%d", + __entry->comm, __entry->pid, __entry->cpu) +); + +/* + * Tracepoint for cpu selection + */ +TRACE_EVENT(ehmp_find_best_target_stat, + + TP_PROTO(int cpu, unsigned long cap, unsigned long util, unsigned long target_util), + + TP_ARGS(cpu, cap, util, target_util), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, 
cap ) + __field( unsigned long, util ) + __field( unsigned long, target_util ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->cap = cap; + __entry->util = util; + __entry->target_util = target_util; + ), + + TP_printk("find_best : [cpu%d] capacity %lu, util %lu, target_util %lu\n", + __entry->cpu, __entry->cap, __entry->util, __entry->target_util) +); + +TRACE_EVENT(ehmp_find_best_target_candi, + + TP_PROTO(unsigned int cpu), + + TP_ARGS(cpu), + + TP_STRUCT__entry( + __field( unsigned int, cpu ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + ), + + TP_printk("find_best: energy candidate cpu %d\n", __entry->cpu) +); + +TRACE_EVENT(ehmp_find_best_target_cpu, + + TP_PROTO(unsigned int cpu, unsigned long target_util), + + TP_ARGS(cpu, target_util), + + TP_STRUCT__entry( + __field( unsigned int, cpu ) + __field( unsigned long, target_util ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->target_util = target_util; + ), + + TP_printk("find_best: target_cpu %d, target_util %lu\n", __entry->cpu, __entry->target_util) +); + +/* + * Tracepoint for ontime migration + */ +TRACE_EVENT(ehmp_ontime_migration, + + TP_PROTO(struct task_struct *p, unsigned long load, + int src_cpu, int dst_cpu, int boost_migration), + + TP_ARGS(p, load, src_cpu, dst_cpu, boost_migration), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, load ) + __field( int, src_cpu ) + __field( int, dst_cpu ) + __field( int, bm ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->load = load; + __entry->src_cpu = src_cpu; + __entry->dst_cpu = dst_cpu; + __entry->bm = boost_migration; + ), + + TP_printk("comm=%s pid=%d ontime_load_avg=%lu src_cpu=%d dst_cpu=%d boost_migration=%d", + __entry->comm, __entry->pid, __entry->load, + __entry->src_cpu, __entry->dst_cpu, __entry->bm) +); + +/* + * Tracepoint for accounting ontime load averages for tasks. + */ +TRACE_EVENT(ehmp_ontime_new_entity_load, + + TP_PROTO(struct task_struct *tsk, struct ontime_avg *avg), + + TP_ARGS(tsk, avg), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( unsigned long, load_avg ) + __field( u64, load_sum ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = task_cpu(tsk); + __entry->load_avg = avg->load_avg; + __entry->load_sum = avg->load_sum; + ), + TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu load_sum=%llu", + __entry->comm, + __entry->pid, + __entry->cpu, + __entry->load_avg, + (u64)__entry->load_sum) +); + +/* + * Tracepoint for accounting ontime load averages for tasks. 
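+ * (ontime_flag: 1=NOT_ONTIME, 2=ONTIME_MIGRATING, 4=ONTIME; an example
+ * record, with illustrative values, reads:
+ *   comm=foo pid=1234 cpu=4 load_avg=620 load_sum=29000000 ontime_flag=4)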
+ */ +TRACE_EVENT(ehmp_ontime_load_avg_task, + + TP_PROTO(struct task_struct *tsk, struct ontime_avg *avg, int ontime_flag), + + TP_ARGS(tsk, avg, ontime_flag), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( unsigned long, load_avg ) + __field( u64, load_sum ) + __field( int, ontime_flag ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = task_cpu(tsk); + __entry->load_avg = avg->load_avg; + __entry->load_sum = avg->load_sum; + __entry->ontime_flag = ontime_flag; + ), + TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu load_sum=%llu ontime_flag=%d", + __entry->comm, __entry->pid, __entry->cpu, __entry->load_avg, + (u64)__entry->load_sum, __entry->ontime_flag) +); + +TRACE_EVENT(ehmp_ontime_check_migrate, + + TP_PROTO(struct task_struct *tsk, int cpu, int migrate, char *label), + + TP_ARGS(tsk, cpu, migrate, label), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( int, migrate ) + __array( char, label, 64 ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = cpu; + __entry->migrate = migrate; + strncpy(__entry->label, label, 64); + ), + + TP_printk("comm=%s pid=%d target_cpu=%d migrate=%d reason=%s", + __entry->comm, __entry->pid, __entry->cpu, + __entry->migrate, __entry->label) +); + +#endif /* _TRACE_EHMP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index f1cb20ce6892..6494144d42aa 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -967,6 +967,67 @@ TRACE_EVENT(sched_tune_tasks_update, __entry->group_ts) ); +/* + * Tracepoint for schedtune_grouputil_update + */ +TRACE_EVENT(sched_tune_grouputil_update, + + TP_PROTO(int idx, int total, int accumulated, unsigned long group_util, + struct task_struct *heaviest_p, unsigned long biggest_util), + + TP_ARGS(idx, total, accumulated, group_util, heaviest_p, biggest_util), + + TP_STRUCT__entry( + __field( int, idx ) + __field( int, total ) + __field( int, accumulated ) + __field( unsigned long, group_util ) + __field( pid_t, pid ) + __array( char, comm, TASK_COMM_LEN ) + __field( unsigned long, biggest_util ) + ), + + TP_fast_assign( + __entry->idx = idx; + __entry->total = total; + __entry->accumulated = accumulated; + __entry->group_util = group_util; + __entry->pid = heaviest_p->pid; + memcpy(__entry->comm, heaviest_p->comm, TASK_COMM_LEN); + __entry->biggest_util = biggest_util; + ), + + TP_printk("idx=%d total=%d accumulated=%d group_util=%lu " + "heaviest task(pid=%d comm=%s util=%lu)", + __entry->idx, __entry->total, __entry->accumulated, __entry->group_util, + __entry->pid, __entry->comm, __entry->biggest_util) +); + +/* + * Tracepoint for checking group balancing + */ +TRACE_EVENT(sched_tune_check_group_balance, + + TP_PROTO(int idx, int ib_count, bool balancing), + + TP_ARGS(idx, ib_count, balancing), + + TP_STRUCT__entry( + __field( int, idx ) + __field( int, ib_count ) + __field( bool, balancing ) + ), + + TP_fast_assign( + __entry->idx = idx; + __entry->ib_count = ib_count; + __entry->balancing = balancing; + ), + + TP_printk("idx=%d imbalance_count=%d balancing=%d", + __entry->idx, __entry->ib_count, __entry->balancing) +); + /* * Tracepoint for schedtune_boostgroup_update */ diff --git a/init/Kconfig b/init/Kconfig index 3988656c235d..f1fa25160c22 
100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1066,6 +1066,18 @@ config SCHED_TUNE If unsure, say N. +config SCHED_EHMP + bool "Exynos scheduler for Heterogeneous Multi-Processor" + depends on SMP + help + This option supports Exynos scheduler for HMP architecture. It is + designed to secure the limits of energy aware scheduler. This option + provides features such as independent boosting functinos such as + global boost and on-time migration, and prefer_perf and enhanced + prefer_idle that work in conjunction with SCHEDTUNE. + + If unsure, say N. + config DEFAULT_USE_ENERGY_AWARE bool "Default to enabling the Energy Aware Scheduler feature" default n diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 7d5422367729..3fedfec4697a 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -20,6 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o obj-y += wait.o wait_bit.o swait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o +obj-$(CONFIG_SCHED_EHMP) += ehmp.o obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += energy.o obj-$(CONFIG_SCHED_WALT) += walt.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o diff --git a/kernel/sched/ehmp.c b/kernel/sched/ehmp.c new file mode 100644 index 000000000000..73b1692f16f5 --- /dev/null +++ b/kernel/sched/ehmp.c @@ -0,0 +1,1670 @@ +/* + * Exynos scheduler for Heterogeneous Multi-Processing (HMP) + * + * Copyright (C) 2017 Samsung Electronics Co., Ltd + * Park Bumgyu + */ + +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#include "sched.h" +#include "tune.h" + +static unsigned long task_util(struct task_struct *p) +{ + return p->se.avg.util_avg; +} + +static inline struct task_struct *task_of(struct sched_entity *se) +{ + return container_of(se, struct task_struct, se); +} + +static inline struct sched_entity *se_of(struct sched_avg *sa) +{ + return container_of(sa, struct sched_entity, avg); +} + +#define entity_is_cfs_rq(se) (se->my_q) +#define entity_is_task(se) (!se->my_q) +#define LOAD_AVG_MAX 47742 + +static unsigned long maxcap_val = 1024; +static int maxcap_cpu = 7; + +void ehmp_update_max_cpu_capacity(int cpu, unsigned long val) +{ + maxcap_cpu = cpu; + maxcap_val = val; +} + +static inline struct device_node *get_ehmp_node(void) +{ + return of_find_node_by_path("/cpus/ehmp"); +} + +static inline struct cpumask *sched_group_cpus(struct sched_group *sg) +{ + return to_cpumask(sg->cpumask); +} + +static bool sd_overutilized(struct sched_domain *sd) +{ + return sd->shared->overutilized; +} + +#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) + +/********************************************************************** + * task initialization * + **********************************************************************/ +void exynos_init_entity_util_avg(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq = se->cfs_rq; + struct sched_avg *sa = &se->avg; + int cpu = cpu_of(cfs_rq->rq); + unsigned long cap_org = capacity_orig_of(cpu); + long cap = (long)(cap_org - cfs_rq->avg.util_avg) / 2; + + if (cap > 0) { + if (cfs_rq->avg.util_avg != 0) { + sa->util_avg = cfs_rq->avg.util_avg * se->load.weight; + sa->util_avg /= (cfs_rq->avg.load_avg + 1); + + if (sa->util_avg > cap) + sa->util_avg = cap; + } else { + sa->util_avg = cap_org >> 2; + } + /* + * If we wish to restore tuning via setting initial util, + * this is where we should do it. 
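+	 * For example (illustrative numbers): a new task forked onto an idle
+	 * cpu with capacity_orig 1024 starts at util_avg = 1024 >> 2 = 256;
+	 * on a busy cpu the value is scaled by the task's weight and capped
+	 * at half of the spare capacity computed above.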
+ */ + sa->util_sum = sa->util_avg * LOAD_AVG_MAX; + } +} + +/********************************************************************** + * load balance * + **********************************************************************/ +bool cpu_overutilized(int cpu); + +#define lb_sd_parent(sd) \ + (sd->parent && sd->parent->groups != sd->parent->groups->next) + +static inline int +check_cpu_capacity(struct rq *rq, struct sched_domain *sd) +{ + return ((rq->cpu_capacity * sd->imbalance_pct) < + (rq->cpu_capacity_orig * 100)); +} + +unsigned long global_boost(void); +int exynos_need_active_balance(enum cpu_idle_type idle, struct sched_domain *sd, + int src_cpu, int dst_cpu) +{ + unsigned int src_imb_pct = lb_sd_parent(sd) ? sd->imbalance_pct : 1; + unsigned int dst_imb_pct = lb_sd_parent(sd) ? 100 : 1; + unsigned long src_cap = capacity_of(src_cpu); + unsigned long dst_cap = capacity_of(dst_cpu); + + if ((idle != CPU_NOT_IDLE) && + (cpu_rq(src_cpu)->cfs.h_nr_running == 1)) { + if ((check_cpu_capacity(cpu_rq(src_cpu), sd)) && + (src_cap * sd->imbalance_pct < dst_cap * 100)) { + return 1; + } + + if (!lb_sd_parent(sd) && src_cap < dst_cap) + if (cpu_overutilized(src_cpu) || global_boost()) + return 1; + } + + if ((src_cap * src_imb_pct < dst_cap * dst_imb_pct) && + cpu_rq(src_cpu)->cfs.h_nr_running == 1 && + cpu_overutilized(src_cpu) && + !cpu_overutilized(dst_cpu)) { + return 1; + } + + return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2); +} + +/********************************************************************** + * load balance_trigger * + **********************************************************************/ +struct lbt_overutil { + /* + * overutil_ratio means + * N < 0 : disable user_overutilized + * N == 0 : Always overutilized + * N > 0 : overutil_cap = org_capacity * overutil_ratio / 100 + */ + unsigned long overutil_cap; + int overutil_ratio; +}; + +DEFINE_PER_CPU(struct lbt_overutil, ehmp_bot_overutil); +DEFINE_PER_CPU(struct lbt_overutil, ehmp_top_overutil); +#define DISABLE_OU -1 + +bool cpu_overutilized(int cpu) +{ + struct lbt_overutil *ou = &per_cpu(ehmp_top_overutil, cpu); + + /* + * If top overutil is disabled, use main stream condition + * in the fair.c + */ + if (ou->overutil_ratio == DISABLE_OU) + return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * 1280); + + return cpu_util(cpu) > ou->overutil_cap; +} + +static bool inline lbt_top_overutilized(int cpu) +{ +// struct rq *rq = cpu_rq(cpu); +// return sched_feat(ENERGY_AWARE) && rq->rd->overutilized; + return sched_feat(ENERGY_AWARE); +} + +static bool inline lbt_bot_overutilized(int cpu) +{ + struct lbt_overutil *ou = &per_cpu(ehmp_bot_overutil, cpu); + + /* if bot overutil is disabled, return false */ + if (ou->overutil_ratio == DISABLE_OU) + return false; + + return cpu_util(cpu) > ou->overutil_cap; +} + +static void inline lbt_update_overutilized(int cpu, + unsigned long capacity, bool top) +{ + struct lbt_overutil *ou; + ou = top ? 
&per_cpu(ehmp_top_overutil, cpu) : + &per_cpu(ehmp_bot_overutil, cpu); + + if (ou->overutil_ratio == DISABLE_OU) + ou->overutil_cap = 0; + else + ou->overutil_cap = (capacity * ou->overutil_ratio) / 100; +} + +void ehmp_update_overutilized(int cpu, unsigned long capacity) +{ + lbt_update_overutilized(cpu, capacity, true); + lbt_update_overutilized(cpu, capacity, false); +} + +static bool lbt_is_same_group(int src_cpu, int dst_cpu) +{ + struct sched_domain *sd = rcu_dereference(per_cpu(sd_ea, src_cpu)); + struct sched_group *sg; + + if (!sd) + return false; + + sg = sd->groups; + return cpumask_test_cpu(dst_cpu, sched_group_cpus(sg)); +} + +static bool lbt_overutilized(int src_cpu, int dst_cpu) +{ + bool top_overutilized, bot_overutilized; + + /* src and dst are in the same domain, check top_overutilized */ + top_overutilized = lbt_top_overutilized(src_cpu); + if (!lbt_is_same_group(src_cpu, dst_cpu)) + return top_overutilized; + + /* check bot overutilized */ + bot_overutilized = lbt_bot_overutilized(src_cpu); + return bot_overutilized || top_overutilized; +} + +static ssize_t _show_overutil(char *buf, bool top) +{ + struct sched_domain *sd; + struct sched_group *sg; + struct lbt_overutil *ou; + int cpu, ret = 0; + + rcu_read_lock(); + + sd = rcu_dereference(per_cpu(sd_ea, 0)); + if (!sd) { + rcu_read_unlock(); + return ret; + } + + sg = sd->groups; + do { + for_each_cpu_and(cpu, sched_group_cpus(sg), cpu_active_mask) { + ou = top ? &per_cpu(ehmp_top_overutil, cpu) : + &per_cpu(ehmp_bot_overutil, cpu); + ret += sprintf(buf + ret, "cpu%d ratio:%3d cap:%4lu\n", + cpu, ou->overutil_ratio, ou->overutil_cap); + + } + } while (sg = sg->next, sg != sd->groups); + + rcu_read_unlock(); + return ret; +} + +static ssize_t _store_overutil(const char *buf, + size_t count, bool top) +{ + struct sched_domain *sd; + struct sched_group *sg; + struct lbt_overutil *ou; + unsigned long capacity; + int cpu; + const char *cp = buf; + int tokenized_data; + + rcu_read_lock(); + + sd = rcu_dereference(per_cpu(sd_ea, 0)); + if (!sd) { + rcu_read_unlock(); + return count; + } + + sg = sd->groups; + do { + if (sscanf(cp, "%d", &tokenized_data) != 1) + tokenized_data = -1; + + for_each_cpu_and(cpu, sched_group_cpus(sg), cpu_active_mask) { + ou = top ? 
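+			/* one ratio token per sched group, tokens separated by
+			 * ' ' or ':' (see strpbrk() below); a negative value
+			 * disables the per-group overutil setting */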
&per_cpu(ehmp_top_overutil, cpu) : + &per_cpu(ehmp_bot_overutil, cpu); + ou->overutil_ratio = tokenized_data; + + capacity = arch_scale_cpu_capacity(sd, cpu); + ehmp_update_overutilized(cpu, capacity); + } + + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } while (sg = sg->next, sg != sd->groups); + + rcu_read_unlock(); + return count; +} + +static ssize_t show_top_overutil(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return _show_overutil(buf, true); +} +static ssize_t store_top_overutil(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + return _store_overutil(buf, count, true); +} +static ssize_t show_bot_overutil(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return _show_overutil(buf, false); +} +static ssize_t store_bot_overutil(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + return _store_overutil(buf, count, false); +} + +static struct kobj_attribute top_overutil_attr = +__ATTR(top_overutil, 0644, show_top_overutil, store_top_overutil); +static struct kobj_attribute bot_overutil_attr = +__ATTR(bot_overutil, 0644, show_bot_overutil, store_bot_overutil); + +static int __init init_lbt(void) +{ + struct device_node *dn; + int top_ou[NR_CPUS] = {-1, }, bot_ou[NR_CPUS] = {-1, }; + int cpu; + + dn = get_ehmp_node(); + if (!dn) + return 0; + + if (of_property_read_u32_array(dn, "top-overutil", top_ou, NR_CPUS) < 0) + return 0; + + if (of_property_read_u32_array(dn, "bot-overutil", bot_ou, NR_CPUS) < 0) + return 0; + + for_each_possible_cpu(cpu) { + per_cpu(ehmp_top_overutil, cpu).overutil_ratio = top_ou[cpu]; + per_cpu(ehmp_bot_overutil, cpu).overutil_ratio = bot_ou[cpu]; + } + + return 0; +} +pure_initcall(init_lbt); + +bool ehmp_trigger_lb(int src_cpu, int dst_cpu) +{ + /* check overutilized condition */ + return lbt_overutilized(src_cpu, dst_cpu); +} + +/********************************************************************** + * Global boost * + **********************************************************************/ +static unsigned long gb_value = 0; +static unsigned long gb_max_value = 0; +static struct gb_qos_request gb_req_user = +{ + .name = "ehmp_gb_req_user", +}; + +static struct plist_head gb_list = PLIST_HEAD_INIT(gb_list); + +static DEFINE_SPINLOCK(gb_lock); + +static int gb_qos_max_value(void) +{ + return plist_last(&gb_list)->prio; +} + +static int gb_qos_req_value(struct gb_qos_request *req) +{ + return req->node.prio; +} + +void gb_qos_update_request(struct gb_qos_request *req, u32 new_value) +{ + unsigned long flags; + + if (req->node.prio == new_value) + return; + + spin_lock_irqsave(&gb_lock, flags); + + if (req->active) + plist_del(&req->node, &gb_list); + else + req->active = 1; + + plist_node_init(&req->node, new_value); + plist_add(&req->node, &gb_list); + + gb_value = gb_max_value * gb_qos_max_value() / 100; + trace_ehmp_global_boost(req->name, new_value); + + spin_unlock_irqrestore(&gb_lock, flags); +} + +static ssize_t show_global_boost(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct gb_qos_request *req; + int ret = 0; + + plist_for_each_entry(req, &gb_list, node) + ret += snprintf(buf + ret, 30, "%s : %d\n", + req->name, gb_qos_req_value(req)); + + return ret; +} + +static ssize_t store_global_boost(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + unsigned int input; + + if (!sscanf(buf, "%d", &input)) + return -EINVAL; + + gb_qos_update_request(&gb_req_user, 
input); + + return count; +} + +static struct kobj_attribute global_boost_attr = +__ATTR(global_boost, 0644, show_global_boost, store_global_boost); + +#define BOOT_BOOST_DURATION 40000000 /* microseconds */ +unsigned long global_boost(void) +{ + u64 now = ktime_to_us(ktime_get()); + + if (now < BOOT_BOOST_DURATION) + return gb_max_value; + + return gb_value; +} + +int find_second_max_cap(void) +{ + struct sched_domain *sd = rcu_dereference(per_cpu(sd_ea, 0)); + struct sched_group *sg; + int max_cap = 0, second_max_cap = 0; + + if (!sd) + return 0; + + sg = sd->groups; + do { + int i; + + for_each_cpu(i, sched_group_cpus(sg)) { + if (max_cap < cpu_rq(i)->cpu_capacity_orig) { + second_max_cap = max_cap; + max_cap = cpu_rq(i)->cpu_capacity_orig; + } + } + } while (sg = sg->next, sg != sd->groups); + + return second_max_cap; +} + +static int __init init_global_boost(void) +{ + gb_max_value = find_second_max_cap() + 1; + + return 0; +} +pure_initcall(init_global_boost); + +/********************************************************************** + * Boost cpu selection (global boost, schedtune.prefer_perf) * + **********************************************************************/ +#define cpu_selected(cpu) (cpu >= 0) + +int kernel_prefer_perf(int grp_idx); +static ssize_t show_prefer_perf(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int i, ret = 0; + + for (i = 0; i < STUNE_GROUP_COUNT; i++) + ret += snprintf(buf + ret, 10, "%d ", kernel_prefer_perf(i)); + + ret += snprintf(buf + ret, 10, "\n"); + + return ret; +} + +static struct kobj_attribute prefer_perf_attr = +__ATTR(kernel_prefer_perf, 0444, show_prefer_perf, NULL); + +enum { + BT_PREFER_PERF = 0, + BT_GROUP_BALANCE, + BT_GLOBAL_BOOST, +}; + +struct boost_trigger { + int trigger; + int boost_val; +}; + +static int check_boost_trigger(struct task_struct *p, struct boost_trigger *bt) +{ + int gb; + +#ifdef CONFIG_SCHED_TUNE + if (schedtune_prefer_perf(p) > 0) { + bt->trigger = BT_PREFER_PERF; + bt->boost_val = schedtune_perf_threshold(); + return 1; + } + + if (schedtune_need_group_balance(p) > 0) { + bt->trigger = BT_GROUP_BALANCE; + bt->boost_val = schedtune_perf_threshold(); + return 1; + } +#endif + + gb = global_boost(); + if (gb) { + bt->trigger = BT_GLOBAL_BOOST; + bt->boost_val = gb; + return 1; + } + + /* not boost state */ + return 0; +} + +static int boost_select_cpu(struct task_struct *p, struct cpumask *target_cpus) +{ + int i, cpu = 0; + + if (cpumask_empty(target_cpus)) + return -1; + + if (cpumask_test_cpu(task_cpu(p), target_cpus)) + return task_cpu(p); + + /* Return last cpu in target_cpus */ + for_each_cpu(i, target_cpus) + cpu = i; + + return cpu; +} + +static void mark_shallowest_cpu(int cpu, unsigned int *min_exit_latency, + struct cpumask *shallowest_cpus) +{ + struct rq *rq = cpu_rq(cpu); + struct cpuidle_state *idle = idle_get_state(rq); + + /* Before enabling cpuidle, all idle cpus are marked */ + if (!idle) { + cpumask_set_cpu(cpu, shallowest_cpus); + return; + } + + /* Deeper idle cpu is ignored */ + if (idle->exit_latency > *min_exit_latency) + return; + + /* if shallower idle cpu is found, previsouly founded cpu is ignored */ + if (idle->exit_latency < *min_exit_latency) { + cpumask_clear(shallowest_cpus); + *min_exit_latency = idle->exit_latency; + } + + cpumask_set_cpu(cpu, shallowest_cpus); +} +static int check_migration_task(struct task_struct *p) +{ + return !p->se.avg.last_update_time; +} + +static unsigned long cpu_util_wake(int cpu, struct task_struct *p) +{ + unsigned long util, 
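+	/* utilization of @cpu with @p's own contribution removed (p is waking
+	 * up and may still be accounted here), clamped to capacity_orig */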
capacity; + + /* Task has no contribution or is new */ + if (cpu != task_cpu(p) || check_migration_task(p)) + return cpu_util(cpu); + + capacity = capacity_orig_of(cpu); + util = max_t(long, cpu_util(cpu) - task_util(p), 0); + + return (util >= capacity) ? capacity : util; +} + +static int find_group_boost_target(struct task_struct *p) +{ + struct sched_domain *sd; + int shallowest_cpu = -1; + int lowest_cpu = -1; + unsigned int min_exit_latency = UINT_MAX; + unsigned long lowest_util = ULONG_MAX; + int target_cpu = -1; + int cpu; + char state[30] = "fail"; + + sd = rcu_dereference(per_cpu(sd_ea, maxcap_cpu)); + if (!sd) + return target_cpu; + + if (cpumask_test_cpu(task_cpu(p), sched_group_cpus(sd->groups))) { + if (idle_cpu(task_cpu(p))) { + target_cpu = task_cpu(p); + strcpy(state, "current idle"); + goto find_target; + } + } + + for_each_cpu_and(cpu, tsk_cpus_allowed(p), sched_group_cpus(sd->groups)) { + unsigned long util = cpu_util_wake(cpu, p); + + if (idle_cpu(cpu)) { + struct cpuidle_state *idle; + + idle = idle_get_state(cpu_rq(cpu)); + if (!idle) { + target_cpu = cpu; + strcpy(state, "idle wakeup"); + goto find_target; + } + + if (idle->exit_latency < min_exit_latency) { + min_exit_latency = idle->exit_latency; + shallowest_cpu = cpu; + continue; + } + } + + if (cpu_selected(shallowest_cpu)) + continue; + + if (util < lowest_util) { + lowest_cpu = cpu; + lowest_util = util; + } + } + + if (cpu_selected(shallowest_cpu)) { + target_cpu = shallowest_cpu; + strcpy(state, "shallowest idle"); + goto find_target; + } + + if (cpu_selected(lowest_cpu)) { + target_cpu = lowest_cpu; + strcpy(state, "lowest util"); + } + +find_target: + trace_ehmp_select_group_boost(p, target_cpu, state); + + return target_cpu; +} + +static int +find_boost_target(struct sched_domain *sd, struct task_struct *p, + unsigned long min_util, struct boost_trigger *bt) +{ + struct sched_group *sg; + int boost = bt->boost_val; + unsigned long max_capacity; + struct cpumask boost_candidates; + struct cpumask backup_boost_candidates; + unsigned int min_exit_latency = UINT_MAX; + unsigned int backup_min_exit_latency = UINT_MAX; + int target_cpu; + bool go_up = false; + unsigned long lowest_util = ULONG_MAX; + int lowest_cpu = -1; + char state[30] = "fail"; + + if (bt->trigger == BT_GROUP_BALANCE) + return find_group_boost_target(p); + + cpumask_setall(&boost_candidates); + cpumask_clear(&backup_boost_candidates); + + max_capacity = maxcap_val; + + sg = sd->groups; + + do { + int i; + + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { + unsigned long new_util, wake_util; + + if (!cpu_online(i)) + continue; + + wake_util = cpu_util_wake(i, p); + new_util = wake_util + task_util(p); + new_util = max(min_util, new_util); + + if (min(new_util + boost, max_capacity) > capacity_orig_of(i)) { + if (!cpu_rq(i)->nr_running) + mark_shallowest_cpu(i, &backup_min_exit_latency, + &backup_boost_candidates); + else if (cpumask_test_cpu(task_cpu(p), sched_group_cpus(sg))) + go_up = true; + + continue; + } + + if (cpumask_weight(&boost_candidates) >= nr_cpu_ids) + cpumask_clear(&boost_candidates); + + if (!cpu_rq(i)->nr_running) { + mark_shallowest_cpu(i, &min_exit_latency, &boost_candidates); + continue; + } + + if (wake_util < lowest_util) { + lowest_util = wake_util; + lowest_cpu = i; + } + } + + if (cpumask_weight(&boost_candidates) >= nr_cpu_ids) + continue; + + target_cpu = boost_select_cpu(p, &boost_candidates); + if (cpu_selected(target_cpu)) { + strcpy(state, "big idle"); + goto out; + } + + target_cpu = 
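+		/* no idle cpu could absorb the boosted utilization; fall back
+		 * to the idle-but-over-capacity cpus ("little idle" below) */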
boost_select_cpu(p, &backup_boost_candidates); + if (cpu_selected(target_cpu)) { + strcpy(state, "little idle"); + goto out; + } + } while (sg = sg->next, sg != sd->groups); + + if (go_up) { + strcpy(state, "lowest big cpu"); + target_cpu = lowest_cpu; + goto out; + } + + strcpy(state, "current cpu"); + target_cpu = task_cpu(p); + +out: + trace_ehmp_select_boost_cpu(p, target_cpu, bt->trigger, state); + return target_cpu; +} + +/********************************************************************** + * schedtune.prefer_idle * + **********************************************************************/ +static void mark_lowest_cpu(int cpu, unsigned long new_util, + int *lowest_cpu, unsigned long *lowest_util) +{ + if (new_util >= *lowest_util) + return; + + *lowest_util = new_util; + *lowest_cpu = cpu; +} + +static int find_prefer_idle_target(struct sched_domain *sd, + struct task_struct *p, unsigned long min_util) +{ + struct sched_group *sg; + int target_cpu = -1; + int lowest_cpu = -1; + int lowest_idle_cpu = -1; + int overcap_cpu = -1; + unsigned long lowest_util = ULONG_MAX; + unsigned long lowest_idle_util = ULONG_MAX; + unsigned long overcap_util = ULONG_MAX; + struct cpumask idle_candidates; + struct cpumask overcap_idle_candidates; + + cpumask_clear(&idle_candidates); + cpumask_clear(&overcap_idle_candidates); + + sg = sd->groups; + + do { + int i; + + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { + unsigned long new_util, wake_util; + + if (!cpu_online(i)) + continue; + + wake_util = cpu_util_wake(i, p); + new_util = wake_util + task_util(p); + new_util = max(min_util, new_util); + + trace_ehmp_prefer_idle(p, task_cpu(p), i, task_util(p), + new_util, idle_cpu(i)); + + if (new_util > capacity_orig_of(i)) { + if (idle_cpu(i)) { + cpumask_set_cpu(i, &overcap_idle_candidates); + mark_lowest_cpu(i, new_util, + &overcap_cpu, &overcap_util); + } + + continue; + } + + if (idle_cpu(i)) { + if (task_cpu(p) == i) { + target_cpu = i; + break; + } + + cpumask_set_cpu(i, &idle_candidates); + mark_lowest_cpu(i, new_util, + &lowest_idle_cpu, &lowest_idle_util); + + continue; + } + + mark_lowest_cpu(i, new_util, &lowest_cpu, &lowest_util); + } + + if (cpu_selected(target_cpu)) + break; + + if (cpumask_weight(&idle_candidates)) { + target_cpu = lowest_idle_cpu; + break; + } + + if (cpu_selected(lowest_cpu)) { + target_cpu = lowest_cpu; + break; + } + + } while (sg = sg->next, sg != sd->groups); + + if (cpu_selected(target_cpu)) + goto out; + + if (cpumask_weight(&overcap_idle_candidates)) { + if (cpumask_test_cpu(task_cpu(p), &overcap_idle_candidates)) + target_cpu = task_cpu(p); + else + target_cpu = overcap_cpu; + + goto out; + } + +out: + trace_ehmp_prefer_idle_cpu_select(p, target_cpu); + + return target_cpu; +} + +/********************************************************************** + * On-time migration * + **********************************************************************/ +static unsigned long up_threshold; +static unsigned long down_threshold; +static unsigned int min_residency_us; + +static ssize_t show_min_residency(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%d\n", min_residency_us); +} + +static ssize_t store_min_residency(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + int input; + + if (!sscanf(buf, "%d", &input)) + return -EINVAL; + + input = input < 0 ? 
0 : input; + + min_residency_us = input; + + return count; +} + +static struct kobj_attribute min_residency_attr = +__ATTR(min_residency, 0644, show_min_residency, store_min_residency); + +static ssize_t show_up_threshold(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%ld\n", up_threshold); +} + +static ssize_t store_up_threshold(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + long input; + + if (!sscanf(buf, "%ld", &input)) + return -EINVAL; + + input = input < 0 ? 0 : input; + input = input > 1024 ? 1024 : input; + + up_threshold = input; + + return count; +} + +static struct kobj_attribute up_threshold_attr = +__ATTR(up_threshold, 0644, show_up_threshold, store_up_threshold); + +static ssize_t show_down_threshold(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%ld\n", down_threshold); +} + +static ssize_t store_down_threshold(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + long input; + + if (!sscanf(buf, "%ld", &input)) + return -EINVAL; + + input = input < 0 ? 0 : input; + input = input > 1024 ? 1024 : input; + + down_threshold = input; + + return count; +} + +static struct kobj_attribute down_threshold_attr = +__ATTR(down_threshold, 0644, show_down_threshold, store_down_threshold); + +#define ontime_flag(p) (ontime_of(p)->flags) +#define ontime_migration_time(p) (ontime_of(p)->avg.ontime_migration_time) +#define ontime_load_avg(p) (ontime_of(p)->avg.load_avg) + +static inline struct ontime_entity *ontime_of(struct task_struct *p) +{ + return &p->se.ontime; +} + +static inline void include_ontime_task(struct task_struct *p) +{ + ontime_flag(p) = ONTIME; + + /* Manage time based on clock task of boot cpu(cpu0) */ + ontime_migration_time(p) = cpu_rq(0)->clock_task; +} + +static inline void exclude_ontime_task(struct task_struct *p) +{ + ontime_migration_time(p) = 0; + ontime_flag(p) = NOT_ONTIME; +} + +static int +ontime_select_target_cpu(struct sched_group *sg, const struct cpumask *mask) +{ + int cpu; + int dest_cpu = -1; + unsigned int min_exit_latency = UINT_MAX; + struct cpuidle_state *idle; + + for_each_cpu_and(cpu, sched_group_cpus(sg), mask) { + if (!idle_cpu(cpu)) + continue; + + if (cpu_rq(cpu)->ontime_migrating) + continue; + + idle = idle_get_state(cpu_rq(cpu)); + if (!idle) + return cpu; + + if (idle && idle->exit_latency < min_exit_latency) { + min_exit_latency = idle->exit_latency; + dest_cpu = cpu; + } + } + + return dest_cpu; +} + +#define TASK_TRACK_COUNT 5 + +extern struct sched_entity *__pick_next_entity(struct sched_entity *se); +static struct task_struct * +ontime_pick_heavy_task(struct sched_entity *se, struct cpumask *dst_cpus, + int *boost_migration) +{ + struct task_struct *heaviest_task = NULL; + struct task_struct *p; + unsigned int max_util_avg = 0; + int task_count = 0; + int boosted = !!global_boost(); + + /* + * Since current task does not exist in entity list of cfs_rq, + * check first that current task is heavy. 
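+ * Then up to TASK_TRACK_COUNT entities on this runqueue are scanned and
+ * the task with the largest ontime load (above up_threshold unless
+ * globally boosted) that is allowed on dst_cpus is chosen; a
+ * schedtune.prefer_perf task is taken immediately as a boost migration.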
+ */ + if (boosted || ontime_load_avg(task_of(se)) >= up_threshold) { + heaviest_task = task_of(se); + max_util_avg = ontime_load_avg(task_of(se)); + if (boosted) + *boost_migration = 1; + } + + se = __pick_first_entity(se->cfs_rq); + while (se && task_count < TASK_TRACK_COUNT) { + /* Skip non-task entity */ + if (entity_is_cfs_rq(se)) + goto next_entity; + + p = task_of(se); + if (schedtune_prefer_perf(p)) { + heaviest_task = p; + *boost_migration = 1; + break; + } + + if (!boosted && ontime_load_avg(p) < up_threshold) + goto next_entity; + + if (ontime_load_avg(p) > max_util_avg && + cpumask_intersects(dst_cpus, tsk_cpus_allowed(p))) { + heaviest_task = p; + max_util_avg = ontime_load_avg(p); + *boost_migration = boosted; + } + +next_entity: + se = __pick_next_entity(se); + task_count++; + } + + return heaviest_task; +} + +void ontime_new_entity_load(struct task_struct *parent, struct sched_entity *se) +{ + struct ontime_entity *ontime; + + if (entity_is_cfs_rq(se)) + return; + + ontime = &se->ontime; + + ontime->avg.load_sum = ontime_of(parent)->avg.load_sum; + ontime->avg.load_avg = ontime_of(parent)->avg.load_avg; + ontime->avg.ontime_migration_time = 0; + ontime->avg.period_contrib = 1023; + ontime->flags = NOT_ONTIME; + + trace_ehmp_ontime_new_entity_load(task_of(se), &ontime->avg); +} + +/* Structure of ontime migration environment */ +struct ontime_env { + struct rq *dst_rq; + int dst_cpu; + struct rq *src_rq; + int src_cpu; + struct task_struct *target_task; + int boost_migration; +}; +DEFINE_PER_CPU(struct ontime_env, ontime_env); + +static int can_migrate(struct task_struct *p, struct ontime_env *env) +{ + if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) + return 0; + + if (task_running(env->src_rq, p)) + return 0; + + return 1; +} + +static void move_task(struct task_struct *p, struct ontime_env *env) +{ + p->on_rq = TASK_ON_RQ_MIGRATING; + deactivate_task(env->src_rq, p, 0); + set_task_cpu(p, env->dst_cpu); + + activate_task(env->dst_rq, p, 0); + p->on_rq = TASK_ON_RQ_QUEUED; + check_preempt_curr(env->dst_rq, p, 0); +} + +static int move_specific_task(struct task_struct *target, struct ontime_env *env) +{ + struct task_struct *p, *n; + + list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) { + if (!can_migrate(p, env)) + continue; + + if (p != target) + continue; + + move_task(p, env); + return 1; + } + + return 0; +} + +static int ontime_migration_cpu_stop(void *data) +{ + struct ontime_env *env = data; + struct rq *src_rq, *dst_rq; + int src_cpu, dst_cpu; + struct task_struct *p; + struct sched_domain *sd; + int boost_migration; + + /* Initialize environment data */ + src_rq = env->src_rq; + dst_rq = env->dst_rq = cpu_rq(env->dst_cpu); + src_cpu = env->src_cpu = env->src_rq->cpu; + dst_cpu = env->dst_cpu; + p = env->target_task; + boost_migration = env->boost_migration; + + raw_spin_lock_irq(&src_rq->lock); + + if (!(ontime_flag(p) & ONTIME_MIGRATING)) + goto out_unlock; + + if (p->exit_state) + goto out_unlock; + + if (unlikely(src_cpu != smp_processor_id())) + goto out_unlock; + + if (src_rq->nr_running <= 1) + goto out_unlock; + + if (src_rq != task_rq(p)) + goto out_unlock; + + BUG_ON(src_rq == dst_rq); + + double_lock_balance(src_rq, dst_rq); + + rcu_read_lock(); + for_each_domain(dst_cpu, sd) + if (cpumask_test_cpu(src_cpu, sched_domain_span(sd))) + break; + + if (likely(sd) && move_specific_task(p, env)) { + if (boost_migration) { + /* boost task is not classified as ontime task */ + exclude_ontime_task(p); + } else + 
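+				/* load-driven migration: tag the task ONTIME so
+				 * a later move back to LITTLE is resisted until
+				 * it cools down (see ontime_can_migration()) */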
include_ontime_task(p); + + rcu_read_unlock(); + double_unlock_balance(src_rq, dst_rq); + + trace_ehmp_ontime_migration(p, ontime_of(p)->avg.load_avg, + src_cpu, dst_cpu, boost_migration); + goto success_unlock; + } + + rcu_read_unlock(); + double_unlock_balance(src_rq, dst_rq); + +out_unlock: + exclude_ontime_task(p); + +success_unlock: + src_rq->active_balance = 0; + dst_rq->ontime_migrating = 0; + + raw_spin_unlock_irq(&src_rq->lock); + put_task_struct(p); + + return 0; +} + +DEFINE_PER_CPU(struct cpu_stop_work, ontime_migration_work); + +static DEFINE_SPINLOCK(om_lock); + +void ontime_migration(void) +{ + struct sched_domain *sd; + struct sched_group *src_sg, *dst_sg; + int cpu; + + if (!spin_trylock(&om_lock)) + return; + + rcu_read_lock(); + + sd = rcu_dereference(per_cpu(sd_ea, 0)); + if (!sd) + goto ontime_migration_exit; + + src_sg = sd->groups; + + do { + dst_sg = src_sg->next; + for_each_cpu_and(cpu, sched_group_cpus(src_sg), cpu_active_mask) { + unsigned long flags; + struct rq *rq; + struct sched_entity *se; + struct task_struct *p; + int dst_cpu; + struct ontime_env *env = &per_cpu(ontime_env, cpu); + int boost_migration = 0; + + rq = cpu_rq(cpu); + raw_spin_lock_irqsave(&rq->lock, flags); + + /* + * Ontime migration is not performed when active balance + * is in progress. + */ + if (rq->active_balance) { + raw_spin_unlock_irqrestore(&rq->lock, flags); + continue; + } + + /* + * No need to migration if source cpu does not have cfs + * tasks. + */ + if (!rq->cfs.curr) { + raw_spin_unlock_irqrestore(&rq->lock, flags); + continue; + } + + se = rq->cfs.curr; + + /* Find task entity if entity is cfs_rq. */ + if (entity_is_cfs_rq(se)) { + struct cfs_rq *cfs_rq; + + cfs_rq = se->my_q; + while (cfs_rq) { + se = cfs_rq->curr; + cfs_rq = se->my_q; + } + } + + /* + * Select cpu to migrate the task to. Return negative number + * if there is no idle cpu in sg. + */ + dst_cpu = ontime_select_target_cpu(dst_sg, cpu_active_mask); + if (dst_cpu < 0) { + raw_spin_unlock_irqrestore(&rq->lock, flags); + continue; + } + + /* + * Pick task to be migrated. Return NULL if there is no + * heavy task in rq. 
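+			 * If the task is picked for boosting rather than for
+			 * its load, boost_migration is set and the task is not
+			 * tagged ONTIME after the move.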
+ */ + p = ontime_pick_heavy_task(se, sched_group_cpus(dst_sg), + &boost_migration); + if (!p) { + raw_spin_unlock_irqrestore(&rq->lock, flags); + continue; + } + + ontime_flag(p) = ONTIME_MIGRATING; + get_task_struct(p); + + /* Set environment data */ + env->dst_cpu = dst_cpu; + env->src_rq = rq; + env->target_task = p; + env->boost_migration = boost_migration; + + /* Prevent active balance to use stopper for migration */ + rq->active_balance = 1; + + cpu_rq(dst_cpu)->ontime_migrating = 1; + + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* Migrate task through stopper */ + stop_one_cpu_nowait(cpu, + ontime_migration_cpu_stop, env, + &per_cpu(ontime_migration_work, cpu)); + } + } while (src_sg = src_sg->next, src_sg->next != sd->groups); + +ontime_migration_exit: + rcu_read_unlock(); + spin_unlock(&om_lock); +} + +int ontime_can_migration(struct task_struct *p, int dst_cpu) +{ + u64 delta; + + if (ontime_flag(p) & NOT_ONTIME) { + trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "not ontime"); + return true; + } + + if (ontime_flag(p) & ONTIME_MIGRATING) { + trace_ehmp_ontime_check_migrate(p, dst_cpu, false, "migrating"); + return false; + } + + if (cpumask_test_cpu(dst_cpu, cpu_coregroup_mask(maxcap_cpu))) { + trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "ontime on big"); + return true; + } + + /* + * At this point, task is "ontime task" and running on big + * and load balancer is trying to migrate task to LITTLE. + */ + delta = cpu_rq(0)->clock_task - ontime_migration_time(p); + delta = delta >> 10; + if (delta <= min_residency_us) { + trace_ehmp_ontime_check_migrate(p, dst_cpu, false, "min residency"); + return false; + } + + if (cpu_rq(task_cpu(p))->nr_running > 1) { + trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "big is busy"); + goto release; + } + + if (ontime_load_avg(p) >= down_threshold) { + trace_ehmp_ontime_check_migrate(p, dst_cpu, false, "heavy task"); + return false; + } + + trace_ehmp_ontime_check_migrate(p, dst_cpu, true, "ontime_release"); +release: + exclude_ontime_task(p); + + return true; +} + +static int ontime_task_wakeup(struct task_struct *p) +{ + struct sched_domain *sd; + u64 delta; + int target_cpu = -1; + + if (ontime_flag(p) & NOT_ONTIME) + if (ontime_load_avg(p) < up_threshold) + return -1; + + if (ontime_flag(p) & ONTIME) { + delta = cpu_rq(0)->clock_task - ontime_migration_time(p); + delta = delta >> 10; + + if (delta > min_residency_us && + ontime_load_avg(p) < down_threshold) { + exclude_ontime_task(p); + return -1; + } + + if (idle_cpu(task_cpu(p))) + return task_cpu(p); + } + + /* caller must hold rcu for sched domain */ + sd = rcu_dereference(per_cpu(sd_ea, maxcap_cpu)); + if (!sd) + return -1; + + target_cpu = ontime_select_target_cpu(sd->groups, tsk_cpus_allowed(p)); + if (cpu_selected(target_cpu)) { + if (ontime_flag(p) & NOT_ONTIME) + include_ontime_task(p); + } else { + if (ontime_flag(p) & ONTIME) + exclude_ontime_task(p); + } + + return target_cpu; +} + +static void ontime_update_next_balance(int cpu, struct ontime_avg *oa) +{ + if (cpumask_test_cpu(cpu, cpu_coregroup_mask(maxcap_cpu))) + return; + + if (oa->load_avg < up_threshold) + return; + + /* + * Update the next_balance of this cpu because tick is most likely + * to occur first in currently running cpu. 
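+	 * Setting it to the current jiffies makes that tick kick a rebalance
+	 * immediately instead of waiting for the normal balance interval.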
+ */ + cpu_rq(smp_processor_id())->next_balance = jiffies; +} + +#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) + +extern u64 decay_load(u64 val, u64 n); + +static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3) +{ + u32 c1, c2, c3 = d3; + + c1 = decay_load((u64)d1, periods); + c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024; + + return c1 + c2 + c3; +} + +/* + * ontime_update_load_avg : load tracking for ontime-migration + * + * @sa : sched_avg to be updated + * @delta : elapsed time since last update + * @period_contrib : amount already accumulated against our next period + * @scale_freq : scale vector of cpu frequency + * @scale_cpu : scale vector of cpu capacity + */ +void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight, struct sched_avg *sa) +{ + struct ontime_avg *oa = &se_of(sa)->ontime.avg; + unsigned long scale_freq, scale_cpu; + u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */ + u64 periods; + + scale_freq = arch_scale_freq_capacity(NULL, cpu); + scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + + delta += oa->period_contrib; + periods = delta / 1024; /* A period is 1024us (~1ms) */ + + if (periods) { + oa->load_sum = decay_load(oa->load_sum, periods); + + delta %= 1024; + contrib = __accumulate_pelt_segments(periods, + 1024 - oa->period_contrib, delta); + } + oa->period_contrib = delta; + + if (weight) { + contrib = cap_scale(contrib, scale_freq); + oa->load_sum += contrib * scale_cpu; + } + + if (!periods) + return; + + oa->load_avg = div_u64(oa->load_sum, LOAD_AVG_MAX - 1024 + oa->period_contrib); + ontime_update_next_balance(cpu, oa); +} + +void ontime_trace_task_info(struct task_struct *p) +{ + trace_ehmp_ontime_load_avg_task(p, &ontime_of(p)->avg, ontime_flag(p)); +} + +static inline unsigned long mincap_of(int cpu) +{ + return sge_array[cpu][SD_LEVEL0]->cap_states[0].cap; +} + +static int __init init_ontime(void) +{ + struct device_node *dn; + u32 prop; + + dn = get_ehmp_node(); + if (!dn) + return 0; + + /* + * Initilize default values: + * up_threshold = 40% of LITTLE maximum capacity + * down_threshold = 50% of big minimum capacity + * min_residency = 8ms + */ + up_threshold = capacity_orig_of(0) * 40 / 100; + down_threshold = mincap_of(maxcap_cpu) * 50 / 100; + min_residency_us = 8192; + + of_property_read_u32(dn, "up-threshold", &prop); + up_threshold = prop; + + of_property_read_u32(dn, "down-threshold", &prop); + down_threshold = prop; + + of_property_read_u32(dn, "min-residency-us", &prop); + min_residency_us = prop; + + return 0; +} +pure_initcall(init_ontime); + +/********************************************************************** + * cpu selection * + **********************************************************************/ +extern unsigned long boosted_task_util(struct task_struct *task); +extern unsigned long capacity_curr_of(int cpu); +extern struct energy_env *get_eenv(struct task_struct *p, int prev_cpu); +extern int select_energy_cpu_idx(struct energy_env *eenv); +extern int find_best_target(struct task_struct *p, int *backup_cpu, + bool boosted, bool prefer_idle); + +#define EAS_CPU_PRV 0 +#define EAS_CPU_NXT 1 +#define EAS_CPU_BKP 2 + +static int select_energy_cpu(struct sched_domain *sd, struct task_struct *p, + int prev_cpu, bool boosted) +{ + struct energy_env *eenv; + int energy_cpu = -1; + + eenv = get_eenv(p, prev_cpu); + if (eenv->max_cpu_count < 2) + return energy_cpu; + + eenv->max_cpu_count = EAS_CPU_BKP + 1; + + /* Find a cpu with sufficient capacity */ + eenv->cpu[EAS_CPU_NXT].cpu_id 
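+	/* find_best_target() picks the next candidate cpu and may also fill
+	 * in a backup; either slot is -1 when nothing suitable is found */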
= find_best_target(p, + &eenv->cpu[EAS_CPU_BKP].cpu_id, boosted, 0); + + /* take note if no backup was found */ + if (eenv->cpu[EAS_CPU_BKP].cpu_id < 0) + eenv->max_cpu_count = EAS_CPU_BKP; + + /* take note if no target was found */ + if (eenv->cpu[EAS_CPU_NXT].cpu_id < 0) + eenv->max_cpu_count = EAS_CPU_NXT; + + if (eenv->max_cpu_count == EAS_CPU_NXT) { + /* + * we did not find any energy-awareness + * candidates beyond prev_cpu, so we will + * fall-back to the regular slow-path. + */ + return energy_cpu; + } + + /* find most energy-efficient CPU */ + energy_cpu = select_energy_cpu_idx(eenv) < 0 ? -1 : + eenv->cpu[eenv->next_idx].cpu_id; + + return energy_cpu; +} + +int exynos_select_cpu(struct task_struct *p, int prev_cpu, int sync, int sd_flag) +{ + struct sched_domain *sd, *prev_sd; + int target_cpu = -1; + bool boosted, prefer_idle; + unsigned long min_util; + struct boost_trigger trigger = { + .trigger = 0, + .boost_val = 0 + }; + + rcu_read_lock(); + + target_cpu = ontime_task_wakeup(p); + if (cpu_selected(target_cpu)) + goto unlock; + + /* Find target cpu from lowest capacity domain(cpu0) */ + sd = rcu_dereference(per_cpu(sd_ea, 0)); + if (!sd) + goto unlock; + + boosted = schedtune_task_boost(p) > 0; + prefer_idle = sched_feat(EAS_PREFER_IDLE) ? (schedtune_task_boost(p) > 0) : 0; + + min_util = boosted_task_util(p); + + if (check_boost_trigger(p, &trigger)) { + target_cpu = find_boost_target(sd, p, min_util, &trigger); + if (cpu_selected(target_cpu)) + goto unlock; + } + + if (sysctl_sched_sync_hint_enable && sync) { + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) { + target_cpu = cpu; + goto unlock; + } + } + + if (prefer_idle) { + target_cpu = find_prefer_idle_target(sd, p, min_util); + if (cpu_selected(target_cpu)) + goto unlock; + } + + prev_sd = rcu_dereference_sched(cpu_rq(prev_cpu)->sd); + if (sched_feat(ENERGY_AWARE) && sd_overutilized(sd)) + target_cpu = select_energy_cpu(sd, p, prev_cpu, boosted); + +unlock: + rcu_read_unlock(); + + return target_cpu; +} + +/********************************************************************** + * Sysfs * + **********************************************************************/ +static struct attribute *ehmp_attrs[] = { + &global_boost_attr.attr, + &min_residency_attr.attr, + &up_threshold_attr.attr, + &down_threshold_attr.attr, + &top_overutil_attr.attr, + &bot_overutil_attr.attr, + &prefer_perf_attr.attr, + NULL, +}; + +static const struct attribute_group ehmp_group = { + .attrs = ehmp_attrs, +}; + +static struct kobject *ehmp_kobj; + +static int __init init_sysfs(void) +{ + int ret; + + ehmp_kobj = kobject_create_and_add("ehmp", kernel_kobj); + ret = sysfs_create_group(ehmp_kobj, &ehmp_group); + + return 0; +} +late_initcall(init_sysfs); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index be08e5e6c12c..95b50a697842 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -610,7 +611,7 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) return rb_entry(left, struct sched_entity, run_node); } -static struct sched_entity *__pick_next_entity(struct sched_entity *se) +struct sched_entity *__pick_next_entity(struct sched_entity *se) { struct rb_node *next = rb_next(&se->run_node); @@ -756,6 +757,8 @@ void init_entity_runnable_average(struct sched_entity *se) sa->load_avg = scale_load_down(se->load.weight); sa->load_sum = sa->load_avg * LOAD_AVG_MAX; /* when this task enqueue'ed, it will contribute to its cfs_rq's 
load_avg */ + + ontime_new_entity_load(current, se); } static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); @@ -794,6 +797,11 @@ void post_init_entity_util_avg(struct sched_entity *se) long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq))); long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2; + if (sched_feat(EXYNOS_HMP)) { + exynos_init_entity_util_avg(se); + goto util_init_done; + } + if (cap > 0) { if (cfs_rq->avg.util_avg != 0) { sa->util_avg = cfs_rq->avg.util_avg * se->load.weight; @@ -807,6 +815,7 @@ void post_init_entity_util_avg(struct sched_entity *se) sa->util_sum = sa->util_avg * LOAD_AVG_MAX; } +util_init_done: if (entity_is_task(se)) { struct task_struct *p = task_of(se); if (p->sched_class != &fair_sched_class) { @@ -2858,7 +2867,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * Approximate: * val * y^n, where y^32 ~= 0.5 (~1 scheduling period) */ -static u64 decay_load(u64 val, u64 n) +u64 decay_load(u64 val, u64 n) { unsigned int local_n; @@ -3052,6 +3061,9 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa, if (!weight) running = 0; + if (!cfs_rq && !rt_rq) + ontime_update_load_avg(delta, cpu, weight, sa); + /* * Now we know we crossed measurement unit boundaries. The *_avg * accrues by two steps: @@ -3523,6 +3535,9 @@ static inline void update_load_avg(struct sched_entity *se, int flags) if (decayed && (flags & UPDATE_TG)) update_tg_load_avg(cfs_rq, 0); + + if (entity_is_task(se)) + ontime_trace_task_info(task_of(se)); } /** @@ -5773,85 +5788,6 @@ static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity) * Hence - be careful when enabling DEBUG_EENV_DECISIONS * expecially if WALT is the task signal. */ -/*#define DEBUG_EENV_DECISIONS*/ - -#ifdef DEBUG_EENV_DECISIONS -/* max of 8 levels of sched groups traversed */ -#define EAS_EENV_DEBUG_LEVELS 16 - -struct _eenv_debug { - unsigned long cap; - unsigned long norm_util; - unsigned long cap_energy; - unsigned long idle_energy; - unsigned long this_energy; - unsigned long this_busy_energy; - unsigned long this_idle_energy; - cpumask_t group_cpumask; - unsigned long cpu_util[1]; -}; -#endif - -struct eenv_cpu { - /* CPU ID, must be in cpus_mask */ - int cpu_id; - - /* - * Index (into sched_group_energy::cap_states) of the OPP the - * CPU needs to run at if the task is placed on it. - * This includes the both active and blocked load, due to - * other tasks on this CPU, as well as the task's own - * utilization. - */ - int cap_idx; - int cap; - - /* Estimated system energy */ - unsigned long energy; - - /* Estimated energy variation wrt EAS_CPU_PRV */ - long nrg_delta; - -#ifdef DEBUG_EENV_DECISIONS - struct _eenv_debug *debug; - int debug_idx; -#endif /* DEBUG_EENV_DECISIONS */ -}; - -struct energy_env { - /* Utilization to move */ - struct task_struct *p; - unsigned long util_delta; - unsigned long util_delta_boosted; - - /* Mask of CPUs candidates to evaluate */ - cpumask_t cpus_mask; - - /* CPU candidates to evaluate */ - struct eenv_cpu *cpu; - int eenv_cpu_count; - -#ifdef DEBUG_EENV_DECISIONS - /* pointer to the memory block reserved - * for debug on this CPU - there will be - * sizeof(struct _eenv_debug) * - * (EAS_CPU_CNT * EAS_EENV_DEBUG_LEVELS) - * bytes allocated here. 
- */ - struct _eenv_debug *debug; -#endif - /* - * Index (into energy_env::cpu) of the morst energy efficient CPU for - * the specified energy_env::task - */ - int next_idx; - int max_cpu_count; - - /* Support data */ - struct sched_group *sg_top; - struct sched_group *sg_cap; - struct sched_group *sg; -}; /** * Amount of capacity of a CPU that is (estimated to be) used by CFS tasks @@ -6466,7 +6402,7 @@ static void dump_eenv_debug(struct energy_env *eenv) * A value greater than zero means that the most energy efficient CPU is the * one represented by eenv->cpu[eenv->next_idx].cpu_id. */ -static inline int select_energy_cpu_idx(struct energy_env *eenv) +int select_energy_cpu_idx(struct energy_env *eenv) { int last_cpu_idx = eenv->max_cpu_count - 1; struct sched_domain *sd; @@ -6745,7 +6681,7 @@ boosted_cpu_util(int cpu) return util + margin; } -static inline unsigned long +unsigned long boosted_task_util(struct task_struct *task) { unsigned long util = task_util_est(task); @@ -7299,7 +7235,7 @@ static int start_cpu(bool boosted) return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; } -static inline int find_best_target(struct task_struct *p, int *backup_cpu, +int find_best_target(struct task_struct *p, int *backup_cpu, bool boosted, bool prefer_idle) { unsigned long min_util = boosted_task_util(p); @@ -7743,7 +7679,7 @@ static inline void reset_eenv(struct energy_env *eenv) * filled in here. Callers are responsible for adding * other CPU candidates up to eenv->max_cpu_count. */ -static inline struct energy_env *get_eenv(struct task_struct *p, int prev_cpu) +struct energy_env *get_eenv(struct task_struct *p, int prev_cpu) { struct energy_env *eenv; cpumask_t cpumask_possible_cpus; @@ -7970,6 +7906,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f cpumask_test_cpu(cpu, &p->cpus_allowed); } + if (sched_feat(EXYNOS_HMP)) { + int selected_cpu; + + selected_cpu = exynos_select_cpu(p, prev_cpu, sync, sd_flag); + if (selected_cpu >= 0) + return selected_cpu; + } + for_each_domain(cpu, tmp) { if (!(tmp->flags & SD_LOAD_BALANCE)) break; @@ -8720,6 +8664,11 @@ static inline int migrate_degrades_locality(struct task_struct *p, } #endif +static inline bool smaller_cpu_capacity(int cpu, int ref) +{ + return capacity_orig_of(cpu) < capacity_orig_of(ref); +} + /* * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? */ @@ -8732,11 +8681,21 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* * We do not migrate tasks that are: + * 0) cannot be migrated to smaller capacity cpu due to schedtune.prefer_perf, or * 1) throttled_lb_pair, or * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) running (obviously), or * 4) are cache-hot on their current CPU. */ + if (!ontime_can_migration(p, env->dst_cpu)) + return 0; + +#ifdef CONFIG_SCHED_TUNE + if (smaller_cpu_capacity(env->dst_cpu, env->src_cpu) && + schedtune_prefer_perf(p)) + return 0; +#endif + if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; @@ -10301,6 +10260,9 @@ static int need_active_balance(struct lb_env *env) return 1; } + if (sched_feat(EXYNOS_HMP)) + return exynos_need_active_balance(env->idle, sd, env->src_cpu, env->dst_cpu); + /* * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task. 
* It's worth migrating the task if the src_cpu's capacity is reduced @@ -11387,6 +11349,9 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) #else rebalance_domains(this_rq, idle); #endif + + ontime_migration(); + schedtune_group_util_update(); } /* diff --git a/kernel/sched/features.h b/kernel/sched/features.h index dbade300ef8c..ca512de98d61 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -119,6 +119,11 @@ SCHED_FEAT(EAS_PREFER_IDLE, true) SCHED_FEAT(FIND_BEST_TARGET, true) SCHED_FEAT(FBT_STRICT_ORDER, true) +#ifdef CONFIG_SCHED_EHMP +SCHED_FEAT(EXYNOS_HMP, true) +#else +SCHED_FEAT(EXYNOS_HMP, false) +#endif /* * Apply schedtune boost hold to tasks of all sched classes. * If enabled, schedtune will hold the boost applied to a CPU diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a91c79cb112e..d6987ba1cfc6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -856,6 +856,9 @@ struct rq { u64 cum_window_demand; #endif /* CONFIG_SCHED_WALT */ +#ifdef CONFIG_SCHED_EHMP + bool ontime_migrating; +#endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING u64 prev_irq_time; @@ -2160,6 +2163,85 @@ extern void nohz_balance_exit_idle(unsigned int cpu); static inline void nohz_balance_exit_idle(unsigned int cpu) { } #endif +/*#define DEBUG_EENV_DECISIONS*/ + +#ifdef DEBUG_EENV_DECISIONS +/* max of 8 levels of sched groups traversed */ +#define EAS_EENV_DEBUG_LEVELS 16 + +struct _eenv_debug { + unsigned long cap; + unsigned long norm_util; + unsigned long cap_energy; + unsigned long idle_energy; + unsigned long this_energy; + unsigned long this_busy_energy; + unsigned long this_idle_energy; + cpumask_t group_cpumask; + unsigned long cpu_util[1]; +}; +#endif + +struct eenv_cpu { + /* CPU ID, must be in cpus_mask */ + int cpu_id; + + /* + * Index (into sched_group_energy::cap_states) of the OPP the + * CPU needs to run at if the task is placed on it. + * This includes the both active and blocked load, due to + * other tasks on this CPU, as well as the task's own + * utilization. + */ + int cap_idx; + int cap; + + /* Estimated system energy */ + unsigned long energy; + + /* Estimated energy variation wrt EAS_CPU_PRV */ + long nrg_delta; + +#ifdef DEBUG_EENV_DECISIONS + struct _eenv_debug *debug; + int debug_idx; +#endif /* DEBUG_EENV_DECISIONS */ +}; + +struct energy_env { + /* Utilization to move */ + struct task_struct *p; + unsigned long util_delta; + unsigned long util_delta_boosted; + + /* Mask of CPUs candidates to evaluate */ + cpumask_t cpus_mask; + + /* CPU candidates to evaluate */ + struct eenv_cpu *cpu; + int eenv_cpu_count; + +#ifdef DEBUG_EENV_DECISIONS + /* pointer to the memory block reserved + * for debug on this CPU - there will be + * sizeof(struct _eenv_debug) * + * (EAS_CPU_CNT * EAS_EENV_DEBUG_LEVELS) + * bytes allocated here. 
+ */ + struct _eenv_debug *debug; +#endif + /* + * Index (into energy_env::cpu) of the morst energy efficient CPU for + * the specified energy_env::task + */ + int next_idx; + int max_cpu_count; + + /* Support data */ + struct sched_group *sg_top; + struct sched_group *sg_cap; + struct sched_group *sg; +}; #ifdef CONFIG_SMP diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 74a45606dc8c..534ee933ceb6 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -17,6 +18,52 @@ extern struct reciprocal_value schedtune_spc_rdiv; /* We hold schedtune boost in effect for at least this long */ #define SCHEDTUNE_BOOST_HOLD_NS 50000000ULL +static int perf_threshold = 0; + +int schedtune_perf_threshold(void) +{ + return perf_threshold + 1; +} + +struct group_balancer { + /* sum of task utilization in group */ + unsigned long util; + + /* group balancing threshold */ + unsigned long threshold; + + /* imbalance ratio by heaviest task */ + unsigned int imbalance_ratio; + + /* balance ratio by heaviest task */ + unsigned int balance_ratio; + + /* heaviest task utilization in group */ + unsigned long heaviest_util; + + /* group utilization update interval */ + unsigned long update_interval; + + /* next group utilization update time */ + unsigned long next_update_time; + + /* + * group imbalance time = imbalance_count * update_interval + * imbalance_count >= imbalance_duration -> need balance + */ + unsigned int imbalance_duration; + unsigned int imbalance_count; + + /* utilization tracking window size */ + unsigned long window; + + /* group balancer locking */ + raw_spinlock_t lock; + + /* need group balancing? */ + bool need_balance; +}; + /* * EAS scheduler tunables for task groups. */ @@ -35,6 +82,13 @@ struct schedtune { /* Hint to bias scheduling of tasks on that SchedTune CGroup * towards idle CPUs */ int prefer_idle; + + /* Hint to bias scheduling of tasks on that SchedTune CGroup + * towards high performance CPUs */ + int prefer_perf; + + /* SchedTune group balancer */ + struct group_balancer gb; }; static inline struct schedtune *css_st(struct cgroup_subsys_state *css) @@ -65,6 +119,7 @@ static struct schedtune root_schedtune = { .boost = 0, .prefer_idle = 0, + .prefer_perf = 0, }; /* @@ -442,6 +497,337 @@ int schedtune_prefer_idle(struct task_struct *p) return prefer_idle; } +#ifdef CONFIG_SCHED_EHMP +static atomic_t kernel_prefer_perf_req[BOOSTGROUPS_COUNT]; +int kernel_prefer_perf(int grp_idx) +{ + if (grp_idx >= BOOSTGROUPS_COUNT) + return -EINVAL; + + return atomic_read(&kernel_prefer_perf_req[grp_idx]); +} + +void request_kernel_prefer_perf(int grp_idx, int enable) +{ + if (grp_idx >= BOOSTGROUPS_COUNT) + return; + + if (enable) + atomic_inc(&kernel_prefer_perf_req[grp_idx]); + else + BUG_ON(atomic_dec_return(&kernel_prefer_perf_req[grp_idx]) < 0); +} +#else +static inline int kernel_prefer_perf(int grp_idx) { return 0; } +#endif + +int schedtune_prefer_perf(struct task_struct *p) +{ + struct schedtune *st; + int prefer_perf; + + if (unlikely(!schedtune_initialized)) + return 0; + + /* Get prefer_perf value */ + rcu_read_lock(); + st = task_schedtune(p); + prefer_perf = max(st->prefer_perf, kernel_prefer_perf(st->idx)); + rcu_read_unlock(); + + return prefer_perf; +} + +int schedtune_need_group_balance(struct task_struct *p) +{ + bool balance; + + if (unlikely(!schedtune_initialized)) + return 0; + + rcu_read_lock(); + balance = task_schedtune(p)->gb.need_balance; + rcu_read_unlock(); + + return balance; +} + +static 
inline void +check_need_group_balance(int group_idx, struct group_balancer *gb) +{ + int heaviest_ratio; + + if (!gb->util) { + gb->imbalance_count = 0; + gb->need_balance = false; + + goto out; + } + + heaviest_ratio = gb->heaviest_util * 100 / gb->util; + + if (gb->need_balance) { + if (gb->util < gb->threshold || heaviest_ratio < gb->balance_ratio) { + gb->imbalance_count = 0; + gb->need_balance = false; + } + + goto out; + } + + if (gb->util >= gb->threshold && heaviest_ratio > gb->imbalance_ratio) { + gb->imbalance_count++; + + if (gb->imbalance_count >= gb->imbalance_duration) + gb->need_balance = true; + } else { + gb->imbalance_count = 0; + } + +out: + trace_sched_tune_check_group_balance(group_idx, + gb->imbalance_count, gb->need_balance); +} + +static void __schedtune_group_util_update(struct schedtune *st) +{ + struct group_balancer *gb = &st->gb; + unsigned long now = cpu_rq(0)->clock_task; + struct css_task_iter it; + struct task_struct *p; + struct task_struct *heaviest_p = NULL; + unsigned long util_sum = 0; + unsigned long heaviest_util = 0; + unsigned int total = 0, accumulated = 0; + + if (!raw_spin_trylock(&gb->lock)) + return; + + if (!gb->update_interval) + goto out; + + if (time_before(now, gb->next_update_time)) + goto out; + + css_task_iter_start(&st->css, 0, &it); + while ((p = css_task_iter_next(&it))) { + unsigned long clock_task, delta, util; + + total++; + + clock_task = task_rq(p)->clock_task; + delta = clock_task - p->se.avg.last_update_time; + if (p->se.avg.last_update_time && delta > gb->window) + continue; + + util = p->se.avg.util_avg; + if (util > heaviest_util) { + heaviest_util = util; + heaviest_p = p; + } + + util_sum += p->se.avg.util_avg; + accumulated++; + } + css_task_iter_end(&it); + + gb->util = util_sum; + gb->heaviest_util = heaviest_util; + gb->next_update_time = now + gb->update_interval; + + /* if there is no task in group, heaviest_p is always NULL */ + if (heaviest_p) + trace_sched_tune_grouputil_update(st->idx, total, accumulated, + gb->util, heaviest_p, gb->heaviest_util); + + check_need_group_balance(st->idx, gb); +out: + raw_spin_unlock(&gb->lock); +} + +void schedtune_group_util_update(void) +{ + int idx; + + if (unlikely(!schedtune_initialized)) + return; + + rcu_read_lock(); + + for (idx = 1; idx < BOOSTGROUPS_COUNT; idx++) { + struct schedtune *st = allocated_group[idx]; + + if (!st) + continue; + __schedtune_group_util_update(st); + } + + rcu_read_unlock(); +} + +static u64 +gb_util_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.util; +} + +static u64 +gb_heaviest_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + if (!st->gb.util) + return 0; + + return st->gb.heaviest_util * 100 / st->gb.util; +} + +static u64 +gb_threshold_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.threshold; +} + +static int +gb_threshold_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 threshold) +{ + struct schedtune *st = css_st(css); + struct group_balancer *gb = &st->gb; + + raw_spin_lock(&gb->lock); + gb->threshold = threshold; + check_need_group_balance(st->idx, gb); + raw_spin_unlock(&gb->lock); + + return 0; +} + +static u64 +gb_imbalance_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.imbalance_ratio; +} + +static int +gb_imbalance_ratio_write(struct 
cgroup_subsys_state *css, struct cftype *cft, + u64 ratio) +{ + struct schedtune *st = css_st(css); + struct group_balancer *gb = &st->gb; + + ratio = min_t(u64, ratio, 100); + + raw_spin_lock(&gb->lock); + gb->imbalance_ratio = ratio; + check_need_group_balance(st->idx, gb); + raw_spin_unlock(&gb->lock); + + return 0; +} + +static u64 +gb_balance_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.balance_ratio; +} + +static int +gb_balance_ratio_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 ratio) +{ + struct schedtune *st = css_st(css); + struct group_balancer *gb = &st->gb; + + ratio = min_t(u64, ratio, 100); + + raw_spin_lock(&gb->lock); + gb->balance_ratio = ratio; + check_need_group_balance(st->idx, gb); + raw_spin_unlock(&gb->lock); + + return 0; +} + +static u64 +gb_interval_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.update_interval / NSEC_PER_USEC; +} + +static int +gb_interval_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 interval_us) +{ + struct schedtune *st = css_st(css); + struct group_balancer *gb = &st->gb; + + raw_spin_lock(&gb->lock); + gb->update_interval = interval_us * NSEC_PER_USEC; + if (!interval_us) { + gb->util = 0; + gb->need_balance = false; + } + raw_spin_unlock(&gb->lock); + + return 0; +} + +static u64 +gb_duration_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.imbalance_duration; +} + +static int +gb_duration_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 duration) +{ + struct schedtune *st = css_st(css); + struct group_balancer *gb = &st->gb; + + raw_spin_lock(&gb->lock); + gb->imbalance_duration = duration; + check_need_group_balance(st->idx, gb); + raw_spin_unlock(&gb->lock); + + return 0; +} + +static u64 +gb_window_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->gb.window / NSEC_PER_MSEC; +} + +static int +gb_window_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 window) +{ + struct schedtune *st = css_st(css); + struct group_balancer *gb = &st->gb; + + raw_spin_lock(&gb->lock); + gb->window = window * NSEC_PER_MSEC; + raw_spin_unlock(&gb->lock); + + return 0; +} + static u64 prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -460,6 +846,24 @@ prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft, return 0; } +static u64 +prefer_perf_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->prefer_perf; +} + +static int +prefer_perf_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 prefer_perf) +{ + struct schedtune *st = css_st(css); + st->prefer_perf = prefer_perf; + + return 0; +} + static s64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -496,6 +900,49 @@ static struct cftype files[] = { .read_u64 = prefer_idle_read, .write_u64 = prefer_idle_write, }, + { + .name = "prefer_perf", + .read_u64 = prefer_perf_read, + .write_u64 = prefer_perf_write, + }, + { + .name = "gb_util", + .read_u64 = gb_util_read, + }, + { + .name = "gb_heaviest_ratio", + .read_u64 = gb_heaviest_ratio_read, + }, + { + .name = "gb_threshold", + .read_u64 = gb_threshold_read, + .write_u64 = gb_threshold_write, + }, + { + .name = "gb_imbalance_ratio", + .read_u64 = gb_imbalance_ratio_read, + .write_u64 
= gb_imbalance_ratio_write, + }, + { + .name = "gb_balance_ratio", + .read_u64 = gb_balance_ratio_read, + .write_u64 = gb_balance_ratio_write, + }, + { + .name = "gb_interval_us", + .read_u64 = gb_interval_read, + .write_u64 = gb_interval_write, + }, + { + .name = "gb_duration", + .read_u64 = gb_duration_read, + .write_u64 = gb_duration_write, + }, + { + .name = "gb_window_ms", + .read_u64 = gb_window_read, + .write_u64 = gb_window_write, + }, { } /* terminate */ }; @@ -519,6 +966,22 @@ schedtune_boostgroup_init(struct schedtune *st) return 0; } +static void +schedtune_group_balancer_init(struct schedtune *st) +{ + raw_spin_lock_init(&st->gb.lock); + + st->gb.threshold = ULONG_MAX; + st->gb.imbalance_ratio = 0; /* 0% */ + st->gb.update_interval = 0; /* disable update */ + st->gb.next_update_time = cpu_rq(0)->clock_task; + + st->gb.imbalance_duration = 0; + st->gb.imbalance_count = 0; + + st->gb.window = 100 * NSEC_PER_MSEC; /* 100ms */ +} + static struct cgroup_subsys_state * schedtune_css_alloc(struct cgroup_subsys_state *parent_css) { @@ -548,6 +1011,8 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) if (!st) goto out; + schedtune_group_balancer_init(st); + /* Initialize per CPUs boost group support */ st->idx = idx; if (schedtune_boostgroup_init(st)) @@ -616,6 +1081,9 @@ schedtune_init(void) { schedtune_spc_rdiv = reciprocal_value(100); schedtune_init_cgroups(); + + perf_threshold = find_second_max_cap(); + return 0; } postcore_initcall(schedtune_init); diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h index e79e1b198921..1588ba24bff9 100644 --- a/kernel/sched/tune.h +++ b/kernel/sched/tune.h @@ -15,7 +15,13 @@ struct target_nrg { int schedtune_cpu_boost(int cpu); int schedtune_task_boost(struct task_struct *tsk); +void schedtune_group_util_update(void); +int schedtune_need_group_balance(struct task_struct *p); + +int schedtune_perf_threshold(void); + int schedtune_prefer_idle(struct task_struct *tsk); +int schedtune_prefer_perf(struct task_struct *tsk); void schedtune_enqueue_task(struct task_struct *p, int cpu); void schedtune_dequeue_task(struct task_struct *p, int cpu); @@ -25,7 +31,13 @@ void schedtune_dequeue_task(struct task_struct *p, int cpu); #define schedtune_cpu_boost(cpu) 0 #define schedtune_task_boost(tsk) 0 +#define schedtune_group_util_update() do { } while (0) +#define schedtune_need_group_balance(task) 0 + +#define schedtune_perf_threshold() 0 + #define schedtune_prefer_idle(tsk) 0 +#define schedtune_prefer_perf(tsk) 0 #define schedtune_enqueue_task(task, cpu) do { } while (0) #define schedtune_dequeue_task(task, cpu) do { } while (0) -- 2.20.1
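
The order of checks in exynos_select_cpu() is the core of the EHMP wake-up path: ontime wake-up placement first, then boost-trigger targets, then the sync hint (only if the waker's CPU is allowed for the task), then prefer-idle, and finally the energy-aware candidates. A minimal userspace sketch of that priority chain; every model_*() helper is a hypothetical stand-in, and -1 means "no CPU selected here, fall through to the next policy".

/* Sketch only: stand-ins for the individual placement policies. */
#include <stdio.h>

static int cpu_selected(int cpu) { return cpu >= 0; }

static int model_ontime_wakeup(void)  { return -1; }
static int model_boost_trigger(void)  { return -1; }
static int model_sync_hint(void)      { return  2; } /* waker's CPU, if allowed */
static int model_prefer_idle(void)    { return -1; }
static int model_energy_aware(void)   { return  0; }

static int model_select_cpu(void)
{
	int cpu;

	if (cpu_selected(cpu = model_ontime_wakeup()))
		return cpu;
	if (cpu_selected(cpu = model_boost_trigger()))
		return cpu;
	if (cpu_selected(cpu = model_sync_hint()))
		return cpu;
	if (cpu_selected(cpu = model_prefer_idle()))
		return cpu;
	return model_energy_aware();
}

int main(void)
{
	printf("selected cpu: %d\n", model_select_cpu());
	return 0;
}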
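
decay_load(), made non-static above so the ontime load tracking can reuse it, scales a contribution by y^n with y^32 == 0.5, i.e. a sample loses half its weight every 32 scheduling periods. A small floating-point illustration of the same geometric decay; the kernel uses a fixed-point lookup table rather than pow().

/* Geometric decay model; compile with -lm. */
#include <math.h>
#include <stdio.h>

static unsigned long long model_decay_load(unsigned long long val, unsigned int n)
{
	const double y = pow(0.5, 1.0 / 32.0);	/* chosen so that y^32 == 0.5 */

	return (unsigned long long)(val * pow(y, (double)n));
}

int main(void)
{
	printf("1024 after  0 periods: %llu\n", model_decay_load(1024, 0));
	printf("1024 after 32 periods: %llu\n", model_decay_load(1024, 32));
	printf("1024 after 64 periods: %llu\n", model_decay_load(1024, 64));
	return 0;
}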
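
can_migrate_task() gains two early rejections in this patch: the ontime filter, and (under CONFIG_SCHED_TUNE) a refusal to move a prefer_perf task to a CPU with smaller original capacity than its current one. A standalone model of the capacity check; the capacity table and CPU numbering are made-up inputs, not values from the patch.

/* Model of the prefer_perf capacity filter in can_migrate_task(). */
#include <stdbool.h>
#include <stdio.h>

static const unsigned long capacity_orig[8] = {
	460, 460, 460, 460, 1024, 1024, 1024, 1024	/* assumed LITTLE/big split */
};

static bool smaller_cpu_capacity(int cpu, int ref)
{
	return capacity_orig[cpu] < capacity_orig[ref];
}

static bool may_migrate(bool prefer_perf, int src_cpu, int dst_cpu)
{
	if (prefer_perf && smaller_cpu_capacity(dst_cpu, src_cpu))
		return false;	/* keep performance-preferring tasks on big CPUs */
	return true;
}

int main(void)
{
	printf("prefer_perf task, big -> LITTLE: %s\n",
	       may_migrate(true, 4, 1) ? "allowed" : "rejected");
	printf("normal task,      big -> LITTLE: %s\n",
	       may_migrate(false, 4, 1) ? "allowed" : "rejected");
	return 0;
}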
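
The EXYNOS_HMP scheduler feature defaults to true when CONFIG_SCHED_EHMP is set and gates every point where EHMP policy overrides the generic path (util initialisation, CPU selection, need_active_balance). A sketch of that gating pattern under assumed placeholder decision functions; it models the structure, not the actual balance policy.

/* Compile-time default plus runtime flag, mirroring SCHED_FEAT(EXYNOS_HMP, ...). */
#include <stdbool.h>
#include <stdio.h>

#ifdef CONFIG_SCHED_EHMP
static bool feat_exynos_hmp = true;
#else
static bool feat_exynos_hmp = false;
#endif

/* Hypothetical stand-in for exynos_need_active_balance(). */
static int model_exynos_need_active_balance(int src_cpu, int dst_cpu)
{
	return src_cpu != dst_cpu;	/* placeholder policy */
}

/* Hypothetical stand-in for the generic need_active_balance() tail. */
static int model_generic_need_active_balance(void)
{
	return 0;
}

static int model_need_active_balance(int src_cpu, int dst_cpu)
{
	if (feat_exynos_hmp)
		return model_exynos_need_active_balance(src_cpu, dst_cpu);

	return model_generic_need_active_balance();
}

int main(void)
{
	printf("active balance (cpu4 -> cpu0): %d\n",
	       model_need_active_balance(4, 0));
	return 0;
}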
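
request_kernel_prefer_perf() lets kernel code reference-count a per-group performance preference, and schedtune_prefer_perf() reports the maximum of the cgroup knob and that kernel request. A userspace model of the same bookkeeping; the group count and index used here are assumptions, and the real counters are atomics with a BUG_ON guarding underflow.

/* Model of the kernel prefer_perf refcounting in tune.c. */
#include <stdio.h>

#define BOOSTGROUPS 5	/* assumed group count for this model */

static int kernel_prefer_perf_req[BOOSTGROUPS];	/* kernel-side refcounts */
static int cgroup_prefer_perf[BOOSTGROUPS];	/* per-group cgroup knob */

static void model_request_kernel_prefer_perf(int grp, int enable)
{
	if (grp < 0 || grp >= BOOSTGROUPS)
		return;
	kernel_prefer_perf_req[grp] += enable ? 1 : -1;	/* atomic in the kernel */
}

static int model_prefer_perf(int grp)
{
	int kernel = kernel_prefer_perf_req[grp];

	return cgroup_prefer_perf[grp] > kernel ? cgroup_prefer_perf[grp] : kernel;
}

int main(void)
{
	int grp = 3;	/* assumed index of a boosted group */

	model_request_kernel_prefer_perf(grp, 1);	/* e.g. a driver boosts the group */
	printf("prefer_perf(grp %d) = %d\n", grp, model_prefer_perf(grp));
	model_request_kernel_prefer_perf(grp, 0);	/* request released */
	printf("prefer_perf(grp %d) = %d\n", grp, model_prefer_perf(grp));
	return 0;
}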
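
check_need_group_balance() implements a hysteresis: a group is flagged for balancing only after its heaviest task has dominated the group utilization (above imbalance_ratio) while the group stays over its threshold for imbalance_duration consecutive updates, and the flag drops again once utilization or the heaviest-task share falls back under the balance limits. The same logic as a runnable userspace model with assumed tunable values.

/* Model of the group-balancer hysteresis in check_need_group_balance(). */
#include <stdbool.h>
#include <stdio.h>

struct gb_model {
	unsigned long util, heaviest_util, threshold;
	unsigned int imbalance_ratio, balance_ratio;
	unsigned int imbalance_duration, imbalance_count;
	bool need_balance;
};

static void check_balance(struct gb_model *gb)
{
	unsigned int heaviest_ratio;

	if (!gb->util) {
		gb->imbalance_count = 0;
		gb->need_balance = false;
		return;
	}

	heaviest_ratio = gb->heaviest_util * 100 / gb->util;

	if (gb->need_balance) {
		/* already balancing: clear only when the group calms down */
		if (gb->util < gb->threshold || heaviest_ratio < gb->balance_ratio) {
			gb->imbalance_count = 0;
			gb->need_balance = false;
		}
		return;
	}

	if (gb->util >= gb->threshold && heaviest_ratio > gb->imbalance_ratio) {
		if (++gb->imbalance_count >= gb->imbalance_duration)
			gb->need_balance = true;
	} else {
		gb->imbalance_count = 0;
	}
}

int main(void)
{
	struct gb_model gb = {
		.util = 800, .heaviest_util = 700, .threshold = 500,
		.imbalance_ratio = 80, .balance_ratio = 50,
		.imbalance_duration = 3,	/* assumed tunables */
	};

	for (int i = 0; i < 4; i++) {
		check_balance(&gb);
		printf("update %d: count=%u need_balance=%d\n",
		       i, gb.imbalance_count, gb.need_balance);
	}
	return 0;
}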
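
The gb_interval_us and gb_window_ms cgroup files keep their values internally in nanoseconds; writing 0 to the interval disables group-utilization updates and clears the balance state. A sketch of the unit handling only, without the cgroup plumbing or locking.

/* Unit conversion behind gb_interval_us and gb_window_ms. */
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL
#define NSEC_PER_MSEC 1000000ULL

struct gb_units {
	unsigned long long update_interval;	/* ns; 0 disables updates */
	unsigned long long window;		/* ns */
};

static void gb_interval_write_us(struct gb_units *gb, unsigned long long us)
{
	gb->update_interval = us * NSEC_PER_USEC;
}

static unsigned long long gb_interval_read_us(const struct gb_units *gb)
{
	return gb->update_interval / NSEC_PER_USEC;
}

static void gb_window_write_ms(struct gb_units *gb, unsigned long long ms)
{
	gb->window = ms * NSEC_PER_MSEC;
}

static unsigned long long gb_window_read_ms(const struct gb_units *gb)
{
	return gb->window / NSEC_PER_MSEC;
}

int main(void)
{
	struct gb_units gb = { 0, 0 };

	gb_interval_write_us(&gb, 16000);	/* e.g. 16 ms update interval */
	gb_window_write_ms(&gb, 100);		/* 100 ms tracking window (the init default) */
	printf("interval: %llu us, window: %llu ms\n",
	       gb_interval_read_us(&gb), gb_window_read_ms(&gb));
	return 0;
}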
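
schedtune_init() seeds perf_threshold from find_second_max_cap(), and schedtune_perf_threshold() reports that value plus one. Assuming find_second_max_cap() returns the second-largest per-CPU original capacity (i.e. the capacity of the second-fastest cluster), the following userspace illustration shows what that seeding amounts to; the capacity values are made up.

/* Illustration of the perf_threshold seeding in schedtune_init(). */
#include <stdio.h>

static int model_find_second_max_cap(const unsigned long *cap, int nr_cpus)
{
	unsigned long max = 0, second = 0;

	for (int i = 0; i < nr_cpus; i++) {
		if (cap[i] > max) {
			second = max;
			max = cap[i];
		} else if (cap[i] > second && cap[i] < max) {
			second = cap[i];
		}
	}
	return (int)second;
}

int main(void)
{
	/* assumed big.LITTLE capacities, not values from this patch */
	const unsigned long cap[8] = { 460, 460, 460, 460, 1024, 1024, 1024, 1024 };
	int perf_threshold = model_find_second_max_cap(cap, 8);

	printf("perf_threshold = %d, schedtune_perf_threshold() = %d\n",
	       perf_threshold, perf_threshold + 1);
	return 0;
}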